blob: 578a73cfcb5aee19eed40343a061bf42ea0f47cb [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
44#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000045#include <libxml/threads.h>
46#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000056#ifdef LIBXML_CATALOG_ENABLED
57#include <libxml/catalog.h>
58#endif
Owen Taylor3473f882001-02-23 17:55:21 +000059
60#ifdef HAVE_CTYPE_H
61#include <ctype.h>
62#endif
63#ifdef HAVE_STDLIB_H
64#include <stdlib.h>
65#endif
66#ifdef HAVE_SYS_STAT_H
67#include <sys/stat.h>
68#endif
69#ifdef HAVE_FCNTL_H
70#include <fcntl.h>
71#endif
72#ifdef HAVE_UNISTD_H
73#include <unistd.h>
74#endif
75#ifdef HAVE_ZLIB_H
76#include <zlib.h>
77#endif
78
79
Daniel Veillard21a0f912001-02-25 19:54:14 +000080#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000081#define XML_PARSER_BUFFER_SIZE 100
82
Daniel Veillard5997aca2002-03-18 18:36:20 +000083#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
84
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by the W3C specs.
 */
static const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
93
94/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +000095xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
96 const xmlChar **str);
97
Daniel Veillard257d9102001-05-08 10:41:44 +000098static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +000099xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
100 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000101 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000102 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000103
Daniel Veillard8107a222002-01-13 14:10:10 +0000104static void
105xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
106 xmlNodePtr lastNode);
107
Daniel Veillard328f48c2002-11-15 15:24:34 +0000108static int
109xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
110 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Owen Taylor3473f882001-02-23 17:55:21 +0000111/************************************************************************
112 * *
113 * Parser stacks related functions and macros *
114 * *
115 ************************************************************************/
116
117xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
118 const xmlChar ** str);
119
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000120/**
121 * inputPush:
122 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000123 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000124 *
125 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000126 *
127 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000128 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000129extern int
130inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
131{
132 if (ctxt->inputNr >= ctxt->inputMax) {
133 ctxt->inputMax *= 2;
134 ctxt->inputTab =
135 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
136 ctxt->inputMax *
137 sizeof(ctxt->inputTab[0]));
138 if (ctxt->inputTab == NULL) {
139 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
140 return (0);
141 }
142 }
143 ctxt->inputTab[ctxt->inputNr] = value;
144 ctxt->input = value;
145 return (ctxt->inputNr++);
146}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000147/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000148 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000149 * @ctxt: an XML parser context
150 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000151 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000152 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000153 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000154 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000155extern xmlParserInputPtr
156inputPop(xmlParserCtxtPtr ctxt)
157{
158 xmlParserInputPtr ret;
159
160 if (ctxt->inputNr <= 0)
161 return (0);
162 ctxt->inputNr--;
163 if (ctxt->inputNr > 0)
164 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
165 else
166 ctxt->input = NULL;
167 ret = ctxt->inputTab[ctxt->inputNr];
168 ctxt->inputTab[ctxt->inputNr] = 0;
169 return (ret);
170}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000171/**
172 * nodePush:
173 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000174 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000175 *
176 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000177 *
178 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000179 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000180extern int
181nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
182{
183 if (ctxt->nodeNr >= ctxt->nodeMax) {
184 ctxt->nodeMax *= 2;
185 ctxt->nodeTab =
186 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
187 ctxt->nodeMax *
188 sizeof(ctxt->nodeTab[0]));
189 if (ctxt->nodeTab == NULL) {
190 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
191 return (0);
192 }
193 }
194 ctxt->nodeTab[ctxt->nodeNr] = value;
195 ctxt->node = value;
196 return (ctxt->nodeNr++);
197}
198/**
199 * nodePop:
200 * @ctxt: an XML parser context
201 *
202 * Pops the top element node from the node stack
203 *
204 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +0000205 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000206extern xmlNodePtr
207nodePop(xmlParserCtxtPtr ctxt)
208{
209 xmlNodePtr ret;
210
211 if (ctxt->nodeNr <= 0)
212 return (0);
213 ctxt->nodeNr--;
214 if (ctxt->nodeNr > 0)
215 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
216 else
217 ctxt->node = NULL;
218 ret = ctxt->nodeTab[ctxt->nodeNr];
219 ctxt->nodeTab[ctxt->nodeNr] = 0;
220 return (ret);
221}
222/**
223 * namePush:
224 * @ctxt: an XML parser context
225 * @value: the element name
226 *
227 * Pushes a new element name on top of the name stack
228 *
229 * Returns 0 in case of error, the index in the stack otherwise
230 */
231extern int
232namePush(xmlParserCtxtPtr ctxt, xmlChar * value)
233{
234 if (ctxt->nameNr >= ctxt->nameMax) {
235 ctxt->nameMax *= 2;
236 ctxt->nameTab =
237 (xmlChar * *)xmlRealloc(ctxt->nameTab,
238 ctxt->nameMax *
239 sizeof(ctxt->nameTab[0]));
240 if (ctxt->nameTab == NULL) {
241 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
242 return (0);
243 }
244 }
245 ctxt->nameTab[ctxt->nameNr] = value;
246 ctxt->name = value;
247 return (ctxt->nameNr++);
248}
249/**
250 * namePop:
251 * @ctxt: an XML parser context
252 *
253 * Pops the top element name from the name stack
254 *
255 * Returns the name just removed
256 */
257extern xmlChar *
258namePop(xmlParserCtxtPtr ctxt)
259{
260 xmlChar *ret;
261
262 if (ctxt->nameNr <= 0)
263 return (0);
264 ctxt->nameNr--;
265 if (ctxt->nameNr > 0)
266 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
267 else
268 ctxt->name = NULL;
269 ret = ctxt->nameTab[ctxt->nameNr];
270 ctxt->nameTab[ctxt->nameNr] = 0;
271 return (ret);
272}
Owen Taylor3473f882001-02-23 17:55:21 +0000273
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000274static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000275 if (ctxt->spaceNr >= ctxt->spaceMax) {
276 ctxt->spaceMax *= 2;
277 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
278 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
279 if (ctxt->spaceTab == NULL) {
280 xmlGenericError(xmlGenericErrorContext,
281 "realloc failed !\n");
282 return(0);
283 }
284 }
285 ctxt->spaceTab[ctxt->spaceNr] = val;
286 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
287 return(ctxt->spaceNr++);
288}
289
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000290static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000291 int ret;
292 if (ctxt->spaceNr <= 0) return(0);
293 ctxt->spaceNr--;
294 if (ctxt->spaceNr > 0)
295 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
296 else
297 ctxt->space = NULL;
298 ret = ctxt->spaceTab[ctxt->spaceNr];
299 ctxt->spaceTab[ctxt->spaceNr] = -1;
300 return(ret);
301}
302
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip l xmlChar in the input buffer
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
336
/* Raw byte accessors on the current input; they expect a local `ctxt'. */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * SKIP(val): advance the cursor and the character counter by val, hand a
 * '%' found at the new position to the PE reference handler, and pop the
 * current input when it is exhausted and cannot be grown.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    if (*ctxt->input->cur == '%')					\
	xmlParserHandlePEReference(ctxt);				\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	xmlPopInput(ctxt);						\
  } while (0)
349
Daniel Veillard46de64e2002-05-29 08:21:33 +0000350#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) \
351 xmlSHRINK (ctxt);
352
353static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
354 xmlParserInputShrink(ctxt->input);
355 if ((*ctxt->input->cur == 0) &&
356 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
357 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000358 }
Owen Taylor3473f882001-02-23 17:55:21 +0000359
Daniel Veillard46de64e2002-05-29 08:21:33 +0000360#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) \
361 xmlGROW (ctxt);
362
363static void xmlGROW (xmlParserCtxtPtr ctxt) {
364 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
365 if ((*ctxt->input->cur == 0) &&
366 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
367 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000368 }
Owen Taylor3473f882001-02-23 17:55:21 +0000369
/* Skip blanks at the current position; evaluates to the count skipped. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Move to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one xmlChar, growing the input when the new
 * position holds a 0 byte. Line/column information is not updated.
 */
#define NEXT1 {								\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): advance by l xmlChars, updating line/column from the
 * character being left, then hand a '%' at the new position to the PE
 * reference handler. The argument is parenthesized so that expression
 * arguments expand safely.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* Current character of the context / of string s; l receives its length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i; a length l of 1 is stored directly as a single xmlChar.
 * Arguments are parenthesized so that expression arguments expand
 * safely. The expansion is still a bare if/else statement, so it must
 * not be used as the unbraced body of another `if'.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
396/**
397 * xmlSkipBlankChars:
398 * @ctxt: the XML parser context
399 *
400 * skip all blanks character found at that point in the input streams.
401 * It pops up finished entities in the process if allowable at that point.
402 *
403 * Returns the number of space chars skipped
404 */
405
406int
407xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000408 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000409
410 /*
411 * It's Okay to use CUR/NEXT here since all the blanks are on
412 * the ASCII range.
413 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000414 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
415 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000416 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000417 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000418 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000419 cur = ctxt->input->cur;
420 while (IS_BLANK(*cur)) {
421 if (*cur == '\n') {
422 ctxt->input->line++; ctxt->input->col = 1;
423 }
424 cur++;
425 res++;
426 if (*cur == 0) {
427 ctxt->input->cur = cur;
428 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
429 cur = ctxt->input->cur;
430 }
431 }
432 ctxt->input->cur = cur;
433 } else {
434 int cur;
435 do {
436 cur = CUR;
437 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
438 NEXT;
439 cur = CUR;
440 res++;
441 }
442 while ((cur == 0) && (ctxt->inputNr > 1) &&
443 (ctxt->instate != XML_PARSER_COMMENT)) {
444 xmlPopInput(ctxt);
445 cur = CUR;
446 }
447 /*
448 * Need to handle support of entities branching here
449 */
450 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
451 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
452 }
Owen Taylor3473f882001-02-23 17:55:21 +0000453 return(res);
454}
455
456/************************************************************************
457 * *
458 * Commodity functions to handle entities *
459 * *
460 ************************************************************************/
461
462/**
463 * xmlPopInput:
464 * @ctxt: an XML parser context
465 *
466 * xmlPopInput: the current input pointed by ctxt->input came to an end
467 * pop it and return the next char.
468 *
469 * Returns the current xmlChar in the parser context
470 */
471xmlChar
472xmlPopInput(xmlParserCtxtPtr ctxt) {
473 if (ctxt->inputNr == 1) return(0); /* End of main Input */
474 if (xmlParserDebugEntities)
475 xmlGenericError(xmlGenericErrorContext,
476 "Popping input %d\n", ctxt->inputNr);
477 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +0000478 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000479 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
480 return(xmlPopInput(ctxt));
481 return(CUR);
482}
483
484/**
485 * xmlPushInput:
486 * @ctxt: an XML parser context
487 * @input: an XML parser input fragment (entity, XML fragment ...).
488 *
489 * xmlPushInput: switch to a new input stream which is stacked on top
490 * of the previous one(s).
491 */
492void
493xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
494 if (input == NULL) return;
495
496 if (xmlParserDebugEntities) {
497 if ((ctxt->input != NULL) && (ctxt->input->filename))
498 xmlGenericError(xmlGenericErrorContext,
499 "%s(%d): ", ctxt->input->filename,
500 ctxt->input->line);
501 xmlGenericError(xmlGenericErrorContext,
502 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
503 }
504 inputPush(ctxt, input);
505 GROW;
506}
507
508/**
509 * xmlParseCharRef:
510 * @ctxt: an XML parser context
511 *
512 * parse Reference declarations
513 *
514 * [66] CharRef ::= '&#' [0-9]+ ';' |
515 * '&#x' [0-9a-fA-F]+ ';'
516 *
517 * [ WFC: Legal Character ]
518 * Characters referred to using character references must match the
519 * production for Char.
520 *
521 * Returns the value parsed (as an int), 0 in case of error
522 */
523int
524xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000525 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000526 int count = 0;
527
Owen Taylor3473f882001-02-23 17:55:21 +0000528 /*
529 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
530 */
Daniel Veillard561b7f82002-03-20 21:55:57 +0000531 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +0000532 (NXT(2) == 'x')) {
533 SKIP(3);
534 GROW;
535 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000536 if (count++ > 20) {
537 count = 0;
538 GROW;
539 }
540 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000541 val = val * 16 + (CUR - '0');
542 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
543 val = val * 16 + (CUR - 'a') + 10;
544 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
545 val = val * 16 + (CUR - 'A') + 10;
546 else {
547 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
548 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
549 ctxt->sax->error(ctxt->userData,
550 "xmlParseCharRef: invalid hexadecimal value\n");
551 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000552 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000553 val = 0;
554 break;
555 }
556 NEXT;
557 count++;
558 }
559 if (RAW == ';') {
560 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
561 ctxt->nbChars ++;
562 ctxt->input->cur++;
563 }
Daniel Veillard561b7f82002-03-20 21:55:57 +0000564 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +0000565 SKIP(2);
566 GROW;
567 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000568 if (count++ > 20) {
569 count = 0;
570 GROW;
571 }
572 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000573 val = val * 10 + (CUR - '0');
574 else {
575 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
576 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
577 ctxt->sax->error(ctxt->userData,
578 "xmlParseCharRef: invalid decimal value\n");
579 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000580 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000581 val = 0;
582 break;
583 }
584 NEXT;
585 count++;
586 }
587 if (RAW == ';') {
588 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
589 ctxt->nbChars ++;
590 ctxt->input->cur++;
591 }
592 } else {
593 ctxt->errNo = XML_ERR_INVALID_CHARREF;
594 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
595 ctxt->sax->error(ctxt->userData,
596 "xmlParseCharRef: invalid value\n");
597 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000598 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000599 }
600
601 /*
602 * [ WFC: Legal Character ]
603 * Characters referred to using character references must match the
604 * production for Char.
605 */
606 if (IS_CHAR(val)) {
607 return(val);
608 } else {
609 ctxt->errNo = XML_ERR_INVALID_CHAR;
610 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000611 ctxt->sax->error(ctxt->userData,
612 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000613 val);
614 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000615 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000616 }
617 return(0);
618}
619
620/**
621 * xmlParseStringCharRef:
622 * @ctxt: an XML parser context
623 * @str: a pointer to an index in the string
624 *
625 * parse Reference declarations, variant parsing from a string rather
626 * than an an input flow.
627 *
628 * [66] CharRef ::= '&#' [0-9]+ ';' |
629 * '&#x' [0-9a-fA-F]+ ';'
630 *
631 * [ WFC: Legal Character ]
632 * Characters referred to using character references must match the
633 * production for Char.
634 *
635 * Returns the value parsed (as an int), 0 in case of error, str will be
636 * updated to the current value of the index
637 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000638static int
Owen Taylor3473f882001-02-23 17:55:21 +0000639xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
640 const xmlChar *ptr;
641 xmlChar cur;
642 int val = 0;
643
644 if ((str == NULL) || (*str == NULL)) return(0);
645 ptr = *str;
646 cur = *ptr;
647 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
648 ptr += 3;
649 cur = *ptr;
650 while (cur != ';') { /* Non input consuming loop */
651 if ((cur >= '0') && (cur <= '9'))
652 val = val * 16 + (cur - '0');
653 else if ((cur >= 'a') && (cur <= 'f'))
654 val = val * 16 + (cur - 'a') + 10;
655 else if ((cur >= 'A') && (cur <= 'F'))
656 val = val * 16 + (cur - 'A') + 10;
657 else {
658 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
659 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
660 ctxt->sax->error(ctxt->userData,
661 "xmlParseStringCharRef: invalid hexadecimal value\n");
662 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000663 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000664 val = 0;
665 break;
666 }
667 ptr++;
668 cur = *ptr;
669 }
670 if (cur == ';')
671 ptr++;
672 } else if ((cur == '&') && (ptr[1] == '#')){
673 ptr += 2;
674 cur = *ptr;
675 while (cur != ';') { /* Non input consuming loops */
676 if ((cur >= '0') && (cur <= '9'))
677 val = val * 10 + (cur - '0');
678 else {
679 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
680 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
681 ctxt->sax->error(ctxt->userData,
682 "xmlParseStringCharRef: invalid decimal value\n");
683 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000684 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000685 val = 0;
686 break;
687 }
688 ptr++;
689 cur = *ptr;
690 }
691 if (cur == ';')
692 ptr++;
693 } else {
694 ctxt->errNo = XML_ERR_INVALID_CHARREF;
695 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
696 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000697 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000698 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000699 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000700 return(0);
701 }
702 *str = ptr;
703
704 /*
705 * [ WFC: Legal Character ]
706 * Characters referred to using character references must match the
707 * production for Char.
708 */
709 if (IS_CHAR(val)) {
710 return(val);
711 } else {
712 ctxt->errNo = XML_ERR_INVALID_CHAR;
713 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
714 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000715 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000716 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000717 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000718 }
719 return(0);
720}
721
722/**
Daniel Veillardf5582f12002-06-11 10:08:16 +0000723 * xmlNewBlanksWrapperInputStream:
724 * @ctxt: an XML parser context
725 * @entity: an Entity pointer
726 *
727 * Create a new input stream for wrapping
728 * blanks around a PEReference
729 *
730 * Returns the new input stream or NULL
731 */
732
733static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
734
Daniel Veillardf4862f02002-09-10 11:13:43 +0000735static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +0000736xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
737 xmlParserInputPtr input;
738 xmlChar *buffer;
739 size_t length;
740 if (entity == NULL) {
741 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
742 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
743 ctxt->sax->error(ctxt->userData,
744 "internal: xmlNewBlanksWrapperInputStream entity = NULL\n");
745 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
746 return(NULL);
747 }
748 if (xmlParserDebugEntities)
749 xmlGenericError(xmlGenericErrorContext,
750 "new blanks wrapper for entity: %s\n", entity->name);
751 input = xmlNewInputStream(ctxt);
752 if (input == NULL) {
753 return(NULL);
754 }
755 length = xmlStrlen(entity->name) + 5;
756 buffer = xmlMalloc(length);
757 if (buffer == NULL) {
758 return(NULL);
759 }
760 buffer [0] = ' ';
761 buffer [1] = '%';
762 buffer [length-3] = ';';
763 buffer [length-2] = ' ';
764 buffer [length-1] = 0;
765 memcpy(buffer + 2, entity->name, length - 5);
766 input->free = deallocblankswrapper;
767 input->base = buffer;
768 input->cur = buffer;
769 input->length = length;
770 input->end = &buffer[length];
771 return(input);
772}
773
774/**
Owen Taylor3473f882001-02-23 17:55:21 +0000775 * xmlParserHandlePEReference:
776 * @ctxt: the parser context
777 *
778 * [69] PEReference ::= '%' Name ';'
779 *
780 * [ WFC: No Recursion ]
781 * A parsed entity must not contain a recursive
782 * reference to itself, either directly or indirectly.
783 *
784 * [ WFC: Entity Declared ]
785 * In a document without any DTD, a document with only an internal DTD
786 * subset which contains no parameter entity references, or a document
787 * with "standalone='yes'", ... ... The declaration of a parameter
788 * entity must precede any reference to it...
789 *
790 * [ VC: Entity Declared ]
791 * In a document with an external subset or external parameter entities
792 * with "standalone='no'", ... ... The declaration of a parameter entity
793 * must precede any reference to it...
794 *
795 * [ WFC: In DTD ]
796 * Parameter-entity references may only appear in the DTD.
797 * NOTE: misleading but this is handled.
798 *
799 * A PEReference may have been detected in the current input stream
800 * the handling is done accordingly to
801 * http://www.w3.org/TR/REC-xml#entproc
802 * i.e.
803 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000804 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000805 */
806void
807xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
808 xmlChar *name;
809 xmlEntityPtr entity = NULL;
810 xmlParserInputPtr input;
811
Owen Taylor3473f882001-02-23 17:55:21 +0000812 if (RAW != '%') return;
813 switch(ctxt->instate) {
814 case XML_PARSER_CDATA_SECTION:
815 return;
816 case XML_PARSER_COMMENT:
817 return;
818 case XML_PARSER_START_TAG:
819 return;
820 case XML_PARSER_END_TAG:
821 return;
822 case XML_PARSER_EOF:
823 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
824 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
825 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
826 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000827 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000828 return;
829 case XML_PARSER_PROLOG:
830 case XML_PARSER_START:
831 case XML_PARSER_MISC:
832 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
833 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
834 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
835 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000836 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000837 return;
838 case XML_PARSER_ENTITY_DECL:
839 case XML_PARSER_CONTENT:
840 case XML_PARSER_ATTRIBUTE_VALUE:
841 case XML_PARSER_PI:
842 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +0000843 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +0000844 /* we just ignore it there */
845 return;
846 case XML_PARSER_EPILOG:
847 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
848 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
849 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
850 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000851 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000852 return;
853 case XML_PARSER_ENTITY_VALUE:
854 /*
855 * NOTE: in the case of entity values, we don't do the
856 * substitution here since we need the literal
857 * entity value to be able to save the internal
858 * subset of the document.
859 * This will be handled by xmlStringDecodeEntities
860 */
861 return;
862 case XML_PARSER_DTD:
863 /*
864 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
865 * In the internal DTD subset, parameter-entity references
866 * can occur only where markup declarations can occur, not
867 * within markup declarations.
868 * In that case this is handled in xmlParseMarkupDecl
869 */
870 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
871 return;
Daniel Veillardf5582f12002-06-11 10:08:16 +0000872 if (IS_BLANK(NXT(1)) || NXT(1) == 0)
873 return;
Owen Taylor3473f882001-02-23 17:55:21 +0000874 break;
875 case XML_PARSER_IGNORE:
876 return;
877 }
878
879 NEXT;
880 name = xmlParseName(ctxt);
881 if (xmlParserDebugEntities)
882 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000883 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000884 if (name == NULL) {
885 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
886 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000887 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000888 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000889 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000890 } else {
891 if (RAW == ';') {
892 NEXT;
893 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
894 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
895 if (entity == NULL) {
896
897 /*
898 * [ WFC: Entity Declared ]
899 * In a document without any DTD, a document with only an
900 * internal DTD subset which contains no parameter entity
901 * references, or a document with "standalone='yes'", ...
902 * ... The declaration of a parameter entity must precede
903 * any reference to it...
904 */
905 if ((ctxt->standalone == 1) ||
906 ((ctxt->hasExternalSubset == 0) &&
907 (ctxt->hasPErefs == 0))) {
908 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
909 ctxt->sax->error(ctxt->userData,
910 "PEReference: %%%s; not found\n", name);
911 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000912 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000913 } else {
914 /*
915 * [ VC: Entity Declared ]
916 * In a document with an external subset or external
917 * parameter entities with "standalone='no'", ...
918 * ... The declaration of a parameter entity must precede
919 * any reference to it...
920 */
921 if ((!ctxt->disableSAX) &&
922 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
923 ctxt->vctxt.error(ctxt->vctxt.userData,
924 "PEReference: %%%s; not found\n", name);
925 } else if ((!ctxt->disableSAX) &&
926 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
927 ctxt->sax->warning(ctxt->userData,
928 "PEReference: %%%s; not found\n", name);
929 ctxt->valid = 0;
930 }
Daniel Veillardf5582f12002-06-11 10:08:16 +0000931 } else if (ctxt->input->free != deallocblankswrapper) {
932 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
933 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +0000934 } else {
935 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
936 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000937 xmlChar start[4];
938 xmlCharEncoding enc;
939
Owen Taylor3473f882001-02-23 17:55:21 +0000940 /*
941 * handle the extra spaces added before and after
942 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000943 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000944 */
945 input = xmlNewEntityInputStream(ctxt, entity);
946 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000947
948 /*
949 * Get the 4 first bytes and decode the charset
950 * if enc != XML_CHAR_ENCODING_NONE
951 * plug some encoding conversion routines.
952 */
953 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +0000954 if (entity->length >= 4) {
955 start[0] = RAW;
956 start[1] = NXT(1);
957 start[2] = NXT(2);
958 start[3] = NXT(3);
959 enc = xmlDetectCharEncoding(start, 4);
960 if (enc != XML_CHAR_ENCODING_NONE) {
961 xmlSwitchEncoding(ctxt, enc);
962 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000963 }
964
Owen Taylor3473f882001-02-23 17:55:21 +0000965 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
966 (RAW == '<') && (NXT(1) == '?') &&
967 (NXT(2) == 'x') && (NXT(3) == 'm') &&
968 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
969 xmlParseTextDecl(ctxt);
970 }
Owen Taylor3473f882001-02-23 17:55:21 +0000971 } else {
972 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
973 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000974 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000975 name);
976 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000977 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000978 }
979 }
980 } else {
981 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
982 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
983 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000984 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000985 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000986 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000987 }
988 xmlFree(name);
989 }
990}
991
/*
 * Macro used to grow the current buffer.
 *
 * Doubles <buffer>_size and reallocates @buffer to that many xmlChars.
 * Requirements on the call site:
 *   - an int variable named <buffer>_size must be in scope;
 *   - the enclosing function must return a pointer type, because on
 *     allocation failure the macro executes return(NULL).
 * On failure the previous block is released (it used to be leaked because
 * the realloc result overwrote the only pointer to it) and @buffer is
 * set to NULL before returning.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *buffer##_grown;                                            \
    buffer##_size *= 2;                                                 \
    buffer##_grown = (xmlChar *)                                        \
        xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));            \
    if (buffer##_grown == NULL) {                                       \
        xmlGenericError(xmlGenericErrorContext, "realloc failed");      \
        xmlFree(buffer);                                                \
        buffer = NULL;                                                  \
        return(NULL);                                                   \
    }                                                                   \
    buffer = buffer##_grown;                                            \
}
1004
1005/**
1006 * xmlStringDecodeEntities:
1007 * @ctxt: the parser context
1008 * @str: the input string
1009 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1010 * @end: an end marker xmlChar, 0 if none
1011 * @end2: an end marker xmlChar, 0 if none
1012 * @end3: an end marker xmlChar, 0 if none
1013 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001014 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001015 *
1016 * [67] Reference ::= EntityRef | CharRef
1017 *
1018 * [69] PEReference ::= '%' Name ';'
1019 *
1020 * Returns A newly allocated string with the substitution done. The caller
1021 * must deallocate it !
1022 */
1023xmlChar *
1024xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1025 xmlChar end, xmlChar end2, xmlChar end3) {
1026 xmlChar *buffer = NULL;
1027 int buffer_size = 0;
1028
1029 xmlChar *current = NULL;
1030 xmlEntityPtr ent;
1031 int c,l;
1032 int nbchars = 0;
1033
1034 if (str == NULL)
1035 return(NULL);
1036
1037 if (ctxt->depth > 40) {
1038 ctxt->errNo = XML_ERR_ENTITY_LOOP;
1039 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1040 ctxt->sax->error(ctxt->userData,
1041 "Detected entity reference loop\n");
1042 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001043 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001044 return(NULL);
1045 }
1046
1047 /*
1048 * allocate a translation buffer.
1049 */
1050 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1051 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1052 if (buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00001053 xmlGenericError(xmlGenericErrorContext,
1054 "xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00001055 return(NULL);
1056 }
1057
1058 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001059 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001060 * we are operating on already parsed values.
1061 */
1062 c = CUR_SCHAR(str, l);
1063 while ((c != 0) && (c != end) && /* non input consuming loop */
1064 (c != end2) && (c != end3)) {
1065
1066 if (c == 0) break;
1067 if ((c == '&') && (str[1] == '#')) {
1068 int val = xmlParseStringCharRef(ctxt, &str);
1069 if (val != 0) {
1070 COPY_BUF(0,buffer,nbchars,val);
1071 }
1072 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1073 if (xmlParserDebugEntities)
1074 xmlGenericError(xmlGenericErrorContext,
1075 "String decoding Entity Reference: %.30s\n",
1076 str);
1077 ent = xmlParseStringEntityRef(ctxt, &str);
1078 if ((ent != NULL) &&
1079 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1080 if (ent->content != NULL) {
1081 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1082 } else {
1083 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1084 ctxt->sax->error(ctxt->userData,
1085 "internal error entity has no content\n");
1086 }
1087 } else if ((ent != NULL) && (ent->content != NULL)) {
1088 xmlChar *rep;
1089
1090 ctxt->depth++;
1091 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1092 0, 0, 0);
1093 ctxt->depth--;
1094 if (rep != NULL) {
1095 current = rep;
1096 while (*current != 0) { /* non input consuming loop */
1097 buffer[nbchars++] = *current++;
1098 if (nbchars >
1099 buffer_size - XML_PARSER_BUFFER_SIZE) {
1100 growBuffer(buffer);
1101 }
1102 }
1103 xmlFree(rep);
1104 }
1105 } else if (ent != NULL) {
1106 int i = xmlStrlen(ent->name);
1107 const xmlChar *cur = ent->name;
1108
1109 buffer[nbchars++] = '&';
1110 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1111 growBuffer(buffer);
1112 }
1113 for (;i > 0;i--)
1114 buffer[nbchars++] = *cur++;
1115 buffer[nbchars++] = ';';
1116 }
1117 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1118 if (xmlParserDebugEntities)
1119 xmlGenericError(xmlGenericErrorContext,
1120 "String decoding PE Reference: %.30s\n", str);
1121 ent = xmlParseStringPEReference(ctxt, &str);
1122 if (ent != NULL) {
1123 xmlChar *rep;
1124
1125 ctxt->depth++;
1126 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1127 0, 0, 0);
1128 ctxt->depth--;
1129 if (rep != NULL) {
1130 current = rep;
1131 while (*current != 0) { /* non input consuming loop */
1132 buffer[nbchars++] = *current++;
1133 if (nbchars >
1134 buffer_size - XML_PARSER_BUFFER_SIZE) {
1135 growBuffer(buffer);
1136 }
1137 }
1138 xmlFree(rep);
1139 }
1140 }
1141 } else {
1142 COPY_BUF(l,buffer,nbchars,c);
1143 str += l;
1144 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1145 growBuffer(buffer);
1146 }
1147 }
1148 c = CUR_SCHAR(str, l);
1149 }
1150 buffer[nbchars++] = 0;
1151 return(buffer);
1152}
1153
1154
1155/************************************************************************
1156 * *
1157 * Commodity functions to handle xmlChars *
1158 * *
1159 ************************************************************************/
1160
1161/**
1162 * xmlStrndup:
1163 * @cur: the input xmlChar *
1164 * @len: the len of @cur
1165 *
1166 * a strndup for array of xmlChar's
1167 *
1168 * Returns a new xmlChar * or NULL
1169 */
1170xmlChar *
1171xmlStrndup(const xmlChar *cur, int len) {
1172 xmlChar *ret;
1173
1174 if ((cur == NULL) || (len < 0)) return(NULL);
1175 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1176 if (ret == NULL) {
1177 xmlGenericError(xmlGenericErrorContext,
1178 "malloc of %ld byte failed\n",
1179 (len + 1) * (long)sizeof(xmlChar));
1180 return(NULL);
1181 }
1182 memcpy(ret, cur, len * sizeof(xmlChar));
1183 ret[len] = 0;
1184 return(ret);
1185}
1186
1187/**
1188 * xmlStrdup:
1189 * @cur: the input xmlChar *
1190 *
1191 * a strdup for array of xmlChar's. Since they are supposed to be
1192 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1193 * a termination mark of '0'.
1194 *
1195 * Returns a new xmlChar * or NULL
1196 */
1197xmlChar *
1198xmlStrdup(const xmlChar *cur) {
1199 const xmlChar *p = cur;
1200
1201 if (cur == NULL) return(NULL);
1202 while (*p != 0) p++; /* non input consuming */
1203 return(xmlStrndup(cur, p - cur));
1204}
1205
1206/**
1207 * xmlCharStrndup:
1208 * @cur: the input char *
1209 * @len: the len of @cur
1210 *
1211 * a strndup for char's to xmlChar's
1212 *
1213 * Returns a new xmlChar * or NULL
1214 */
1215
1216xmlChar *
1217xmlCharStrndup(const char *cur, int len) {
1218 int i;
1219 xmlChar *ret;
1220
1221 if ((cur == NULL) || (len < 0)) return(NULL);
1222 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1223 if (ret == NULL) {
1224 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1225 (len + 1) * (long)sizeof(xmlChar));
1226 return(NULL);
1227 }
1228 for (i = 0;i < len;i++)
1229 ret[i] = (xmlChar) cur[i];
1230 ret[len] = 0;
1231 return(ret);
1232}
1233
1234/**
1235 * xmlCharStrdup:
1236 * @cur: the input char *
Owen Taylor3473f882001-02-23 17:55:21 +00001237 *
1238 * a strdup for char's to xmlChar's
1239 *
1240 * Returns a new xmlChar * or NULL
1241 */
1242
1243xmlChar *
1244xmlCharStrdup(const char *cur) {
1245 const char *p = cur;
1246
1247 if (cur == NULL) return(NULL);
1248 while (*p != '\0') p++; /* non input consuming */
1249 return(xmlCharStrndup(cur, p - cur));
1250}
1251
1252/**
1253 * xmlStrcmp:
1254 * @str1: the first xmlChar *
1255 * @str2: the second xmlChar *
1256 *
1257 * a strcmp for xmlChar's
1258 *
1259 * Returns the integer result of the comparison
1260 */
1261
1262int
1263xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1264 register int tmp;
1265
1266 if (str1 == str2) return(0);
1267 if (str1 == NULL) return(-1);
1268 if (str2 == NULL) return(1);
1269 do {
1270 tmp = *str1++ - *str2;
1271 if (tmp != 0) return(tmp);
1272 } while (*str2++ != 0);
1273 return 0;
1274}
1275
1276/**
1277 * xmlStrEqual:
1278 * @str1: the first xmlChar *
1279 * @str2: the second xmlChar *
1280 *
1281 * Check if both string are equal of have same content
1282 * Should be a bit more readable and faster than xmlStrEqual()
1283 *
1284 * Returns 1 if they are equal, 0 if they are different
1285 */
1286
1287int
1288xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1289 if (str1 == str2) return(1);
1290 if (str1 == NULL) return(0);
1291 if (str2 == NULL) return(0);
1292 do {
1293 if (*str1++ != *str2) return(0);
1294 } while (*str2++);
1295 return(1);
1296}
1297
1298/**
1299 * xmlStrncmp:
1300 * @str1: the first xmlChar *
1301 * @str2: the second xmlChar *
1302 * @len: the max comparison length
1303 *
1304 * a strncmp for xmlChar's
1305 *
1306 * Returns the integer result of the comparison
1307 */
1308
1309int
1310xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1311 register int tmp;
1312
1313 if (len <= 0) return(0);
1314 if (str1 == str2) return(0);
1315 if (str1 == NULL) return(-1);
1316 if (str2 == NULL) return(1);
1317 do {
1318 tmp = *str1++ - *str2;
1319 if (tmp != 0 || --len == 0) return(tmp);
1320 } while (*str2++ != 0);
1321 return 0;
1322}
1323
Daniel Veillardb44025c2001-10-11 22:55:55 +00001324static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001325 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1326 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1327 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1328 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1329 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1330 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1331 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1332 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1333 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1334 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1335 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1336 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1337 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1338 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1339 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1340 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1341 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1342 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1343 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1344 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1345 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1346 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1347 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1348 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1349 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1350 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1351 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1352 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1353 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1354 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1355 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1356 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1357};
1358
1359/**
1360 * xmlStrcasecmp:
1361 * @str1: the first xmlChar *
1362 * @str2: the second xmlChar *
1363 *
1364 * a strcasecmp for xmlChar's
1365 *
1366 * Returns the integer result of the comparison
1367 */
1368
1369int
1370xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1371 register int tmp;
1372
1373 if (str1 == str2) return(0);
1374 if (str1 == NULL) return(-1);
1375 if (str2 == NULL) return(1);
1376 do {
1377 tmp = casemap[*str1++] - casemap[*str2];
1378 if (tmp != 0) return(tmp);
1379 } while (*str2++ != 0);
1380 return 0;
1381}
1382
1383/**
1384 * xmlStrncasecmp:
1385 * @str1: the first xmlChar *
1386 * @str2: the second xmlChar *
1387 * @len: the max comparison length
1388 *
1389 * a strncasecmp for xmlChar's
1390 *
1391 * Returns the integer result of the comparison
1392 */
1393
1394int
1395xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1396 register int tmp;
1397
1398 if (len <= 0) return(0);
1399 if (str1 == str2) return(0);
1400 if (str1 == NULL) return(-1);
1401 if (str2 == NULL) return(1);
1402 do {
1403 tmp = casemap[*str1++] - casemap[*str2];
1404 if (tmp != 0 || --len == 0) return(tmp);
1405 } while (*str2++ != 0);
1406 return 0;
1407}
1408
1409/**
1410 * xmlStrchr:
1411 * @str: the xmlChar * array
1412 * @val: the xmlChar to search
1413 *
1414 * a strchr for xmlChar's
1415 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001416 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001417 */
1418
1419const xmlChar *
1420xmlStrchr(const xmlChar *str, xmlChar val) {
1421 if (str == NULL) return(NULL);
1422 while (*str != 0) { /* non input consuming */
1423 if (*str == val) return((xmlChar *) str);
1424 str++;
1425 }
1426 return(NULL);
1427}
1428
1429/**
1430 * xmlStrstr:
1431 * @str: the xmlChar * array (haystack)
1432 * @val: the xmlChar to search (needle)
1433 *
1434 * a strstr for xmlChar's
1435 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001436 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001437 */
1438
1439const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001440xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001441 int n;
1442
1443 if (str == NULL) return(NULL);
1444 if (val == NULL) return(NULL);
1445 n = xmlStrlen(val);
1446
1447 if (n == 0) return(str);
1448 while (*str != 0) { /* non input consuming */
1449 if (*str == *val) {
1450 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1451 }
1452 str++;
1453 }
1454 return(NULL);
1455}
1456
1457/**
1458 * xmlStrcasestr:
1459 * @str: the xmlChar * array (haystack)
1460 * @val: the xmlChar to search (needle)
1461 *
1462 * a case-ignoring strstr for xmlChar's
1463 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001464 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001465 */
1466
1467const xmlChar *
1468xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1469 int n;
1470
1471 if (str == NULL) return(NULL);
1472 if (val == NULL) return(NULL);
1473 n = xmlStrlen(val);
1474
1475 if (n == 0) return(str);
1476 while (*str != 0) { /* non input consuming */
1477 if (casemap[*str] == casemap[*val])
1478 if (!xmlStrncasecmp(str, val, n)) return(str);
1479 str++;
1480 }
1481 return(NULL);
1482}
1483
1484/**
1485 * xmlStrsub:
1486 * @str: the xmlChar * array (haystack)
1487 * @start: the index of the first char (zero based)
1488 * @len: the length of the substring
1489 *
1490 * Extract a substring of a given string
1491 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001492 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001493 */
1494
1495xmlChar *
1496xmlStrsub(const xmlChar *str, int start, int len) {
1497 int i;
1498
1499 if (str == NULL) return(NULL);
1500 if (start < 0) return(NULL);
1501 if (len < 0) return(NULL);
1502
1503 for (i = 0;i < start;i++) {
1504 if (*str == 0) return(NULL);
1505 str++;
1506 }
1507 if (*str == 0) return(NULL);
1508 return(xmlStrndup(str, len));
1509}
1510
1511/**
1512 * xmlStrlen:
1513 * @str: the xmlChar * array
1514 *
1515 * length of a xmlChar's string
1516 *
1517 * Returns the number of xmlChar contained in the ARRAY.
1518 */
1519
1520int
1521xmlStrlen(const xmlChar *str) {
1522 int len = 0;
1523
1524 if (str == NULL) return(0);
1525 while (*str != 0) { /* non input consuming */
1526 str++;
1527 len++;
1528 }
1529 return(len);
1530}
1531
1532/**
1533 * xmlStrncat:
1534 * @cur: the original xmlChar * array
1535 * @add: the xmlChar * array added
1536 * @len: the length of @add
1537 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001538 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001539 * first bytes of @add.
1540 *
1541 * Returns a new xmlChar *, the original @cur is reallocated if needed
1542 * and should not be freed
1543 */
1544
1545xmlChar *
1546xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1547 int size;
1548 xmlChar *ret;
1549
1550 if ((add == NULL) || (len == 0))
1551 return(cur);
1552 if (cur == NULL)
1553 return(xmlStrndup(add, len));
1554
1555 size = xmlStrlen(cur);
1556 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1557 if (ret == NULL) {
1558 xmlGenericError(xmlGenericErrorContext,
1559 "xmlStrncat: realloc of %ld byte failed\n",
1560 (size + len + 1) * (long)sizeof(xmlChar));
1561 return(cur);
1562 }
1563 memcpy(&ret[size], add, len * sizeof(xmlChar));
1564 ret[size + len] = 0;
1565 return(ret);
1566}
1567
1568/**
1569 * xmlStrcat:
1570 * @cur: the original xmlChar * array
1571 * @add: the xmlChar * array added
1572 *
1573 * a strcat for array of xmlChar's. Since they are supposed to be
1574 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1575 * a termination mark of '0'.
1576 *
1577 * Returns a new xmlChar * containing the concatenated string.
1578 */
1579xmlChar *
1580xmlStrcat(xmlChar *cur, const xmlChar *add) {
1581 const xmlChar *p = add;
1582
1583 if (add == NULL) return(cur);
1584 if (cur == NULL)
1585 return(xmlStrdup(add));
1586
1587 while (*p != 0) p++; /* non input consuming */
1588 return(xmlStrncat(cur, add, p - add));
1589}
1590
1591/************************************************************************
1592 * *
1593 * Commodity functions, cleanup needed ? *
1594 * *
1595 ************************************************************************/
1596
1597/**
1598 * areBlanks:
1599 * @ctxt: an XML parser context
1600 * @str: a xmlChar *
1601 * @len: the size of @str
1602 *
1603 * Is this a sequence of blank chars that one can ignore ?
1604 *
1605 * Returns 1 if ignorable 0 otherwise.
1606 */
1607
1608static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1609 int i, ret;
1610 xmlNodePtr lastChild;
1611
Daniel Veillard05c13a22001-09-09 08:38:09 +00001612 /*
1613 * Don't spend time trying to differentiate them, the same callback is
1614 * used !
1615 */
1616 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001617 return(0);
1618
Owen Taylor3473f882001-02-23 17:55:21 +00001619 /*
1620 * Check for xml:space value.
1621 */
1622 if (*(ctxt->space) == 1)
1623 return(0);
1624
1625 /*
1626 * Check that the string is made of blanks
1627 */
1628 for (i = 0;i < len;i++)
1629 if (!(IS_BLANK(str[i]))) return(0);
1630
1631 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001632 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001633 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001634 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001635 if (ctxt->myDoc != NULL) {
1636 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1637 if (ret == 0) return(1);
1638 if (ret == 1) return(0);
1639 }
1640
1641 /*
1642 * Otherwise, heuristic :-\
1643 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001644 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001645 if ((ctxt->node->children == NULL) &&
1646 (RAW == '<') && (NXT(1) == '/')) return(0);
1647
1648 lastChild = xmlGetLastChild(ctxt->node);
1649 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001650 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1651 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001652 } else if (xmlNodeIsText(lastChild))
1653 return(0);
1654 else if ((ctxt->node->children != NULL) &&
1655 (xmlNodeIsText(ctxt->node->children)))
1656 return(0);
1657 return(1);
1658}
1659
Owen Taylor3473f882001-02-23 17:55:21 +00001660/************************************************************************
1661 * *
1662 * Extra stuff for namespace support *
1663 * Relates to http://www.w3.org/TR/WD-xml-names *
1664 * *
1665 ************************************************************************/
1666
1667/**
1668 * xmlSplitQName:
1669 * @ctxt: an XML parser context
1670 * @name: an XML parser context
1671 * @prefix: a xmlChar **
1672 *
1673 * parse an UTF8 encoded XML qualified name string
1674 *
1675 * [NS 5] QName ::= (Prefix ':')? LocalPart
1676 *
1677 * [NS 6] Prefix ::= NCName
1678 *
1679 * [NS 7] LocalPart ::= NCName
1680 *
1681 * Returns the local part, and prefix is updated
1682 * to get the Prefix if any.
1683 */
1684
1685xmlChar *
1686xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1687 xmlChar buf[XML_MAX_NAMELEN + 5];
1688 xmlChar *buffer = NULL;
1689 int len = 0;
1690 int max = XML_MAX_NAMELEN;
1691 xmlChar *ret = NULL;
1692 const xmlChar *cur = name;
1693 int c;
1694
1695 *prefix = NULL;
1696
1697#ifndef XML_XML_NAMESPACE
1698 /* xml: prefix is not really a namespace */
1699 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1700 (cur[2] == 'l') && (cur[3] == ':'))
1701 return(xmlStrdup(name));
1702#endif
1703
1704 /* nasty but valid */
1705 if (cur[0] == ':')
1706 return(xmlStrdup(name));
1707
1708 c = *cur++;
1709 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1710 buf[len++] = c;
1711 c = *cur++;
1712 }
1713 if (len >= max) {
1714 /*
1715 * Okay someone managed to make a huge name, so he's ready to pay
1716 * for the processing speed.
1717 */
1718 max = len * 2;
1719
1720 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1721 if (buffer == NULL) {
1722 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1723 ctxt->sax->error(ctxt->userData,
1724 "xmlSplitQName: out of memory\n");
1725 return(NULL);
1726 }
1727 memcpy(buffer, buf, len);
1728 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1729 if (len + 10 > max) {
1730 max *= 2;
1731 buffer = (xmlChar *) xmlRealloc(buffer,
1732 max * sizeof(xmlChar));
1733 if (buffer == NULL) {
1734 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1735 ctxt->sax->error(ctxt->userData,
1736 "xmlSplitQName: out of memory\n");
1737 return(NULL);
1738 }
1739 }
1740 buffer[len++] = c;
1741 c = *cur++;
1742 }
1743 buffer[len] = 0;
1744 }
1745
1746 if (buffer == NULL)
1747 ret = xmlStrndup(buf, len);
1748 else {
1749 ret = buffer;
1750 buffer = NULL;
1751 max = XML_MAX_NAMELEN;
1752 }
1753
1754
1755 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00001756 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001757 if (c == 0) return(ret);
1758 *prefix = ret;
1759 len = 0;
1760
Daniel Veillardbb284f42002-10-16 18:02:47 +00001761 /*
1762 * Check that the first character is proper to start
1763 * a new name
1764 */
1765 if (!(((c >= 0x61) && (c <= 0x7A)) ||
1766 ((c >= 0x41) && (c <= 0x5A)) ||
1767 (c == '_') || (c == ':'))) {
1768 int l;
1769 int first = CUR_SCHAR(cur, l);
1770
1771 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001772 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1773 (ctxt->sax->error != NULL))
Daniel Veillardbb284f42002-10-16 18:02:47 +00001774 ctxt->sax->error(ctxt->userData,
1775 "Name %s is not XML Namespace compliant\n",
1776 name);
1777 }
1778 }
1779 cur++;
1780
Owen Taylor3473f882001-02-23 17:55:21 +00001781 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1782 buf[len++] = c;
1783 c = *cur++;
1784 }
1785 if (len >= max) {
1786 /*
1787 * Okay someone managed to make a huge name, so he's ready to pay
1788 * for the processing speed.
1789 */
1790 max = len * 2;
1791
1792 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1793 if (buffer == NULL) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001794 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1795 (ctxt->sax->error != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00001796 ctxt->sax->error(ctxt->userData,
1797 "xmlSplitQName: out of memory\n");
1798 return(NULL);
1799 }
1800 memcpy(buffer, buf, len);
1801 while (c != 0) { /* tested bigname2.xml */
1802 if (len + 10 > max) {
1803 max *= 2;
1804 buffer = (xmlChar *) xmlRealloc(buffer,
1805 max * sizeof(xmlChar));
1806 if (buffer == NULL) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001807 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1808 (ctxt->sax->error != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00001809 ctxt->sax->error(ctxt->userData,
1810 "xmlSplitQName: out of memory\n");
1811 return(NULL);
1812 }
1813 }
1814 buffer[len++] = c;
1815 c = *cur++;
1816 }
1817 buffer[len] = 0;
1818 }
1819
1820 if (buffer == NULL)
1821 ret = xmlStrndup(buf, len);
1822 else {
1823 ret = buffer;
1824 }
1825 }
1826
1827 return(ret);
1828}
1829
1830/************************************************************************
1831 * *
1832 * The parser itself *
1833 * Relates to http://www.w3.org/TR/REC-xml *
1834 * *
1835 ************************************************************************/
1836
Daniel Veillard76d66f42001-05-16 21:05:17 +00001837static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001838/**
1839 * xmlParseName:
1840 * @ctxt: an XML parser context
1841 *
1842 * parse an XML name.
1843 *
1844 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1845 * CombiningChar | Extender
1846 *
1847 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1848 *
1849 * [6] Names ::= Name (S Name)*
1850 *
1851 * Returns the Name parsed or NULL
1852 */
1853
1854xmlChar *
1855xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001856 const xmlChar *in;
1857 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001858 int count = 0;
1859
1860 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001861
1862 /*
1863 * Accelerator for simple ASCII names
1864 */
1865 in = ctxt->input->cur;
1866 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1867 ((*in >= 0x41) && (*in <= 0x5A)) ||
1868 (*in == '_') || (*in == ':')) {
1869 in++;
1870 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1871 ((*in >= 0x41) && (*in <= 0x5A)) ||
1872 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001873 (*in == '_') || (*in == '-') ||
1874 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001875 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001876 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001877 count = in - ctxt->input->cur;
1878 ret = xmlStrndup(ctxt->input->cur, count);
1879 ctxt->input->cur = in;
1880 return(ret);
1881 }
1882 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001883 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001884}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001885
Daniel Veillard46de64e2002-05-29 08:21:33 +00001886/**
1887 * xmlParseNameAndCompare:
1888 * @ctxt: an XML parser context
1889 *
1890 * parse an XML name and compares for match
1891 * (specialized for endtag parsing)
1892 *
1893 *
1894 * Returns NULL for an illegal name, (xmlChar*) 1 for success
1895 * and the name for mismatch
1896 */
1897
Daniel Veillardf4862f02002-09-10 11:13:43 +00001898static xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00001899xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
1900 const xmlChar *cmp = other;
1901 const xmlChar *in;
1902 xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00001903
1904 GROW;
1905
1906 in = ctxt->input->cur;
1907 while (*in != 0 && *in == *cmp) {
1908 ++in;
1909 ++cmp;
1910 }
1911 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
1912 /* success */
1913 ctxt->input->cur = in;
1914 return (xmlChar*) 1;
1915 }
1916 /* failure (or end of input buffer), check with full function */
1917 ret = xmlParseName (ctxt);
1918 if (ret != 0 && xmlStrEqual (ret, other)) {
1919 xmlFree (ret);
1920 return (xmlChar*) 1;
1921 }
1922 return ret;
1923}
1924
Daniel Veillard76d66f42001-05-16 21:05:17 +00001925static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001926xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1927 xmlChar buf[XML_MAX_NAMELEN + 5];
1928 int len = 0, l;
1929 int c;
1930 int count = 0;
1931
1932 /*
1933 * Handler for more complex cases
1934 */
1935 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001936 c = CUR_CHAR(l);
1937 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1938 (!IS_LETTER(c) && (c != '_') &&
1939 (c != ':'))) {
1940 return(NULL);
1941 }
1942
1943 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1944 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1945 (c == '.') || (c == '-') ||
1946 (c == '_') || (c == ':') ||
1947 (IS_COMBINING(c)) ||
1948 (IS_EXTENDER(c)))) {
1949 if (count++ > 100) {
1950 count = 0;
1951 GROW;
1952 }
1953 COPY_BUF(l,buf,len,c);
1954 NEXTL(l);
1955 c = CUR_CHAR(l);
1956 if (len >= XML_MAX_NAMELEN) {
1957 /*
1958 * Okay someone managed to make a huge name, so he's ready to pay
1959 * for the processing speed.
1960 */
1961 xmlChar *buffer;
1962 int max = len * 2;
1963
1964 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1965 if (buffer == NULL) {
1966 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1967 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001968 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001969 return(NULL);
1970 }
1971 memcpy(buffer, buf, len);
1972 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1973 (c == '.') || (c == '-') ||
1974 (c == '_') || (c == ':') ||
1975 (IS_COMBINING(c)) ||
1976 (IS_EXTENDER(c))) {
1977 if (count++ > 100) {
1978 count = 0;
1979 GROW;
1980 }
1981 if (len + 10 > max) {
1982 max *= 2;
1983 buffer = (xmlChar *) xmlRealloc(buffer,
1984 max * sizeof(xmlChar));
1985 if (buffer == NULL) {
1986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1987 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001988 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001989 return(NULL);
1990 }
1991 }
1992 COPY_BUF(l,buffer,len,c);
1993 NEXTL(l);
1994 c = CUR_CHAR(l);
1995 }
1996 buffer[len] = 0;
1997 return(buffer);
1998 }
1999 }
2000 return(xmlStrndup(buf, len));
2001}
2002
2003/**
2004 * xmlParseStringName:
2005 * @ctxt: an XML parser context
2006 * @str: a pointer to the string pointer (IN/OUT)
2007 *
2008 * parse an XML name.
2009 *
2010 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2011 * CombiningChar | Extender
2012 *
2013 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2014 *
2015 * [6] Names ::= Name (S Name)*
2016 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002017 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002018 * is updated to the current location in the string.
2019 */
2020
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002021static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002022xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2023 xmlChar buf[XML_MAX_NAMELEN + 5];
2024 const xmlChar *cur = *str;
2025 int len = 0, l;
2026 int c;
2027
2028 c = CUR_SCHAR(cur, l);
2029 if (!IS_LETTER(c) && (c != '_') &&
2030 (c != ':')) {
2031 return(NULL);
2032 }
2033
2034 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2035 (c == '.') || (c == '-') ||
2036 (c == '_') || (c == ':') ||
2037 (IS_COMBINING(c)) ||
2038 (IS_EXTENDER(c))) {
2039 COPY_BUF(l,buf,len,c);
2040 cur += l;
2041 c = CUR_SCHAR(cur, l);
2042 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2043 /*
2044 * Okay someone managed to make a huge name, so he's ready to pay
2045 * for the processing speed.
2046 */
2047 xmlChar *buffer;
2048 int max = len * 2;
2049
2050 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
2051 if (buffer == NULL) {
2052 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2053 ctxt->sax->error(ctxt->userData,
2054 "xmlParseStringName: out of memory\n");
2055 return(NULL);
2056 }
2057 memcpy(buffer, buf, len);
2058 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2059 (c == '.') || (c == '-') ||
2060 (c == '_') || (c == ':') ||
2061 (IS_COMBINING(c)) ||
2062 (IS_EXTENDER(c))) {
2063 if (len + 10 > max) {
2064 max *= 2;
2065 buffer = (xmlChar *) xmlRealloc(buffer,
2066 max * sizeof(xmlChar));
2067 if (buffer == NULL) {
2068 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2069 ctxt->sax->error(ctxt->userData,
2070 "xmlParseStringName: out of memory\n");
2071 return(NULL);
2072 }
2073 }
2074 COPY_BUF(l,buffer,len,c);
2075 cur += l;
2076 c = CUR_SCHAR(cur, l);
2077 }
2078 buffer[len] = 0;
2079 *str = cur;
2080 return(buffer);
2081 }
2082 }
2083 *str = cur;
2084 return(xmlStrndup(buf, len));
2085}
2086
2087/**
2088 * xmlParseNmtoken:
2089 * @ctxt: an XML parser context
2090 *
2091 * parse an XML Nmtoken.
2092 *
2093 * [7] Nmtoken ::= (NameChar)+
2094 *
2095 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2096 *
2097 * Returns the Nmtoken parsed or NULL
2098 */
2099
2100xmlChar *
2101xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2102 xmlChar buf[XML_MAX_NAMELEN + 5];
2103 int len = 0, l;
2104 int c;
2105 int count = 0;
2106
2107 GROW;
2108 c = CUR_CHAR(l);
2109
2110 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2111 (c == '.') || (c == '-') ||
2112 (c == '_') || (c == ':') ||
2113 (IS_COMBINING(c)) ||
2114 (IS_EXTENDER(c))) {
2115 if (count++ > 100) {
2116 count = 0;
2117 GROW;
2118 }
2119 COPY_BUF(l,buf,len,c);
2120 NEXTL(l);
2121 c = CUR_CHAR(l);
2122 if (len >= XML_MAX_NAMELEN) {
2123 /*
2124 * Okay someone managed to make a huge token, so he's ready to pay
2125 * for the processing speed.
2126 */
2127 xmlChar *buffer;
2128 int max = len * 2;
2129
2130 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
2131 if (buffer == NULL) {
2132 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2133 ctxt->sax->error(ctxt->userData,
2134 "xmlParseNmtoken: out of memory\n");
2135 return(NULL);
2136 }
2137 memcpy(buffer, buf, len);
2138 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2139 (c == '.') || (c == '-') ||
2140 (c == '_') || (c == ':') ||
2141 (IS_COMBINING(c)) ||
2142 (IS_EXTENDER(c))) {
2143 if (count++ > 100) {
2144 count = 0;
2145 GROW;
2146 }
2147 if (len + 10 > max) {
2148 max *= 2;
2149 buffer = (xmlChar *) xmlRealloc(buffer,
2150 max * sizeof(xmlChar));
2151 if (buffer == NULL) {
2152 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2153 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002154 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002155 return(NULL);
2156 }
2157 }
2158 COPY_BUF(l,buffer,len,c);
2159 NEXTL(l);
2160 c = CUR_CHAR(l);
2161 }
2162 buffer[len] = 0;
2163 return(buffer);
2164 }
2165 }
2166 if (len == 0)
2167 return(NULL);
2168 return(xmlStrndup(buf, len));
2169}
2170
2171/**
2172 * xmlParseEntityValue:
2173 * @ctxt: an XML parser context
2174 * @orig: if non-NULL store a copy of the original entity value
2175 *
2176 * parse a value for ENTITY declarations
2177 *
2178 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2179 * "'" ([^%&'] | PEReference | Reference)* "'"
2180 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002181 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002182 */
2183
2184xmlChar *
2185xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2186 xmlChar *buf = NULL;
2187 int len = 0;
2188 int size = XML_PARSER_BUFFER_SIZE;
2189 int c, l;
2190 xmlChar stop;
2191 xmlChar *ret = NULL;
2192 const xmlChar *cur = NULL;
2193 xmlParserInputPtr input;
2194
2195 if (RAW == '"') stop = '"';
2196 else if (RAW == '\'') stop = '\'';
2197 else {
2198 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2199 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2200 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2201 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002202 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002203 return(NULL);
2204 }
2205 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2206 if (buf == NULL) {
2207 xmlGenericError(xmlGenericErrorContext,
2208 "malloc of %d byte failed\n", size);
2209 return(NULL);
2210 }
2211
2212 /*
2213 * The content of the entity definition is copied in a buffer.
2214 */
2215
2216 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2217 input = ctxt->input;
2218 GROW;
2219 NEXT;
2220 c = CUR_CHAR(l);
2221 /*
2222 * NOTE: 4.4.5 Included in Literal
2223 * When a parameter entity reference appears in a literal entity
2224 * value, ... a single or double quote character in the replacement
2225 * text is always treated as a normal data character and will not
2226 * terminate the literal.
2227 * In practice it means we stop the loop only when back at parsing
2228 * the initial entity and the quote is found
2229 */
2230 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2231 (ctxt->input != input))) {
2232 if (len + 5 >= size) {
2233 size *= 2;
2234 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2235 if (buf == NULL) {
2236 xmlGenericError(xmlGenericErrorContext,
2237 "realloc of %d byte failed\n", size);
2238 return(NULL);
2239 }
2240 }
2241 COPY_BUF(l,buf,len,c);
2242 NEXTL(l);
2243 /*
2244 * Pop-up of finished entities.
2245 */
2246 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2247 xmlPopInput(ctxt);
2248
2249 GROW;
2250 c = CUR_CHAR(l);
2251 if (c == 0) {
2252 GROW;
2253 c = CUR_CHAR(l);
2254 }
2255 }
2256 buf[len] = 0;
2257
2258 /*
2259 * Raise problem w.r.t. '&' and '%' being used in non-entities
2260 * reference constructs. Note Charref will be handled in
2261 * xmlStringDecodeEntities()
2262 */
2263 cur = buf;
2264 while (*cur != 0) { /* non input consuming */
2265 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2266 xmlChar *name;
2267 xmlChar tmp = *cur;
2268
2269 cur++;
2270 name = xmlParseStringName(ctxt, &cur);
2271 if ((name == NULL) || (*cur != ';')) {
2272 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2273 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2274 ctxt->sax->error(ctxt->userData,
2275 "EntityValue: '%c' forbidden except for entities references\n",
2276 tmp);
2277 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002278 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002279 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002280 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2281 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002282 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2283 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2284 ctxt->sax->error(ctxt->userData,
2285 "EntityValue: PEReferences forbidden in internal subset\n",
2286 tmp);
2287 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002288 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002289 }
2290 if (name != NULL)
2291 xmlFree(name);
2292 }
2293 cur++;
2294 }
2295
2296 /*
2297 * Then PEReference entities are substituted.
2298 */
2299 if (c != stop) {
2300 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2301 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2302 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2303 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002304 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002305 xmlFree(buf);
2306 } else {
2307 NEXT;
2308 /*
2309 * NOTE: 4.4.7 Bypassed
2310 * When a general entity reference appears in the EntityValue in
2311 * an entity declaration, it is bypassed and left as is.
2312 * so XML_SUBSTITUTE_REF is not set here.
2313 */
2314 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2315 0, 0, 0);
2316 if (orig != NULL)
2317 *orig = buf;
2318 else
2319 xmlFree(buf);
2320 }
2321
2322 return(ret);
2323}
2324
2325/**
2326 * xmlParseAttValue:
2327 * @ctxt: an XML parser context
2328 *
2329 * parse a value for an attribute
2330 * Note: the parser won't do substitution of entities here, this
2331 * will be handled later in xmlStringGetNodeList
2332 *
2333 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2334 * "'" ([^<&'] | Reference)* "'"
2335 *
2336 * 3.3.3 Attribute-Value Normalization:
2337 * Before the value of an attribute is passed to the application or
2338 * checked for validity, the XML processor must normalize it as follows:
2339 * - a character reference is processed by appending the referenced
2340 * character to the attribute value
2341 * - an entity reference is processed by recursively processing the
2342 * replacement text of the entity
2343 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2344 * appending #x20 to the normalized value, except that only a single
2345 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2346 * parsed entity or the literal entity value of an internal parsed entity
2347 * - other characters are processed by appending them to the normalized value
2348 * If the declared value is not CDATA, then the XML processor must further
2349 * process the normalized attribute value by discarding any leading and
2350 * trailing space (#x20) characters, and by replacing sequences of space
2351 * (#x20) characters by a single space (#x20) character.
2352 * All attributes for which no declaration has been read should be treated
2353 * by a non-validating parser as if declared CDATA.
2354 *
2355 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2356 */
2357
2358xmlChar *
Daniel Veillarde72c7562002-05-31 09:47:30 +00002359xmlParseAttValueComplex(xmlParserCtxtPtr ctxt);
2360
2361xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002362xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2363 xmlChar limit = 0;
Daniel Veillardf4862f02002-09-10 11:13:43 +00002364 const xmlChar *in = NULL;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002365 xmlChar *ret = NULL;
2366 SHRINK;
2367 GROW;
Daniel Veillarde645e8c2002-10-22 17:35:37 +00002368 in = (xmlChar *) CUR_PTR;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002369 if (*in != '"' && *in != '\'') {
2370 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2371 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2372 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2373 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002374 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002375 return(NULL);
2376 }
2377 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2378 limit = *in;
2379 ++in;
2380
2381 while (*in != limit && *in >= 0x20 && *in <= 0x7f &&
2382 *in != '&' && *in != '<'
2383 ) {
2384 ++in;
2385 }
2386 if (*in != limit) {
2387 return xmlParseAttValueComplex(ctxt);
2388 }
2389 ++in;
2390 ret = xmlStrndup (CUR_PTR + 1, in - CUR_PTR - 2);
2391 CUR_PTR = in;
2392 return ret;
2393}
2394
Daniel Veillard01c13b52002-12-10 15:19:08 +00002395/**
2396 * xmlParseAttValueComplex:
2397 * @ctxt: an XML parser context
2398 *
2399 * parse a value for an attribute, this is the fallback function
2400 * of xmlParseAttValue() when the attribute parsing requires handling
2401 * of non-ASCII characters.
2402 *
2403 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2404 */
Daniel Veillarde72c7562002-05-31 09:47:30 +00002405xmlChar *
2406xmlParseAttValueComplex(xmlParserCtxtPtr ctxt) {
2407 xmlChar limit = 0;
2408 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002409 int len = 0;
2410 int buf_size = 0;
2411 int c, l;
2412 xmlChar *current = NULL;
2413 xmlEntityPtr ent;
2414
2415
2416 SHRINK;
2417 if (NXT(0) == '"') {
2418 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2419 limit = '"';
2420 NEXT;
2421 } else if (NXT(0) == '\'') {
2422 limit = '\'';
2423 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2424 NEXT;
2425 } else {
2426 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2427 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2428 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2429 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002430 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002431 return(NULL);
2432 }
2433
2434 /*
2435 * allocate a translation buffer.
2436 */
2437 buf_size = XML_PARSER_BUFFER_SIZE;
2438 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2439 if (buf == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002440 xmlGenericError(xmlGenericErrorContext,
2441 "xmlParseAttValue: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002442 return(NULL);
2443 }
2444
2445 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002446 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002447 */
2448 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002449 while ((NXT(0) != limit) && /* checked */
2450 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002451 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002452 if (c == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00002453 if (NXT(1) == '#') {
2454 int val = xmlParseCharRef(ctxt);
2455 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002456 if (ctxt->replaceEntities) {
2457 if (len > buf_size - 10) {
2458 growBuffer(buf);
2459 }
2460 buf[len++] = '&';
2461 } else {
2462 /*
2463 * The reparsing will be done in xmlStringGetNodeList()
2464 * called by the attribute() function in SAX.c
2465 */
2466 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002467
Daniel Veillard319a7422001-09-11 09:27:09 +00002468 if (len > buf_size - 10) {
2469 growBuffer(buf);
2470 }
2471 current = &buffer[0];
2472 while (*current != 0) { /* non input consuming */
2473 buf[len++] = *current++;
2474 }
Owen Taylor3473f882001-02-23 17:55:21 +00002475 }
2476 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002477 if (len > buf_size - 10) {
2478 growBuffer(buf);
2479 }
Owen Taylor3473f882001-02-23 17:55:21 +00002480 len += xmlCopyChar(0, &buf[len], val);
2481 }
2482 } else {
2483 ent = xmlParseEntityRef(ctxt);
2484 if ((ent != NULL) &&
2485 (ctxt->replaceEntities != 0)) {
2486 xmlChar *rep;
2487
2488 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2489 rep = xmlStringDecodeEntities(ctxt, ent->content,
2490 XML_SUBSTITUTE_REF, 0, 0, 0);
2491 if (rep != NULL) {
2492 current = rep;
2493 while (*current != 0) { /* non input consuming */
2494 buf[len++] = *current++;
2495 if (len > buf_size - 10) {
2496 growBuffer(buf);
2497 }
2498 }
2499 xmlFree(rep);
2500 }
2501 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002502 if (len > buf_size - 10) {
2503 growBuffer(buf);
2504 }
Owen Taylor3473f882001-02-23 17:55:21 +00002505 if (ent->content != NULL)
2506 buf[len++] = ent->content[0];
2507 }
2508 } else if (ent != NULL) {
2509 int i = xmlStrlen(ent->name);
2510 const xmlChar *cur = ent->name;
2511
2512 /*
2513 * This may look absurd but is needed to detect
2514 * entities problems
2515 */
2516 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2517 (ent->content != NULL)) {
2518 xmlChar *rep;
2519 rep = xmlStringDecodeEntities(ctxt, ent->content,
2520 XML_SUBSTITUTE_REF, 0, 0, 0);
2521 if (rep != NULL)
2522 xmlFree(rep);
2523 }
2524
2525 /*
2526 * Just output the reference
2527 */
2528 buf[len++] = '&';
2529 if (len > buf_size - i - 10) {
2530 growBuffer(buf);
2531 }
2532 for (;i > 0;i--)
2533 buf[len++] = *cur++;
2534 buf[len++] = ';';
2535 }
2536 }
2537 } else {
2538 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2539 COPY_BUF(l,buf,len,0x20);
2540 if (len > buf_size - 10) {
2541 growBuffer(buf);
2542 }
2543 } else {
2544 COPY_BUF(l,buf,len,c);
2545 if (len > buf_size - 10) {
2546 growBuffer(buf);
2547 }
2548 }
2549 NEXTL(l);
2550 }
2551 GROW;
2552 c = CUR_CHAR(l);
2553 }
2554 buf[len++] = 0;
2555 if (RAW == '<') {
2556 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2557 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2558 ctxt->sax->error(ctxt->userData,
2559 "Unescaped '<' not allowed in attributes values\n");
2560 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002561 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002562 } else if (RAW != limit) {
2563 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2564 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2565 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2566 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002567 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002568 } else
2569 NEXT;
2570 return(buf);
2571}
2572
2573/**
2574 * xmlParseSystemLiteral:
2575 * @ctxt: an XML parser context
2576 *
2577 * parse an XML Literal
2578 *
2579 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2580 *
2581 * Returns the SystemLiteral parsed or NULL
2582 */
2583
2584xmlChar *
2585xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2586 xmlChar *buf = NULL;
2587 int len = 0;
2588 int size = XML_PARSER_BUFFER_SIZE;
2589 int cur, l;
2590 xmlChar stop;
2591 int state = ctxt->instate;
2592 int count = 0;
2593
2594 SHRINK;
2595 if (RAW == '"') {
2596 NEXT;
2597 stop = '"';
2598 } else if (RAW == '\'') {
2599 NEXT;
2600 stop = '\'';
2601 } else {
2602 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2603 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2604 ctxt->sax->error(ctxt->userData,
2605 "SystemLiteral \" or ' expected\n");
2606 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002607 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002608 return(NULL);
2609 }
2610
2611 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2612 if (buf == NULL) {
2613 xmlGenericError(xmlGenericErrorContext,
2614 "malloc of %d byte failed\n", size);
2615 return(NULL);
2616 }
2617 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2618 cur = CUR_CHAR(l);
2619 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2620 if (len + 5 >= size) {
2621 size *= 2;
2622 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2623 if (buf == NULL) {
2624 xmlGenericError(xmlGenericErrorContext,
2625 "realloc of %d byte failed\n", size);
2626 ctxt->instate = (xmlParserInputState) state;
2627 return(NULL);
2628 }
2629 }
2630 count++;
2631 if (count > 50) {
2632 GROW;
2633 count = 0;
2634 }
2635 COPY_BUF(l,buf,len,cur);
2636 NEXTL(l);
2637 cur = CUR_CHAR(l);
2638 if (cur == 0) {
2639 GROW;
2640 SHRINK;
2641 cur = CUR_CHAR(l);
2642 }
2643 }
2644 buf[len] = 0;
2645 ctxt->instate = (xmlParserInputState) state;
2646 if (!IS_CHAR(cur)) {
2647 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2648 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2649 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2650 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002651 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002652 } else {
2653 NEXT;
2654 }
2655 return(buf);
2656}
2657
2658/**
2659 * xmlParsePubidLiteral:
2660 * @ctxt: an XML parser context
2661 *
2662 * parse an XML public literal
2663 *
2664 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2665 *
2666 * Returns the PubidLiteral parsed or NULL.
2667 */
2668
2669xmlChar *
2670xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2671 xmlChar *buf = NULL;
2672 int len = 0;
2673 int size = XML_PARSER_BUFFER_SIZE;
2674 xmlChar cur;
2675 xmlChar stop;
2676 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002677 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00002678
2679 SHRINK;
2680 if (RAW == '"') {
2681 NEXT;
2682 stop = '"';
2683 } else if (RAW == '\'') {
2684 NEXT;
2685 stop = '\'';
2686 } else {
2687 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2688 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2689 ctxt->sax->error(ctxt->userData,
2690 "SystemLiteral \" or ' expected\n");
2691 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002692 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002693 return(NULL);
2694 }
2695 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2696 if (buf == NULL) {
2697 xmlGenericError(xmlGenericErrorContext,
2698 "malloc of %d byte failed\n", size);
2699 return(NULL);
2700 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002701 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00002702 cur = CUR;
2703 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2704 if (len + 1 >= size) {
2705 size *= 2;
2706 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2707 if (buf == NULL) {
2708 xmlGenericError(xmlGenericErrorContext,
2709 "realloc of %d byte failed\n", size);
2710 return(NULL);
2711 }
2712 }
2713 buf[len++] = cur;
2714 count++;
2715 if (count > 50) {
2716 GROW;
2717 count = 0;
2718 }
2719 NEXT;
2720 cur = CUR;
2721 if (cur == 0) {
2722 GROW;
2723 SHRINK;
2724 cur = CUR;
2725 }
2726 }
2727 buf[len] = 0;
2728 if (cur != stop) {
2729 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2730 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2731 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2732 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002733 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002734 } else {
2735 NEXT;
2736 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002737 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00002738 return(buf);
2739}
2740
Daniel Veillard48b2f892001-02-25 16:11:03 +00002741void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002742/**
2743 * xmlParseCharData:
2744 * @ctxt: an XML parser context
2745 * @cdata: int indicating whether we are within a CDATA section
2746 *
2747 * parse a CharData section.
2748 * if we are within a CDATA section ']]>' marks an end of section.
2749 *
2750 * The right angle bracket (>) may be represented using the string "&gt;",
2751 * and must, for compatibility, be escaped using "&gt;" or a character
2752 * reference when it appears in the string "]]>" in content, when that
2753 * string is not marking the end of a CDATA section.
2754 *
2755 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2756 */
2757
2758void
2759xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00002760 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002761 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002762 int line = ctxt->input->line;
2763 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002764
2765 SHRINK;
2766 GROW;
2767 /*
2768 * Accelerated common case where input don't need to be
2769 * modified before passing it to the handler.
2770 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00002771 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002772 in = ctxt->input->cur;
2773 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002774get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00002775 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
2776 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002777 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002778 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002779 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002780 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002781 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002782 ctxt->input->line++;
2783 in++;
2784 }
2785 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002786 }
2787 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002788 if ((in[1] == ']') && (in[2] == '>')) {
2789 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2790 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2791 ctxt->sax->error(ctxt->userData,
2792 "Sequence ']]>' not allowed in content\n");
2793 ctxt->input->cur = in;
2794 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002795 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002796 return;
2797 }
2798 in++;
2799 goto get_more;
2800 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002801 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002802 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002803 if (IS_BLANK(*ctxt->input->cur)) {
2804 const xmlChar *tmp = ctxt->input->cur;
2805 ctxt->input->cur = in;
2806 if (areBlanks(ctxt, tmp, nbchar)) {
2807 if (ctxt->sax->ignorableWhitespace != NULL)
2808 ctxt->sax->ignorableWhitespace(ctxt->userData,
2809 tmp, nbchar);
2810 } else {
2811 if (ctxt->sax->characters != NULL)
2812 ctxt->sax->characters(ctxt->userData,
2813 tmp, nbchar);
2814 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002815 line = ctxt->input->line;
2816 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002817 } else {
2818 if (ctxt->sax->characters != NULL)
2819 ctxt->sax->characters(ctxt->userData,
2820 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002821 line = ctxt->input->line;
2822 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002823 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002824 }
2825 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002826 if (*in == 0xD) {
2827 in++;
2828 if (*in == 0xA) {
2829 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002830 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002831 ctxt->input->line++;
2832 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002833 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002834 in--;
2835 }
2836 if (*in == '<') {
2837 return;
2838 }
2839 if (*in == '&') {
2840 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002841 }
2842 SHRINK;
2843 GROW;
2844 in = ctxt->input->cur;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002845 } while ((*in >= 0x20) && (*in <= 0x7F));
Daniel Veillard48b2f892001-02-25 16:11:03 +00002846 nbchar = 0;
2847 }
Daniel Veillard50582112001-03-26 22:52:16 +00002848 ctxt->input->line = line;
2849 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002850 xmlParseCharDataComplex(ctxt, cdata);
2851}
2852
Daniel Veillard01c13b52002-12-10 15:19:08 +00002853/**
2854 * xmlParseCharDataComplex:
2855 * @ctxt: an XML parser context
2856 * @cdata: int indicating whether we are within a CDATA section
2857 *
2858 * parse a CharData section.this is the fallback function
2859 * of xmlParseCharData() when the parsing requires handling
2860 * of non-ASCII characters.
2861 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002862void
2863xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002864 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2865 int nbchar = 0;
2866 int cur, l;
2867 int count = 0;
2868
2869 SHRINK;
2870 GROW;
2871 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002872 while ((cur != '<') && /* checked */
2873 (cur != '&') &&
2874 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00002875 if ((cur == ']') && (NXT(1) == ']') &&
2876 (NXT(2) == '>')) {
2877 if (cdata) break;
2878 else {
2879 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2880 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2881 ctxt->sax->error(ctxt->userData,
2882 "Sequence ']]>' not allowed in content\n");
2883 /* Should this be relaxed ??? I see a "must here */
2884 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002885 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002886 }
2887 }
2888 COPY_BUF(l,buf,nbchar,cur);
2889 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2890 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002891 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002892 */
2893 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2894 if (areBlanks(ctxt, buf, nbchar)) {
2895 if (ctxt->sax->ignorableWhitespace != NULL)
2896 ctxt->sax->ignorableWhitespace(ctxt->userData,
2897 buf, nbchar);
2898 } else {
2899 if (ctxt->sax->characters != NULL)
2900 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2901 }
2902 }
2903 nbchar = 0;
2904 }
2905 count++;
2906 if (count > 50) {
2907 GROW;
2908 count = 0;
2909 }
2910 NEXTL(l);
2911 cur = CUR_CHAR(l);
2912 }
2913 if (nbchar != 0) {
2914 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002915 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002916 */
2917 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2918 if (areBlanks(ctxt, buf, nbchar)) {
2919 if (ctxt->sax->ignorableWhitespace != NULL)
2920 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2921 } else {
2922 if (ctxt->sax->characters != NULL)
2923 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2924 }
2925 }
2926 }
2927}
2928
2929/**
2930 * xmlParseExternalID:
2931 * @ctxt: an XML parser context
2932 * @publicID: a xmlChar** receiving PubidLiteral
2933 * @strict: indicate whether we should restrict parsing to only
2934 * production [75], see NOTE below
2935 *
2936 * Parse an External ID or a Public ID
2937 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002938 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002939 * 'PUBLIC' S PubidLiteral S SystemLiteral
2940 *
2941 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2942 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2943 *
2944 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2945 *
2946 * Returns the function returns SystemLiteral and in the second
2947 * case publicID receives PubidLiteral, is strict is off
2948 * it is possible to return NULL and have publicID set.
2949 */
2950
2951xmlChar *
2952xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2953 xmlChar *URI = NULL;
2954
2955 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002956
2957 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002958 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2959 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2960 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2961 SKIP(6);
2962 if (!IS_BLANK(CUR)) {
2963 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2964 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2965 ctxt->sax->error(ctxt->userData,
2966 "Space required after 'SYSTEM'\n");
2967 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002968 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002969 }
2970 SKIP_BLANKS;
2971 URI = xmlParseSystemLiteral(ctxt);
2972 if (URI == NULL) {
2973 ctxt->errNo = XML_ERR_URI_REQUIRED;
2974 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2975 ctxt->sax->error(ctxt->userData,
2976 "xmlParseExternalID: SYSTEM, no URI\n");
2977 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002978 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002979 }
2980 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2981 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2982 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2983 SKIP(6);
2984 if (!IS_BLANK(CUR)) {
2985 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2987 ctxt->sax->error(ctxt->userData,
2988 "Space required after 'PUBLIC'\n");
2989 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002990 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002991 }
2992 SKIP_BLANKS;
2993 *publicID = xmlParsePubidLiteral(ctxt);
2994 if (*publicID == NULL) {
2995 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2996 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2997 ctxt->sax->error(ctxt->userData,
2998 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2999 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003000 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003001 }
3002 if (strict) {
3003 /*
3004 * We don't handle [83] so "S SystemLiteral" is required.
3005 */
3006 if (!IS_BLANK(CUR)) {
3007 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3008 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3009 ctxt->sax->error(ctxt->userData,
3010 "Space required after the Public Identifier\n");
3011 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003012 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003013 }
3014 } else {
3015 /*
3016 * We handle [83] so we return immediately, if
3017 * "S SystemLiteral" is not detected. From a purely parsing
3018 * point of view that's a nice mess.
3019 */
3020 const xmlChar *ptr;
3021 GROW;
3022
3023 ptr = CUR_PTR;
3024 if (!IS_BLANK(*ptr)) return(NULL);
3025
3026 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3027 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3028 }
3029 SKIP_BLANKS;
3030 URI = xmlParseSystemLiteral(ctxt);
3031 if (URI == NULL) {
3032 ctxt->errNo = XML_ERR_URI_REQUIRED;
3033 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3034 ctxt->sax->error(ctxt->userData,
3035 "xmlParseExternalID: PUBLIC, no URI\n");
3036 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003037 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003038 }
3039 }
3040 return(URI);
3041}
3042
3043/**
3044 * xmlParseComment:
3045 * @ctxt: an XML parser context
3046 *
3047 * Skip an XML (SGML) comment <!-- .... -->
3048 * The spec says that "For compatibility, the string "--" (double-hyphen)
3049 * must not occur within comments. "
3050 *
3051 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3052 */
3053void
3054xmlParseComment(xmlParserCtxtPtr ctxt) {
3055 xmlChar *buf = NULL;
3056 int len;
3057 int size = XML_PARSER_BUFFER_SIZE;
3058 int q, ql;
3059 int r, rl;
3060 int cur, l;
3061 xmlParserInputState state;
3062 xmlParserInputPtr input = ctxt->input;
3063 int count = 0;
3064
3065 /*
3066 * Check that there is a comment right here.
3067 */
3068 if ((RAW != '<') || (NXT(1) != '!') ||
3069 (NXT(2) != '-') || (NXT(3) != '-')) return;
3070
3071 state = ctxt->instate;
3072 ctxt->instate = XML_PARSER_COMMENT;
3073 SHRINK;
3074 SKIP(4);
3075 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3076 if (buf == NULL) {
3077 xmlGenericError(xmlGenericErrorContext,
3078 "malloc of %d byte failed\n", size);
3079 ctxt->instate = state;
3080 return;
3081 }
3082 q = CUR_CHAR(ql);
3083 NEXTL(ql);
3084 r = CUR_CHAR(rl);
3085 NEXTL(rl);
3086 cur = CUR_CHAR(l);
3087 len = 0;
3088 while (IS_CHAR(cur) && /* checked */
3089 ((cur != '>') ||
3090 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003091 if ((r == '-') && (q == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003092 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
3093 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3094 ctxt->sax->error(ctxt->userData,
3095 "Comment must not contain '--' (double-hyphen)`\n");
3096 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003097 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003098 }
3099 if (len + 5 >= size) {
3100 size *= 2;
3101 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3102 if (buf == NULL) {
3103 xmlGenericError(xmlGenericErrorContext,
3104 "realloc of %d byte failed\n", size);
3105 ctxt->instate = state;
3106 return;
3107 }
3108 }
3109 COPY_BUF(ql,buf,len,q);
3110 q = r;
3111 ql = rl;
3112 r = cur;
3113 rl = l;
3114
3115 count++;
3116 if (count > 50) {
3117 GROW;
3118 count = 0;
3119 }
3120 NEXTL(l);
3121 cur = CUR_CHAR(l);
3122 if (cur == 0) {
3123 SHRINK;
3124 GROW;
3125 cur = CUR_CHAR(l);
3126 }
3127 }
3128 buf[len] = 0;
3129 if (!IS_CHAR(cur)) {
3130 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
3131 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3132 ctxt->sax->error(ctxt->userData,
3133 "Comment not terminated \n<!--%.50s\n", buf);
3134 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003135 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003136 xmlFree(buf);
3137 } else {
3138 if (input != ctxt->input) {
3139 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3140 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3141 ctxt->sax->error(ctxt->userData,
3142"Comment doesn't start and stop in the same entity\n");
3143 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003144 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003145 }
3146 NEXT;
3147 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3148 (!ctxt->disableSAX))
3149 ctxt->sax->comment(ctxt->userData, buf);
3150 xmlFree(buf);
3151 }
3152 ctxt->instate = state;
3153}
3154
3155/**
3156 * xmlParsePITarget:
3157 * @ctxt: an XML parser context
3158 *
3159 * parse the name of a PI
3160 *
3161 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3162 *
3163 * Returns the PITarget name or NULL
3164 */
3165
3166xmlChar *
3167xmlParsePITarget(xmlParserCtxtPtr ctxt) {
3168 xmlChar *name;
3169
3170 name = xmlParseName(ctxt);
3171 if ((name != NULL) &&
3172 ((name[0] == 'x') || (name[0] == 'X')) &&
3173 ((name[1] == 'm') || (name[1] == 'M')) &&
3174 ((name[2] == 'l') || (name[2] == 'L'))) {
3175 int i;
3176 if ((name[0] == 'x') && (name[1] == 'm') &&
3177 (name[2] == 'l') && (name[3] == 0)) {
3178 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3179 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3180 ctxt->sax->error(ctxt->userData,
3181 "XML declaration allowed only at the start of the document\n");
3182 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003183 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003184 return(name);
3185 } else if (name[3] == 0) {
3186 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3187 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3188 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
3189 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003190 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003191 return(name);
3192 }
3193 for (i = 0;;i++) {
3194 if (xmlW3CPIs[i] == NULL) break;
3195 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3196 return(name);
3197 }
3198 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3199 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3200 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003201 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003202 }
3203 }
3204 return(name);
3205}
3206
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003207#ifdef LIBXML_CATALOG_ENABLED
3208/**
3209 * xmlParseCatalogPI:
3210 * @ctxt: an XML parser context
3211 * @catalog: the PI value string
3212 *
3213 * parse an XML Catalog Processing Instruction.
3214 *
3215 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
3216 *
3217 * Occurs only if allowed by the user and if happening in the Misc
3218 * part of the document before any doctype informations
3219 * This will add the given catalog to the parsing context in order
3220 * to be used if there is a resolution need further down in the document
3221 */
3222
3223static void
3224xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
3225 xmlChar *URL = NULL;
3226 const xmlChar *tmp, *base;
3227 xmlChar marker;
3228
3229 tmp = catalog;
3230 while (IS_BLANK(*tmp)) tmp++;
3231 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
3232 goto error;
3233 tmp += 7;
3234 while (IS_BLANK(*tmp)) tmp++;
3235 if (*tmp != '=') {
3236 return;
3237 }
3238 tmp++;
3239 while (IS_BLANK(*tmp)) tmp++;
3240 marker = *tmp;
3241 if ((marker != '\'') && (marker != '"'))
3242 goto error;
3243 tmp++;
3244 base = tmp;
3245 while ((*tmp != 0) && (*tmp != marker)) tmp++;
3246 if (*tmp == 0)
3247 goto error;
3248 URL = xmlStrndup(base, tmp - base);
3249 tmp++;
3250 while (IS_BLANK(*tmp)) tmp++;
3251 if (*tmp != 0)
3252 goto error;
3253
3254 if (URL != NULL) {
3255 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
3256 xmlFree(URL);
3257 }
3258 return;
3259
3260error:
3261 ctxt->errNo = XML_WAR_CATALOG_PI;
3262 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
3263 ctxt->sax->warning(ctxt->userData,
3264 "Catalog PI syntax error: %s\n", catalog);
3265 if (URL != NULL)
3266 xmlFree(URL);
3267}
3268#endif
3269
Owen Taylor3473f882001-02-23 17:55:21 +00003270/**
3271 * xmlParsePI:
3272 * @ctxt: an XML parser context
3273 *
3274 * parse an XML Processing Instruction.
3275 *
3276 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3277 *
3278 * The processing is transfered to SAX once parsed.
3279 */
3280
3281void
3282xmlParsePI(xmlParserCtxtPtr ctxt) {
3283 xmlChar *buf = NULL;
3284 int len = 0;
3285 int size = XML_PARSER_BUFFER_SIZE;
3286 int cur, l;
3287 xmlChar *target;
3288 xmlParserInputState state;
3289 int count = 0;
3290
3291 if ((RAW == '<') && (NXT(1) == '?')) {
3292 xmlParserInputPtr input = ctxt->input;
3293 state = ctxt->instate;
3294 ctxt->instate = XML_PARSER_PI;
3295 /*
3296 * this is a Processing Instruction.
3297 */
3298 SKIP(2);
3299 SHRINK;
3300
3301 /*
3302 * Parse the target name and check for special support like
3303 * namespace.
3304 */
3305 target = xmlParsePITarget(ctxt);
3306 if (target != NULL) {
3307 if ((RAW == '?') && (NXT(1) == '>')) {
3308 if (input != ctxt->input) {
3309 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3310 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3311 ctxt->sax->error(ctxt->userData,
3312 "PI declaration doesn't start and stop in the same entity\n");
3313 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003314 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003315 }
3316 SKIP(2);
3317
3318 /*
3319 * SAX: PI detected.
3320 */
3321 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3322 (ctxt->sax->processingInstruction != NULL))
3323 ctxt->sax->processingInstruction(ctxt->userData,
3324 target, NULL);
3325 ctxt->instate = state;
3326 xmlFree(target);
3327 return;
3328 }
3329 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3330 if (buf == NULL) {
3331 xmlGenericError(xmlGenericErrorContext,
3332 "malloc of %d byte failed\n", size);
3333 ctxt->instate = state;
3334 return;
3335 }
3336 cur = CUR;
3337 if (!IS_BLANK(cur)) {
3338 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3339 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3340 ctxt->sax->error(ctxt->userData,
3341 "xmlParsePI: PI %s space expected\n", target);
3342 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003343 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003344 }
3345 SKIP_BLANKS;
3346 cur = CUR_CHAR(l);
3347 while (IS_CHAR(cur) && /* checked */
3348 ((cur != '?') || (NXT(1) != '>'))) {
3349 if (len + 5 >= size) {
3350 size *= 2;
3351 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3352 if (buf == NULL) {
3353 xmlGenericError(xmlGenericErrorContext,
3354 "realloc of %d byte failed\n", size);
3355 ctxt->instate = state;
3356 return;
3357 }
3358 }
3359 count++;
3360 if (count > 50) {
3361 GROW;
3362 count = 0;
3363 }
3364 COPY_BUF(l,buf,len,cur);
3365 NEXTL(l);
3366 cur = CUR_CHAR(l);
3367 if (cur == 0) {
3368 SHRINK;
3369 GROW;
3370 cur = CUR_CHAR(l);
3371 }
3372 }
3373 buf[len] = 0;
3374 if (cur != '?') {
3375 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3376 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3377 ctxt->sax->error(ctxt->userData,
3378 "xmlParsePI: PI %s never end ...\n", target);
3379 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003380 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003381 } else {
3382 if (input != ctxt->input) {
3383 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3384 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3385 ctxt->sax->error(ctxt->userData,
3386 "PI declaration doesn't start and stop in the same entity\n");
3387 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003388 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003389 }
3390 SKIP(2);
3391
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003392#ifdef LIBXML_CATALOG_ENABLED
3393 if (((state == XML_PARSER_MISC) ||
3394 (state == XML_PARSER_START)) &&
3395 (xmlStrEqual(target, XML_CATALOG_PI))) {
3396 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3397 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3398 (allow == XML_CATA_ALLOW_ALL))
3399 xmlParseCatalogPI(ctxt, buf);
3400 }
3401#endif
3402
3403
Owen Taylor3473f882001-02-23 17:55:21 +00003404 /*
3405 * SAX: PI detected.
3406 */
3407 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3408 (ctxt->sax->processingInstruction != NULL))
3409 ctxt->sax->processingInstruction(ctxt->userData,
3410 target, buf);
3411 }
3412 xmlFree(buf);
3413 xmlFree(target);
3414 } else {
3415 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3416 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3417 ctxt->sax->error(ctxt->userData,
3418 "xmlParsePI : no target name\n");
3419 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003420 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003421 }
3422 ctxt->instate = state;
3423 }
3424}
3425
3426/**
3427 * xmlParseNotationDecl:
3428 * @ctxt: an XML parser context
3429 *
3430 * parse a notation declaration
3431 *
3432 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3433 *
3434 * Hence there is actually 3 choices:
3435 * 'PUBLIC' S PubidLiteral
3436 * 'PUBLIC' S PubidLiteral S SystemLiteral
3437 * and 'SYSTEM' S SystemLiteral
3438 *
3439 * See the NOTE on xmlParseExternalID().
3440 */
3441
3442void
3443xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3444 xmlChar *name;
3445 xmlChar *Pubid;
3446 xmlChar *Systemid;
3447
3448 if ((RAW == '<') && (NXT(1) == '!') &&
3449 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3450 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3451 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3452 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3453 xmlParserInputPtr input = ctxt->input;
3454 SHRINK;
3455 SKIP(10);
3456 if (!IS_BLANK(CUR)) {
3457 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3458 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3459 ctxt->sax->error(ctxt->userData,
3460 "Space required after '<!NOTATION'\n");
3461 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003462 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003463 return;
3464 }
3465 SKIP_BLANKS;
3466
Daniel Veillard76d66f42001-05-16 21:05:17 +00003467 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003468 if (name == NULL) {
3469 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3470 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3471 ctxt->sax->error(ctxt->userData,
3472 "NOTATION: Name expected here\n");
3473 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003474 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003475 return;
3476 }
3477 if (!IS_BLANK(CUR)) {
3478 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3479 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3480 ctxt->sax->error(ctxt->userData,
3481 "Space required after the NOTATION name'\n");
3482 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003483 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003484 return;
3485 }
3486 SKIP_BLANKS;
3487
3488 /*
3489 * Parse the IDs.
3490 */
3491 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3492 SKIP_BLANKS;
3493
3494 if (RAW == '>') {
3495 if (input != ctxt->input) {
3496 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3497 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3498 ctxt->sax->error(ctxt->userData,
3499"Notation declaration doesn't start and stop in the same entity\n");
3500 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003501 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003502 }
3503 NEXT;
3504 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3505 (ctxt->sax->notationDecl != NULL))
3506 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3507 } else {
3508 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3509 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3510 ctxt->sax->error(ctxt->userData,
3511 "'>' required to close NOTATION declaration\n");
3512 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003513 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003514 }
3515 xmlFree(name);
3516 if (Systemid != NULL) xmlFree(Systemid);
3517 if (Pubid != NULL) xmlFree(Pubid);
3518 }
3519}
3520
3521/**
3522 * xmlParseEntityDecl:
3523 * @ctxt: an XML parser context
3524 *
3525 * parse <!ENTITY declarations
3526 *
3527 * [70] EntityDecl ::= GEDecl | PEDecl
3528 *
3529 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3530 *
3531 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3532 *
3533 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3534 *
3535 * [74] PEDef ::= EntityValue | ExternalID
3536 *
3537 * [76] NDataDecl ::= S 'NDATA' S Name
3538 *
3539 * [ VC: Notation Declared ]
3540 * The Name must match the declared name of a notation.
3541 */
3542
3543void
3544xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3545 xmlChar *name = NULL;
3546 xmlChar *value = NULL;
3547 xmlChar *URI = NULL, *literal = NULL;
3548 xmlChar *ndata = NULL;
3549 int isParameter = 0;
3550 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003551 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003552
3553 GROW;
3554 if ((RAW == '<') && (NXT(1) == '!') &&
3555 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3556 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3557 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3558 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003559 SHRINK;
3560 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003561 skipped = SKIP_BLANKS;
3562 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003563 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3564 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3565 ctxt->sax->error(ctxt->userData,
3566 "Space required after '<!ENTITY'\n");
3567 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003568 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003569 }
Owen Taylor3473f882001-02-23 17:55:21 +00003570
3571 if (RAW == '%') {
3572 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003573 skipped = SKIP_BLANKS;
3574 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003575 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3576 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3577 ctxt->sax->error(ctxt->userData,
3578 "Space required after '%'\n");
3579 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003580 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003581 }
Owen Taylor3473f882001-02-23 17:55:21 +00003582 isParameter = 1;
3583 }
3584
Daniel Veillard76d66f42001-05-16 21:05:17 +00003585 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003586 if (name == NULL) {
3587 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3588 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3589 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3590 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003591 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003592 return;
3593 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003594 skipped = SKIP_BLANKS;
3595 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003596 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3597 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3598 ctxt->sax->error(ctxt->userData,
3599 "Space required after the entity name\n");
3600 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003601 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003602 }
Owen Taylor3473f882001-02-23 17:55:21 +00003603
Daniel Veillardf5582f12002-06-11 10:08:16 +00003604 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003605 /*
3606 * handle the various case of definitions...
3607 */
3608 if (isParameter) {
3609 if ((RAW == '"') || (RAW == '\'')) {
3610 value = xmlParseEntityValue(ctxt, &orig);
3611 if (value) {
3612 if ((ctxt->sax != NULL) &&
3613 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3614 ctxt->sax->entityDecl(ctxt->userData, name,
3615 XML_INTERNAL_PARAMETER_ENTITY,
3616 NULL, NULL, value);
3617 }
3618 } else {
3619 URI = xmlParseExternalID(ctxt, &literal, 1);
3620 if ((URI == NULL) && (literal == NULL)) {
3621 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3622 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3623 ctxt->sax->error(ctxt->userData,
3624 "Entity value required\n");
3625 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003626 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003627 }
3628 if (URI) {
3629 xmlURIPtr uri;
3630
3631 uri = xmlParseURI((const char *) URI);
3632 if (uri == NULL) {
3633 ctxt->errNo = XML_ERR_INVALID_URI;
3634 if ((ctxt->sax != NULL) &&
3635 (!ctxt->disableSAX) &&
3636 (ctxt->sax->error != NULL))
3637 ctxt->sax->error(ctxt->userData,
3638 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003639 /*
3640 * This really ought to be a well formedness error
3641 * but the XML Core WG decided otherwise c.f. issue
3642 * E26 of the XML erratas.
3643 */
Owen Taylor3473f882001-02-23 17:55:21 +00003644 } else {
3645 if (uri->fragment != NULL) {
3646 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3647 if ((ctxt->sax != NULL) &&
3648 (!ctxt->disableSAX) &&
3649 (ctxt->sax->error != NULL))
3650 ctxt->sax->error(ctxt->userData,
3651 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003652 /*
3653 * Okay this is foolish to block those but not
3654 * invalid URIs.
3655 */
Owen Taylor3473f882001-02-23 17:55:21 +00003656 ctxt->wellFormed = 0;
3657 } else {
3658 if ((ctxt->sax != NULL) &&
3659 (!ctxt->disableSAX) &&
3660 (ctxt->sax->entityDecl != NULL))
3661 ctxt->sax->entityDecl(ctxt->userData, name,
3662 XML_EXTERNAL_PARAMETER_ENTITY,
3663 literal, URI, NULL);
3664 }
3665 xmlFreeURI(uri);
3666 }
3667 }
3668 }
3669 } else {
3670 if ((RAW == '"') || (RAW == '\'')) {
3671 value = xmlParseEntityValue(ctxt, &orig);
3672 if ((ctxt->sax != NULL) &&
3673 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3674 ctxt->sax->entityDecl(ctxt->userData, name,
3675 XML_INTERNAL_GENERAL_ENTITY,
3676 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003677 /*
3678 * For expat compatibility in SAX mode.
3679 */
3680 if ((ctxt->myDoc == NULL) ||
3681 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3682 if (ctxt->myDoc == NULL) {
3683 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3684 }
3685 if (ctxt->myDoc->intSubset == NULL)
3686 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3687 BAD_CAST "fake", NULL, NULL);
3688
3689 entityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3690 NULL, NULL, value);
3691 }
Owen Taylor3473f882001-02-23 17:55:21 +00003692 } else {
3693 URI = xmlParseExternalID(ctxt, &literal, 1);
3694 if ((URI == NULL) && (literal == NULL)) {
3695 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3696 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3697 ctxt->sax->error(ctxt->userData,
3698 "Entity value required\n");
3699 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003700 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003701 }
3702 if (URI) {
3703 xmlURIPtr uri;
3704
3705 uri = xmlParseURI((const char *)URI);
3706 if (uri == NULL) {
3707 ctxt->errNo = XML_ERR_INVALID_URI;
3708 if ((ctxt->sax != NULL) &&
3709 (!ctxt->disableSAX) &&
3710 (ctxt->sax->error != NULL))
3711 ctxt->sax->error(ctxt->userData,
3712 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003713 /*
3714 * This really ought to be a well formedness error
3715 * but the XML Core WG decided otherwise c.f. issue
3716 * E26 of the XML erratas.
3717 */
Owen Taylor3473f882001-02-23 17:55:21 +00003718 } else {
3719 if (uri->fragment != NULL) {
3720 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3721 if ((ctxt->sax != NULL) &&
3722 (!ctxt->disableSAX) &&
3723 (ctxt->sax->error != NULL))
3724 ctxt->sax->error(ctxt->userData,
3725 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003726 /*
3727 * Okay this is foolish to block those but not
3728 * invalid URIs.
3729 */
Owen Taylor3473f882001-02-23 17:55:21 +00003730 ctxt->wellFormed = 0;
3731 }
3732 xmlFreeURI(uri);
3733 }
3734 }
3735 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3736 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3737 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3738 ctxt->sax->error(ctxt->userData,
3739 "Space required before 'NDATA'\n");
3740 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003741 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003742 }
3743 SKIP_BLANKS;
3744 if ((RAW == 'N') && (NXT(1) == 'D') &&
3745 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3746 (NXT(4) == 'A')) {
3747 SKIP(5);
3748 if (!IS_BLANK(CUR)) {
3749 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3750 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3751 ctxt->sax->error(ctxt->userData,
3752 "Space required after 'NDATA'\n");
3753 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003754 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003755 }
3756 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003757 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003758 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3759 (ctxt->sax->unparsedEntityDecl != NULL))
3760 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3761 literal, URI, ndata);
3762 } else {
3763 if ((ctxt->sax != NULL) &&
3764 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3765 ctxt->sax->entityDecl(ctxt->userData, name,
3766 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3767 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003768 /*
3769 * For expat compatibility in SAX mode.
3770 * assuming the entity repalcement was asked for
3771 */
3772 if ((ctxt->replaceEntities != 0) &&
3773 ((ctxt->myDoc == NULL) ||
3774 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
3775 if (ctxt->myDoc == NULL) {
3776 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3777 }
3778
3779 if (ctxt->myDoc->intSubset == NULL)
3780 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3781 BAD_CAST "fake", NULL, NULL);
3782 entityDecl(ctxt, name,
3783 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3784 literal, URI, NULL);
3785 }
Owen Taylor3473f882001-02-23 17:55:21 +00003786 }
3787 }
3788 }
3789 SKIP_BLANKS;
3790 if (RAW != '>') {
3791 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3792 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3793 ctxt->sax->error(ctxt->userData,
3794 "xmlParseEntityDecl: entity %s not terminated\n", name);
3795 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003796 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003797 } else {
3798 if (input != ctxt->input) {
3799 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3800 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3801 ctxt->sax->error(ctxt->userData,
3802"Entity declaration doesn't start and stop in the same entity\n");
3803 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003804 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003805 }
3806 NEXT;
3807 }
3808 if (orig != NULL) {
3809 /*
3810 * Ugly mechanism to save the raw entity value.
3811 */
3812 xmlEntityPtr cur = NULL;
3813
3814 if (isParameter) {
3815 if ((ctxt->sax != NULL) &&
3816 (ctxt->sax->getParameterEntity != NULL))
3817 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3818 } else {
3819 if ((ctxt->sax != NULL) &&
3820 (ctxt->sax->getEntity != NULL))
3821 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003822 if ((cur == NULL) && (ctxt->userData==ctxt)) {
3823 cur = getEntity(ctxt, name);
3824 }
Owen Taylor3473f882001-02-23 17:55:21 +00003825 }
3826 if (cur != NULL) {
3827 if (cur->orig != NULL)
3828 xmlFree(orig);
3829 else
3830 cur->orig = orig;
3831 } else
3832 xmlFree(orig);
3833 }
3834 if (name != NULL) xmlFree(name);
3835 if (value != NULL) xmlFree(value);
3836 if (URI != NULL) xmlFree(URI);
3837 if (literal != NULL) xmlFree(literal);
3838 if (ndata != NULL) xmlFree(ndata);
3839 }
3840}
3841
3842/**
3843 * xmlParseDefaultDecl:
3844 * @ctxt: an XML parser context
3845 * @value: Receive a possible fixed default value for the attribute
3846 *
3847 * Parse an attribute default declaration
3848 *
3849 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3850 *
3851 * [ VC: Required Attribute ]
3852 * if the default declaration is the keyword #REQUIRED, then the
3853 * attribute must be specified for all elements of the type in the
3854 * attribute-list declaration.
3855 *
3856 * [ VC: Attribute Default Legal ]
3857 * The declared default value must meet the lexical constraints of
3858 * the declared attribute type c.f. xmlValidateAttributeDecl()
3859 *
3860 * [ VC: Fixed Attribute Default ]
3861 * if an attribute has a default value declared with the #FIXED
3862 * keyword, instances of that attribute must match the default value.
3863 *
3864 * [ WFC: No < in Attribute Values ]
3865 * handled in xmlParseAttValue()
3866 *
3867 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3868 * or XML_ATTRIBUTE_FIXED.
3869 */
3870
3871int
3872xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3873 int val;
3874 xmlChar *ret;
3875
3876 *value = NULL;
3877 if ((RAW == '#') && (NXT(1) == 'R') &&
3878 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3879 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3880 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3881 (NXT(8) == 'D')) {
3882 SKIP(9);
3883 return(XML_ATTRIBUTE_REQUIRED);
3884 }
3885 if ((RAW == '#') && (NXT(1) == 'I') &&
3886 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3887 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3888 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3889 SKIP(8);
3890 return(XML_ATTRIBUTE_IMPLIED);
3891 }
3892 val = XML_ATTRIBUTE_NONE;
3893 if ((RAW == '#') && (NXT(1) == 'F') &&
3894 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3895 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3896 SKIP(6);
3897 val = XML_ATTRIBUTE_FIXED;
3898 if (!IS_BLANK(CUR)) {
3899 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3900 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3901 ctxt->sax->error(ctxt->userData,
3902 "Space required after '#FIXED'\n");
3903 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003904 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003905 }
3906 SKIP_BLANKS;
3907 }
3908 ret = xmlParseAttValue(ctxt);
3909 ctxt->instate = XML_PARSER_DTD;
3910 if (ret == NULL) {
3911 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3912 ctxt->sax->error(ctxt->userData,
3913 "Attribute default value declaration error\n");
3914 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003915 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003916 } else
3917 *value = ret;
3918 return(val);
3919}
3920
3921/**
3922 * xmlParseNotationType:
3923 * @ctxt: an XML parser context
3924 *
3925 * parse an Notation attribute type.
3926 *
3927 * Note: the leading 'NOTATION' S part has already being parsed...
3928 *
3929 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3930 *
3931 * [ VC: Notation Attributes ]
3932 * Values of this type must match one of the notation names included
3933 * in the declaration; all notation names in the declaration must be declared.
3934 *
3935 * Returns: the notation attribute tree built while parsing
3936 */
3937
3938xmlEnumerationPtr
3939xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3940 xmlChar *name;
3941 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3942
3943 if (RAW != '(') {
3944 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3945 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3946 ctxt->sax->error(ctxt->userData,
3947 "'(' required to start 'NOTATION'\n");
3948 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003949 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003950 return(NULL);
3951 }
3952 SHRINK;
3953 do {
3954 NEXT;
3955 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003956 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003957 if (name == NULL) {
3958 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3959 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3960 ctxt->sax->error(ctxt->userData,
3961 "Name expected in NOTATION declaration\n");
3962 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003963 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003964 return(ret);
3965 }
3966 cur = xmlCreateEnumeration(name);
3967 xmlFree(name);
3968 if (cur == NULL) return(ret);
3969 if (last == NULL) ret = last = cur;
3970 else {
3971 last->next = cur;
3972 last = cur;
3973 }
3974 SKIP_BLANKS;
3975 } while (RAW == '|');
3976 if (RAW != ')') {
3977 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3978 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3979 ctxt->sax->error(ctxt->userData,
3980 "')' required to finish NOTATION declaration\n");
3981 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003982 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003983 if ((last != NULL) && (last != ret))
3984 xmlFreeEnumeration(last);
3985 return(ret);
3986 }
3987 NEXT;
3988 return(ret);
3989}
3990
3991/**
3992 * xmlParseEnumerationType:
3993 * @ctxt: an XML parser context
3994 *
3995 * parse an Enumeration attribute type.
3996 *
3997 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3998 *
3999 * [ VC: Enumeration ]
4000 * Values of this type must match one of the Nmtoken tokens in
4001 * the declaration
4002 *
4003 * Returns: the enumeration attribute tree built while parsing
4004 */
4005
4006xmlEnumerationPtr
4007xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4008 xmlChar *name;
4009 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4010
4011 if (RAW != '(') {
4012 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
4013 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4014 ctxt->sax->error(ctxt->userData,
4015 "'(' required to start ATTLIST enumeration\n");
4016 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004017 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004018 return(NULL);
4019 }
4020 SHRINK;
4021 do {
4022 NEXT;
4023 SKIP_BLANKS;
4024 name = xmlParseNmtoken(ctxt);
4025 if (name == NULL) {
4026 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
4027 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4028 ctxt->sax->error(ctxt->userData,
4029 "NmToken expected in ATTLIST enumeration\n");
4030 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004031 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004032 return(ret);
4033 }
4034 cur = xmlCreateEnumeration(name);
4035 xmlFree(name);
4036 if (cur == NULL) return(ret);
4037 if (last == NULL) ret = last = cur;
4038 else {
4039 last->next = cur;
4040 last = cur;
4041 }
4042 SKIP_BLANKS;
4043 } while (RAW == '|');
4044 if (RAW != ')') {
4045 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
4046 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4047 ctxt->sax->error(ctxt->userData,
4048 "')' required to finish ATTLIST enumeration\n");
4049 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004050 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004051 return(ret);
4052 }
4053 NEXT;
4054 return(ret);
4055}
4056
4057/**
4058 * xmlParseEnumeratedType:
4059 * @ctxt: an XML parser context
4060 * @tree: the enumeration tree built while parsing
4061 *
4062 * parse an Enumerated attribute type.
4063 *
4064 * [57] EnumeratedType ::= NotationType | Enumeration
4065 *
4066 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4067 *
4068 *
4069 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4070 */
4071
4072int
4073xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4074 if ((RAW == 'N') && (NXT(1) == 'O') &&
4075 (NXT(2) == 'T') && (NXT(3) == 'A') &&
4076 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4077 (NXT(6) == 'O') && (NXT(7) == 'N')) {
4078 SKIP(8);
4079 if (!IS_BLANK(CUR)) {
4080 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4081 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4082 ctxt->sax->error(ctxt->userData,
4083 "Space required after 'NOTATION'\n");
4084 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004085 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004086 return(0);
4087 }
4088 SKIP_BLANKS;
4089 *tree = xmlParseNotationType(ctxt);
4090 if (*tree == NULL) return(0);
4091 return(XML_ATTRIBUTE_NOTATION);
4092 }
4093 *tree = xmlParseEnumerationType(ctxt);
4094 if (*tree == NULL) return(0);
4095 return(XML_ATTRIBUTE_ENUMERATION);
4096}
4097
4098/**
4099 * xmlParseAttributeType:
4100 * @ctxt: an XML parser context
4101 * @tree: the enumeration tree built while parsing
4102 *
4103 * parse the Attribute list def for an element
4104 *
4105 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4106 *
4107 * [55] StringType ::= 'CDATA'
4108 *
4109 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4110 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4111 *
4112 * Validity constraints for attribute values syntax are checked in
4113 * xmlValidateAttributeValue()
4114 *
4115 * [ VC: ID ]
4116 * Values of type ID must match the Name production. A name must not
4117 * appear more than once in an XML document as a value of this type;
4118 * i.e., ID values must uniquely identify the elements which bear them.
4119 *
4120 * [ VC: One ID per Element Type ]
4121 * No element type may have more than one ID attribute specified.
4122 *
4123 * [ VC: ID Attribute Default ]
4124 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4125 *
4126 * [ VC: IDREF ]
4127 * Values of type IDREF must match the Name production, and values
4128 * of type IDREFS must match Names; each IDREF Name must match the value
4129 * of an ID attribute on some element in the XML document; i.e. IDREF
4130 * values must match the value of some ID attribute.
4131 *
4132 * [ VC: Entity Name ]
4133 * Values of type ENTITY must match the Name production, values
4134 * of type ENTITIES must match Names; each Entity Name must match the
4135 * name of an unparsed entity declared in the DTD.
4136 *
4137 * [ VC: Name Token ]
4138 * Values of type NMTOKEN must match the Nmtoken production; values
4139 * of type NMTOKENS must match Nmtokens.
4140 *
4141 * Returns the attribute type
4142 */
4143int
4144xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4145 SHRINK;
4146 if ((RAW == 'C') && (NXT(1) == 'D') &&
4147 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4148 (NXT(4) == 'A')) {
4149 SKIP(5);
4150 return(XML_ATTRIBUTE_CDATA);
4151 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4152 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4153 (NXT(4) == 'F') && (NXT(5) == 'S')) {
4154 SKIP(6);
4155 return(XML_ATTRIBUTE_IDREFS);
4156 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4157 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4158 (NXT(4) == 'F')) {
4159 SKIP(5);
4160 return(XML_ATTRIBUTE_IDREF);
4161 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4162 SKIP(2);
4163 return(XML_ATTRIBUTE_ID);
4164 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4165 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4166 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4167 SKIP(6);
4168 return(XML_ATTRIBUTE_ENTITY);
4169 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4170 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4171 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4172 (NXT(6) == 'E') && (NXT(7) == 'S')) {
4173 SKIP(8);
4174 return(XML_ATTRIBUTE_ENTITIES);
4175 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4176 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4177 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4178 (NXT(6) == 'N') && (NXT(7) == 'S')) {
4179 SKIP(8);
4180 return(XML_ATTRIBUTE_NMTOKENS);
4181 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4182 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4183 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4184 (NXT(6) == 'N')) {
4185 SKIP(7);
4186 return(XML_ATTRIBUTE_NMTOKEN);
4187 }
4188 return(xmlParseEnumeratedType(ctxt, tree));
4189}
4190
4191/**
4192 * xmlParseAttributeListDecl:
4193 * @ctxt: an XML parser context
4194 *
4195 * : parse the Attribute list def for an element
4196 *
4197 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4198 *
4199 * [53] AttDef ::= S Name S AttType S DefaultDecl
4200 *
4201 */
4202void
4203xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
4204 xmlChar *elemName;
4205 xmlChar *attrName;
4206 xmlEnumerationPtr tree;
4207
4208 if ((RAW == '<') && (NXT(1) == '!') &&
4209 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4210 (NXT(4) == 'T') && (NXT(5) == 'L') &&
4211 (NXT(6) == 'I') && (NXT(7) == 'S') &&
4212 (NXT(8) == 'T')) {
4213 xmlParserInputPtr input = ctxt->input;
4214
4215 SKIP(9);
4216 if (!IS_BLANK(CUR)) {
4217 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4218 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4219 ctxt->sax->error(ctxt->userData,
4220 "Space required after '<!ATTLIST'\n");
4221 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004222 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004223 }
4224 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004225 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004226 if (elemName == NULL) {
4227 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4228 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4229 ctxt->sax->error(ctxt->userData,
4230 "ATTLIST: no name for Element\n");
4231 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004232 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004233 return;
4234 }
4235 SKIP_BLANKS;
4236 GROW;
4237 while (RAW != '>') {
4238 const xmlChar *check = CUR_PTR;
4239 int type;
4240 int def;
4241 xmlChar *defaultValue = NULL;
4242
4243 GROW;
4244 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004245 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004246 if (attrName == NULL) {
4247 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4248 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4249 ctxt->sax->error(ctxt->userData,
4250 "ATTLIST: no name for Attribute\n");
4251 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004252 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004253 break;
4254 }
4255 GROW;
4256 if (!IS_BLANK(CUR)) {
4257 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4258 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4259 ctxt->sax->error(ctxt->userData,
4260 "Space required after the attribute name\n");
4261 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004262 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004263 if (attrName != NULL)
4264 xmlFree(attrName);
4265 if (defaultValue != NULL)
4266 xmlFree(defaultValue);
4267 break;
4268 }
4269 SKIP_BLANKS;
4270
4271 type = xmlParseAttributeType(ctxt, &tree);
4272 if (type <= 0) {
4273 if (attrName != NULL)
4274 xmlFree(attrName);
4275 if (defaultValue != NULL)
4276 xmlFree(defaultValue);
4277 break;
4278 }
4279
4280 GROW;
4281 if (!IS_BLANK(CUR)) {
4282 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4283 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4284 ctxt->sax->error(ctxt->userData,
4285 "Space required after the attribute type\n");
4286 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004287 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004288 if (attrName != NULL)
4289 xmlFree(attrName);
4290 if (defaultValue != NULL)
4291 xmlFree(defaultValue);
4292 if (tree != NULL)
4293 xmlFreeEnumeration(tree);
4294 break;
4295 }
4296 SKIP_BLANKS;
4297
4298 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4299 if (def <= 0) {
4300 if (attrName != NULL)
4301 xmlFree(attrName);
4302 if (defaultValue != NULL)
4303 xmlFree(defaultValue);
4304 if (tree != NULL)
4305 xmlFreeEnumeration(tree);
4306 break;
4307 }
4308
4309 GROW;
4310 if (RAW != '>') {
4311 if (!IS_BLANK(CUR)) {
4312 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4313 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4314 ctxt->sax->error(ctxt->userData,
4315 "Space required after the attribute default value\n");
4316 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004317 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004318 if (attrName != NULL)
4319 xmlFree(attrName);
4320 if (defaultValue != NULL)
4321 xmlFree(defaultValue);
4322 if (tree != NULL)
4323 xmlFreeEnumeration(tree);
4324 break;
4325 }
4326 SKIP_BLANKS;
4327 }
4328 if (check == CUR_PTR) {
4329 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4331 ctxt->sax->error(ctxt->userData,
4332 "xmlParseAttributeListDecl: detected internal error\n");
4333 if (attrName != NULL)
4334 xmlFree(attrName);
4335 if (defaultValue != NULL)
4336 xmlFree(defaultValue);
4337 if (tree != NULL)
4338 xmlFreeEnumeration(tree);
4339 break;
4340 }
4341 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4342 (ctxt->sax->attributeDecl != NULL))
4343 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4344 type, def, defaultValue, tree);
4345 if (attrName != NULL)
4346 xmlFree(attrName);
4347 if (defaultValue != NULL)
4348 xmlFree(defaultValue);
4349 GROW;
4350 }
4351 if (RAW == '>') {
4352 if (input != ctxt->input) {
4353 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4354 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4355 ctxt->sax->error(ctxt->userData,
4356"Attribute list declaration doesn't start and stop in the same entity\n");
4357 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004358 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004359 }
4360 NEXT;
4361 }
4362
4363 xmlFree(elemName);
4364 }
4365}
4366
4367/**
4368 * xmlParseElementMixedContentDecl:
4369 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004370 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004371 *
4372 * parse the declaration for a Mixed Element content
4373 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4374 *
4375 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4376 * '(' S? '#PCDATA' S? ')'
4377 *
4378 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4379 *
4380 * [ VC: No Duplicate Types ]
4381 * The same name must not appear more than once in a single
4382 * mixed-content declaration.
4383 *
4384 * returns: the list of the xmlElementContentPtr describing the element choices
4385 */
4386xmlElementContentPtr
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004387xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004388 xmlElementContentPtr ret = NULL, cur = NULL, n;
4389 xmlChar *elem = NULL;
4390
4391 GROW;
4392 if ((RAW == '#') && (NXT(1) == 'P') &&
4393 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4394 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4395 (NXT(6) == 'A')) {
4396 SKIP(7);
4397 SKIP_BLANKS;
4398 SHRINK;
4399 if (RAW == ')') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004400 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4401 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4402 if (ctxt->vctxt.error != NULL)
4403 ctxt->vctxt.error(ctxt->vctxt.userData,
4404"Element content declaration doesn't start and stop in the same entity\n");
4405 ctxt->valid = 0;
4406 }
Owen Taylor3473f882001-02-23 17:55:21 +00004407 NEXT;
4408 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4409 if (RAW == '*') {
4410 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4411 NEXT;
4412 }
4413 return(ret);
4414 }
4415 if ((RAW == '(') || (RAW == '|')) {
4416 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4417 if (ret == NULL) return(NULL);
4418 }
4419 while (RAW == '|') {
4420 NEXT;
4421 if (elem == NULL) {
4422 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4423 if (ret == NULL) return(NULL);
4424 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004425 if (cur != NULL)
4426 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004427 cur = ret;
4428 } else {
4429 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4430 if (n == NULL) return(NULL);
4431 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004432 if (n->c1 != NULL)
4433 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004434 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004435 if (n != NULL)
4436 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004437 cur = n;
4438 xmlFree(elem);
4439 }
4440 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004441 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004442 if (elem == NULL) {
4443 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4444 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4445 ctxt->sax->error(ctxt->userData,
4446 "xmlParseElementMixedContentDecl : Name expected\n");
4447 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004448 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004449 xmlFreeElementContent(cur);
4450 return(NULL);
4451 }
4452 SKIP_BLANKS;
4453 GROW;
4454 }
4455 if ((RAW == ')') && (NXT(1) == '*')) {
4456 if (elem != NULL) {
4457 cur->c2 = xmlNewElementContent(elem,
4458 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004459 if (cur->c2 != NULL)
4460 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004461 xmlFree(elem);
4462 }
4463 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004464 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4465 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4466 if (ctxt->vctxt.error != NULL)
4467 ctxt->vctxt.error(ctxt->vctxt.userData,
4468"Element content declaration doesn't start and stop in the same entity\n");
4469 ctxt->valid = 0;
4470 }
Owen Taylor3473f882001-02-23 17:55:21 +00004471 SKIP(2);
4472 } else {
4473 if (elem != NULL) xmlFree(elem);
4474 xmlFreeElementContent(ret);
4475 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4476 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4477 ctxt->sax->error(ctxt->userData,
4478 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4479 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004480 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004481 return(NULL);
4482 }
4483
4484 } else {
4485 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4486 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4487 ctxt->sax->error(ctxt->userData,
4488 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4489 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004490 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004491 }
4492 return(ret);
4493}
4494
4495/**
4496 * xmlParseElementChildrenContentDecl:
4497 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004498 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004499 *
4500 * parse the declaration for a Mixed Element content
4501 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4502 *
4503 *
4504 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4505 *
4506 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4507 *
4508 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4509 *
4510 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4511 *
4512 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4513 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004514 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004515 * opening or closing parentheses in a choice, seq, or Mixed
4516 * construct is contained in the replacement text for a parameter
4517 * entity, both must be contained in the same replacement text. For
4518 * interoperability, if a parameter-entity reference appears in a
4519 * choice, seq, or Mixed construct, its replacement text should not
4520 * be empty, and neither the first nor last non-blank character of
4521 * the replacement text should be a connector (| or ,).
4522 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004523 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004524 * hierarchy.
4525 */
4526xmlElementContentPtr
Owen Taylor3473f882001-02-23 17:55:21 +00004527xmlParseElementChildrenContentDecl
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004528(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004529 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4530 xmlChar *elem;
4531 xmlChar type = 0;
4532
4533 SKIP_BLANKS;
4534 GROW;
4535 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004536 xmlParserInputPtr input = ctxt->input;
4537
Owen Taylor3473f882001-02-23 17:55:21 +00004538 /* Recurse on first child */
4539 NEXT;
4540 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004541 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004542 SKIP_BLANKS;
4543 GROW;
4544 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004545 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004546 if (elem == NULL) {
4547 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4548 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4549 ctxt->sax->error(ctxt->userData,
4550 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4551 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004552 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004553 return(NULL);
4554 }
4555 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4556 GROW;
4557 if (RAW == '?') {
4558 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4559 NEXT;
4560 } else if (RAW == '*') {
4561 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4562 NEXT;
4563 } else if (RAW == '+') {
4564 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4565 NEXT;
4566 } else {
4567 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4568 }
4569 xmlFree(elem);
4570 GROW;
4571 }
4572 SKIP_BLANKS;
4573 SHRINK;
4574 while (RAW != ')') {
4575 /*
4576 * Each loop we parse one separator and one element.
4577 */
4578 if (RAW == ',') {
4579 if (type == 0) type = CUR;
4580
4581 /*
4582 * Detect "Name | Name , Name" error
4583 */
4584 else if (type != CUR) {
4585 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4586 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4587 ctxt->sax->error(ctxt->userData,
4588 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4589 type);
4590 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004591 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004592 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004593 xmlFreeElementContent(last);
4594 if (ret != NULL)
4595 xmlFreeElementContent(ret);
4596 return(NULL);
4597 }
4598 NEXT;
4599
4600 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4601 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004602 if ((last != NULL) && (last != ret))
4603 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004604 xmlFreeElementContent(ret);
4605 return(NULL);
4606 }
4607 if (last == NULL) {
4608 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004609 if (ret != NULL)
4610 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004611 ret = cur = op;
4612 } else {
4613 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004614 if (op != NULL)
4615 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004616 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004617 if (last != NULL)
4618 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004619 cur =op;
4620 last = NULL;
4621 }
4622 } else if (RAW == '|') {
4623 if (type == 0) type = CUR;
4624
4625 /*
4626 * Detect "Name , Name | Name" error
4627 */
4628 else if (type != CUR) {
4629 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4630 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4631 ctxt->sax->error(ctxt->userData,
4632 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4633 type);
4634 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004635 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004636 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004637 xmlFreeElementContent(last);
4638 if (ret != NULL)
4639 xmlFreeElementContent(ret);
4640 return(NULL);
4641 }
4642 NEXT;
4643
4644 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4645 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004646 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004647 xmlFreeElementContent(last);
4648 if (ret != NULL)
4649 xmlFreeElementContent(ret);
4650 return(NULL);
4651 }
4652 if (last == NULL) {
4653 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004654 if (ret != NULL)
4655 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004656 ret = cur = op;
4657 } else {
4658 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004659 if (op != NULL)
4660 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004661 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004662 if (last != NULL)
4663 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004664 cur =op;
4665 last = NULL;
4666 }
4667 } else {
4668 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4669 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4670 ctxt->sax->error(ctxt->userData,
4671 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4672 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004673 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004674 if (ret != NULL)
4675 xmlFreeElementContent(ret);
4676 return(NULL);
4677 }
4678 GROW;
4679 SKIP_BLANKS;
4680 GROW;
4681 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004682 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004683 /* Recurse on second child */
4684 NEXT;
4685 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004686 last = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004687 SKIP_BLANKS;
4688 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004689 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004690 if (elem == NULL) {
4691 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4692 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4693 ctxt->sax->error(ctxt->userData,
4694 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4695 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004696 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004697 if (ret != NULL)
4698 xmlFreeElementContent(ret);
4699 return(NULL);
4700 }
4701 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4702 xmlFree(elem);
4703 if (RAW == '?') {
4704 last->ocur = XML_ELEMENT_CONTENT_OPT;
4705 NEXT;
4706 } else if (RAW == '*') {
4707 last->ocur = XML_ELEMENT_CONTENT_MULT;
4708 NEXT;
4709 } else if (RAW == '+') {
4710 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4711 NEXT;
4712 } else {
4713 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4714 }
4715 }
4716 SKIP_BLANKS;
4717 GROW;
4718 }
4719 if ((cur != NULL) && (last != NULL)) {
4720 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004721 if (last != NULL)
4722 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004723 }
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004724 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4725 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4726 if (ctxt->vctxt.error != NULL)
4727 ctxt->vctxt.error(ctxt->vctxt.userData,
4728"Element content declaration doesn't start and stop in the same entity\n");
4729 ctxt->valid = 0;
4730 }
Owen Taylor3473f882001-02-23 17:55:21 +00004731 NEXT;
4732 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004733 if (ret != NULL)
4734 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004735 NEXT;
4736 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004737 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004738 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004739 cur = ret;
4740 /*
4741 * Some normalization:
4742 * (a | b* | c?)* == (a | b | c)*
4743 */
4744 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4745 if ((cur->c1 != NULL) &&
4746 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4747 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4748 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4749 if ((cur->c2 != NULL) &&
4750 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4751 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4752 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4753 cur = cur->c2;
4754 }
4755 }
Owen Taylor3473f882001-02-23 17:55:21 +00004756 NEXT;
4757 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004758 if (ret != NULL) {
4759 int found = 0;
4760
Daniel Veillarde470df72001-04-18 21:41:07 +00004761 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004762 /*
4763 * Some normalization:
4764 * (a | b*)+ == (a | b)*
4765 * (a | b?)+ == (a | b)*
4766 */
4767 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4768 if ((cur->c1 != NULL) &&
4769 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4770 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4771 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4772 found = 1;
4773 }
4774 if ((cur->c2 != NULL) &&
4775 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4776 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4777 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4778 found = 1;
4779 }
4780 cur = cur->c2;
4781 }
4782 if (found)
4783 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4784 }
Owen Taylor3473f882001-02-23 17:55:21 +00004785 NEXT;
4786 }
4787 return(ret);
4788}
4789
4790/**
4791 * xmlParseElementContentDecl:
4792 * @ctxt: an XML parser context
4793 * @name: the name of the element being defined.
4794 * @result: the Element Content pointer will be stored here if any
4795 *
4796 * parse the declaration for an Element content either Mixed or Children,
4797 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4798 *
4799 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4800 *
4801 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4802 */
4803
4804int
4805xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4806 xmlElementContentPtr *result) {
4807
4808 xmlElementContentPtr tree = NULL;
4809 xmlParserInputPtr input = ctxt->input;
4810 int res;
4811
4812 *result = NULL;
4813
4814 if (RAW != '(') {
4815 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4816 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4817 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004818 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004819 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004820 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004821 return(-1);
4822 }
4823 NEXT;
4824 GROW;
4825 SKIP_BLANKS;
4826 if ((RAW == '#') && (NXT(1) == 'P') &&
4827 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4828 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4829 (NXT(6) == 'A')) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004830 tree = xmlParseElementMixedContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004831 res = XML_ELEMENT_TYPE_MIXED;
4832 } else {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004833 tree = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004834 res = XML_ELEMENT_TYPE_ELEMENT;
4835 }
Owen Taylor3473f882001-02-23 17:55:21 +00004836 SKIP_BLANKS;
4837 *result = tree;
4838 return(res);
4839}
4840
4841/**
4842 * xmlParseElementDecl:
4843 * @ctxt: an XML parser context
4844 *
4845 * parse an Element declaration.
4846 *
4847 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4848 *
4849 * [ VC: Unique Element Type Declaration ]
4850 * No element type may be declared more than once
4851 *
4852 * Returns the type of the element, or -1 in case of error
4853 */
4854int
4855xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4856 xmlChar *name;
4857 int ret = -1;
4858 xmlElementContentPtr content = NULL;
4859
4860 GROW;
4861 if ((RAW == '<') && (NXT(1) == '!') &&
4862 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4863 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4864 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4865 (NXT(8) == 'T')) {
4866 xmlParserInputPtr input = ctxt->input;
4867
4868 SKIP(9);
4869 if (!IS_BLANK(CUR)) {
4870 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4871 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4872 ctxt->sax->error(ctxt->userData,
4873 "Space required after 'ELEMENT'\n");
4874 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004875 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004876 }
4877 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004878 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004879 if (name == NULL) {
4880 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4881 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4882 ctxt->sax->error(ctxt->userData,
4883 "xmlParseElementDecl: no name for Element\n");
4884 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004885 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004886 return(-1);
4887 }
4888 while ((RAW == 0) && (ctxt->inputNr > 1))
4889 xmlPopInput(ctxt);
4890 if (!IS_BLANK(CUR)) {
4891 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4892 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4893 ctxt->sax->error(ctxt->userData,
4894 "Space required after the element name\n");
4895 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004896 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004897 }
4898 SKIP_BLANKS;
4899 if ((RAW == 'E') && (NXT(1) == 'M') &&
4900 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4901 (NXT(4) == 'Y')) {
4902 SKIP(5);
4903 /*
4904 * Element must always be empty.
4905 */
4906 ret = XML_ELEMENT_TYPE_EMPTY;
4907 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4908 (NXT(2) == 'Y')) {
4909 SKIP(3);
4910 /*
4911 * Element is a generic container.
4912 */
4913 ret = XML_ELEMENT_TYPE_ANY;
4914 } else if (RAW == '(') {
4915 ret = xmlParseElementContentDecl(ctxt, name, &content);
4916 } else {
4917 /*
4918 * [ WFC: PEs in Internal Subset ] error handling.
4919 */
4920 if ((RAW == '%') && (ctxt->external == 0) &&
4921 (ctxt->inputNr == 1)) {
4922 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4923 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4924 ctxt->sax->error(ctxt->userData,
4925 "PEReference: forbidden within markup decl in internal subset\n");
4926 } else {
4927 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4928 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4929 ctxt->sax->error(ctxt->userData,
4930 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4931 }
4932 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004933 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004934 if (name != NULL) xmlFree(name);
4935 return(-1);
4936 }
4937
4938 SKIP_BLANKS;
4939 /*
4940 * Pop-up of finished entities.
4941 */
4942 while ((RAW == 0) && (ctxt->inputNr > 1))
4943 xmlPopInput(ctxt);
4944 SKIP_BLANKS;
4945
4946 if (RAW != '>') {
4947 ctxt->errNo = XML_ERR_GT_REQUIRED;
4948 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4949 ctxt->sax->error(ctxt->userData,
4950 "xmlParseElementDecl: expected '>' at the end\n");
4951 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004952 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004953 } else {
4954 if (input != ctxt->input) {
4955 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4956 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4957 ctxt->sax->error(ctxt->userData,
4958"Element declaration doesn't start and stop in the same entity\n");
4959 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004960 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004961 }
4962
4963 NEXT;
4964 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4965 (ctxt->sax->elementDecl != NULL))
4966 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4967 content);
4968 }
4969 if (content != NULL) {
4970 xmlFreeElementContent(content);
4971 }
4972 if (name != NULL) {
4973 xmlFree(name);
4974 }
4975 }
4976 return(ret);
4977}
4978
4979/**
Owen Taylor3473f882001-02-23 17:55:21 +00004980 * xmlParseConditionalSections
4981 * @ctxt: an XML parser context
4982 *
4983 * [61] conditionalSect ::= includeSect | ignoreSect
4984 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4985 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4986 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4987 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4988 */
4989
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004990static void
Owen Taylor3473f882001-02-23 17:55:21 +00004991xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4992 SKIP(3);
4993 SKIP_BLANKS;
4994 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4995 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4996 (NXT(6) == 'E')) {
4997 SKIP(7);
4998 SKIP_BLANKS;
4999 if (RAW != '[') {
5000 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5001 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5002 ctxt->sax->error(ctxt->userData,
5003 "XML conditional section '[' expected\n");
5004 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005005 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005006 } else {
5007 NEXT;
5008 }
5009 if (xmlParserDebugEntities) {
5010 if ((ctxt->input != NULL) && (ctxt->input->filename))
5011 xmlGenericError(xmlGenericErrorContext,
5012 "%s(%d): ", ctxt->input->filename,
5013 ctxt->input->line);
5014 xmlGenericError(xmlGenericErrorContext,
5015 "Entering INCLUDE Conditional Section\n");
5016 }
5017
5018 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5019 (NXT(2) != '>'))) {
5020 const xmlChar *check = CUR_PTR;
5021 int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005022
5023 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5024 xmlParseConditionalSections(ctxt);
5025 } else if (IS_BLANK(CUR)) {
5026 NEXT;
5027 } else if (RAW == '%') {
5028 xmlParsePEReference(ctxt);
5029 } else
5030 xmlParseMarkupDecl(ctxt);
5031
5032 /*
5033 * Pop-up of finished entities.
5034 */
5035 while ((RAW == 0) && (ctxt->inputNr > 1))
5036 xmlPopInput(ctxt);
5037
Daniel Veillardfdc91562002-07-01 21:52:03 +00005038 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005039 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5040 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5041 ctxt->sax->error(ctxt->userData,
5042 "Content error in the external subset\n");
5043 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005044 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005045 break;
5046 }
5047 }
5048 if (xmlParserDebugEntities) {
5049 if ((ctxt->input != NULL) && (ctxt->input->filename))
5050 xmlGenericError(xmlGenericErrorContext,
5051 "%s(%d): ", ctxt->input->filename,
5052 ctxt->input->line);
5053 xmlGenericError(xmlGenericErrorContext,
5054 "Leaving INCLUDE Conditional Section\n");
5055 }
5056
5057 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
5058 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
5059 int state;
5060 int instate;
5061 int depth = 0;
5062
5063 SKIP(6);
5064 SKIP_BLANKS;
5065 if (RAW != '[') {
5066 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5067 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5068 ctxt->sax->error(ctxt->userData,
5069 "XML conditional section '[' expected\n");
5070 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005071 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005072 } else {
5073 NEXT;
5074 }
5075 if (xmlParserDebugEntities) {
5076 if ((ctxt->input != NULL) && (ctxt->input->filename))
5077 xmlGenericError(xmlGenericErrorContext,
5078 "%s(%d): ", ctxt->input->filename,
5079 ctxt->input->line);
5080 xmlGenericError(xmlGenericErrorContext,
5081 "Entering IGNORE Conditional Section\n");
5082 }
5083
5084 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005085 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005086 * But disable SAX event generating DTD building in the meantime
5087 */
5088 state = ctxt->disableSAX;
5089 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005090 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005091 ctxt->instate = XML_PARSER_IGNORE;
5092
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005093 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005094 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5095 depth++;
5096 SKIP(3);
5097 continue;
5098 }
5099 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5100 if (--depth >= 0) SKIP(3);
5101 continue;
5102 }
5103 NEXT;
5104 continue;
5105 }
5106
5107 ctxt->disableSAX = state;
5108 ctxt->instate = instate;
5109
5110 if (xmlParserDebugEntities) {
5111 if ((ctxt->input != NULL) && (ctxt->input->filename))
5112 xmlGenericError(xmlGenericErrorContext,
5113 "%s(%d): ", ctxt->input->filename,
5114 ctxt->input->line);
5115 xmlGenericError(xmlGenericErrorContext,
5116 "Leaving IGNORE Conditional Section\n");
5117 }
5118
5119 } else {
5120 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5121 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5122 ctxt->sax->error(ctxt->userData,
5123 "XML conditional section INCLUDE or IGNORE keyword expected\n");
5124 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005125 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005126 }
5127
5128 if (RAW == 0)
5129 SHRINK;
5130
5131 if (RAW == 0) {
5132 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
5133 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5134 ctxt->sax->error(ctxt->userData,
5135 "XML conditional section not closed\n");
5136 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005137 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005138 } else {
5139 SKIP(3);
5140 }
5141}
5142
5143/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005144 * xmlParseMarkupDecl:
5145 * @ctxt: an XML parser context
5146 *
5147 * parse Markup declarations
5148 *
5149 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5150 * NotationDecl | PI | Comment
5151 *
5152 * [ VC: Proper Declaration/PE Nesting ]
5153 * Parameter-entity replacement text must be properly nested with
5154 * markup declarations. That is to say, if either the first character
5155 * or the last character of a markup declaration (markupdecl above) is
5156 * contained in the replacement text for a parameter-entity reference,
5157 * both must be contained in the same replacement text.
5158 *
5159 * [ WFC: PEs in Internal Subset ]
5160 * In the internal DTD subset, parameter-entity references can occur
5161 * only where markup declarations can occur, not within markup declarations.
5162 * (This does not apply to references that occur in external parameter
5163 * entities or to the external subset.)
5164 */
5165void
5166xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5167 GROW;
5168 xmlParseElementDecl(ctxt);
5169 xmlParseAttributeListDecl(ctxt);
5170 xmlParseEntityDecl(ctxt);
5171 xmlParseNotationDecl(ctxt);
5172 xmlParsePI(ctxt);
5173 xmlParseComment(ctxt);
5174 /*
5175 * This is only for internal subset. On external entities,
5176 * the replacement is done before parsing stage
5177 */
5178 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5179 xmlParsePEReference(ctxt);
5180
5181 /*
5182 * Conditional sections are allowed from entities included
5183 * by PE References in the internal subset.
5184 */
5185 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5186 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5187 xmlParseConditionalSections(ctxt);
5188 }
5189 }
5190
5191 ctxt->instate = XML_PARSER_DTD;
5192}
5193
5194/**
5195 * xmlParseTextDecl:
5196 * @ctxt: an XML parser context
5197 *
5198 * parse an XML declaration header for external entities
5199 *
5200 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5201 *
5202 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5203 */
5204
5205void
5206xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5207 xmlChar *version;
5208
5209 /*
5210 * We know that '<?xml' is here.
5211 */
5212 if ((RAW == '<') && (NXT(1) == '?') &&
5213 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5214 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5215 SKIP(5);
5216 } else {
5217 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
5218 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5219 ctxt->sax->error(ctxt->userData,
5220 "Text declaration '<?xml' required\n");
5221 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005222 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005223
5224 return;
5225 }
5226
5227 if (!IS_BLANK(CUR)) {
5228 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5229 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5230 ctxt->sax->error(ctxt->userData,
5231 "Space needed after '<?xml'\n");
5232 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005233 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005234 }
5235 SKIP_BLANKS;
5236
5237 /*
5238 * We may have the VersionInfo here.
5239 */
5240 version = xmlParseVersionInfo(ctxt);
5241 if (version == NULL)
5242 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005243 else {
5244 if (!IS_BLANK(CUR)) {
5245 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5246 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5247 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5248 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005249 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard401c2112002-01-07 16:54:10 +00005250 }
5251 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005252 ctxt->input->version = version;
5253
5254 /*
5255 * We must have the encoding declaration
5256 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005257 xmlParseEncodingDecl(ctxt);
5258 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5259 /*
5260 * The XML REC instructs us to stop parsing right here
5261 */
5262 return;
5263 }
5264
5265 SKIP_BLANKS;
5266 if ((RAW == '?') && (NXT(1) == '>')) {
5267 SKIP(2);
5268 } else if (RAW == '>') {
5269 /* Deprecated old WD ... */
5270 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5271 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5272 ctxt->sax->error(ctxt->userData,
5273 "XML declaration must end-up with '?>'\n");
5274 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005275 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005276 NEXT;
5277 } else {
5278 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5279 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5280 ctxt->sax->error(ctxt->userData,
5281 "parsing XML declaration: '?>' expected\n");
5282 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005283 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005284 MOVETO_ENDTAG(CUR_PTR);
5285 NEXT;
5286 }
5287}
5288
5289/**
Owen Taylor3473f882001-02-23 17:55:21 +00005290 * xmlParseExternalSubset:
5291 * @ctxt: an XML parser context
5292 * @ExternalID: the external identifier
5293 * @SystemID: the system identifier (or URL)
5294 *
5295 * parse Markup declarations from an external subset
5296 *
5297 * [30] extSubset ::= textDecl? extSubsetDecl
5298 *
5299 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5300 */
5301void
5302xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5303 const xmlChar *SystemID) {
5304 GROW;
5305 if ((RAW == '<') && (NXT(1) == '?') &&
5306 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5307 (NXT(4) == 'l')) {
5308 xmlParseTextDecl(ctxt);
5309 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5310 /*
5311 * The XML REC instructs us to stop parsing right here
5312 */
5313 ctxt->instate = XML_PARSER_EOF;
5314 return;
5315 }
5316 }
5317 if (ctxt->myDoc == NULL) {
5318 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5319 }
5320 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5321 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5322
5323 ctxt->instate = XML_PARSER_DTD;
5324 ctxt->external = 1;
5325 while (((RAW == '<') && (NXT(1) == '?')) ||
5326 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005327 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005328 const xmlChar *check = CUR_PTR;
5329 int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005330
5331 GROW;
5332 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5333 xmlParseConditionalSections(ctxt);
5334 } else if (IS_BLANK(CUR)) {
5335 NEXT;
5336 } else if (RAW == '%') {
5337 xmlParsePEReference(ctxt);
5338 } else
5339 xmlParseMarkupDecl(ctxt);
5340
5341 /*
5342 * Pop-up of finished entities.
5343 */
5344 while ((RAW == 0) && (ctxt->inputNr > 1))
5345 xmlPopInput(ctxt);
5346
Daniel Veillardfdc91562002-07-01 21:52:03 +00005347 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005348 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5350 ctxt->sax->error(ctxt->userData,
5351 "Content error in the external subset\n");
5352 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005353 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005354 break;
5355 }
5356 }
5357
5358 if (RAW != 0) {
5359 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5360 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5361 ctxt->sax->error(ctxt->userData,
5362 "Extra content at the end of the document\n");
5363 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005364 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005365 }
5366
5367}
5368
5369/**
5370 * xmlParseReference:
5371 * @ctxt: an XML parser context
5372 *
5373 * parse and handle entity references in content, depending on the SAX
5374 * interface, this may end-up in a call to character() if this is a
5375 * CharRef, a predefined entity, if there is no reference() callback.
5376 * or if the parser was asked to switch to that mode.
5377 *
5378 * [67] Reference ::= EntityRef | CharRef
5379 */
5380void
5381xmlParseReference(xmlParserCtxtPtr ctxt) {
5382 xmlEntityPtr ent;
5383 xmlChar *val;
5384 if (RAW != '&') return;
5385
5386 if (NXT(1) == '#') {
5387 int i = 0;
5388 xmlChar out[10];
5389 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005390 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005391
5392 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5393 /*
5394 * So we are using non-UTF-8 buffers
5395 * Check that the char fit on 8bits, if not
5396 * generate a CharRef.
5397 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005398 if (value <= 0xFF) {
5399 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005400 out[1] = 0;
5401 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5402 (!ctxt->disableSAX))
5403 ctxt->sax->characters(ctxt->userData, out, 1);
5404 } else {
5405 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005406 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005407 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005408 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005409 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5410 (!ctxt->disableSAX))
5411 ctxt->sax->reference(ctxt->userData, out);
5412 }
5413 } else {
5414 /*
5415 * Just encode the value in UTF-8
5416 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005417 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005418 out[i] = 0;
5419 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5420 (!ctxt->disableSAX))
5421 ctxt->sax->characters(ctxt->userData, out, i);
5422 }
5423 } else {
5424 ent = xmlParseEntityRef(ctxt);
5425 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005426 if (!ctxt->wellFormed)
5427 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005428 if ((ent->name != NULL) &&
5429 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5430 xmlNodePtr list = NULL;
5431 int ret;
5432
5433
5434 /*
5435 * The first reference to the entity trigger a parsing phase
5436 * where the ent->children is filled with the result from
5437 * the parsing.
5438 */
5439 if (ent->children == NULL) {
5440 xmlChar *value;
5441 value = ent->content;
5442
5443 /*
5444 * Check that this entity is well formed
5445 */
5446 if ((value != NULL) &&
5447 (value[1] == 0) && (value[0] == '<') &&
5448 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5449 /*
5450 * DONE: get definite answer on this !!!
5451 * Lots of entity decls are used to declare a single
5452 * char
5453 * <!ENTITY lt "<">
5454 * Which seems to be valid since
5455 * 2.4: The ampersand character (&) and the left angle
5456 * bracket (<) may appear in their literal form only
5457 * when used ... They are also legal within the literal
5458 * entity value of an internal entity declaration;i
5459 * see "4.3.2 Well-Formed Parsed Entities".
5460 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5461 * Looking at the OASIS test suite and James Clark
5462 * tests, this is broken. However the XML REC uses
5463 * it. Is the XML REC not well-formed ????
5464 * This is a hack to avoid this problem
5465 *
5466 * ANSWER: since lt gt amp .. are already defined,
5467 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005468 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005469 * is lousy but acceptable.
5470 */
5471 list = xmlNewDocText(ctxt->myDoc, value);
5472 if (list != NULL) {
5473 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5474 (ent->children == NULL)) {
5475 ent->children = list;
5476 ent->last = list;
5477 list->parent = (xmlNodePtr) ent;
5478 } else {
5479 xmlFreeNodeList(list);
5480 }
5481 } else if (list != NULL) {
5482 xmlFreeNodeList(list);
5483 }
5484 } else {
5485 /*
5486 * 4.3.2: An internal general parsed entity is well-formed
5487 * if its replacement text matches the production labeled
5488 * content.
5489 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005490
5491 void *user_data;
5492 /*
5493 * This is a bit hackish but this seems the best
5494 * way to make sure both SAX and DOM entity support
5495 * behaves okay.
5496 */
5497 if (ctxt->userData == ctxt)
5498 user_data = NULL;
5499 else
5500 user_data = ctxt->userData;
5501
Owen Taylor3473f882001-02-23 17:55:21 +00005502 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5503 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005504 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5505 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005506 ctxt->depth--;
5507 } else if (ent->etype ==
5508 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5509 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005510 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005511 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005512 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005513 ctxt->depth--;
5514 } else {
5515 ret = -1;
5516 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5517 ctxt->sax->error(ctxt->userData,
5518 "Internal: invalid entity type\n");
5519 }
5520 if (ret == XML_ERR_ENTITY_LOOP) {
5521 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5522 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5523 ctxt->sax->error(ctxt->userData,
5524 "Detected entity reference loop\n");
5525 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005526 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005527 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005528 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005529 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5530 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005531 (ent->children == NULL)) {
5532 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005533 if (ctxt->replaceEntities) {
5534 /*
5535 * Prune it directly in the generated document
5536 * except for single text nodes.
5537 */
5538 if ((list->type == XML_TEXT_NODE) &&
5539 (list->next == NULL)) {
5540 list->parent = (xmlNodePtr) ent;
5541 list = NULL;
5542 } else {
5543 while (list != NULL) {
5544 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005545 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005546 if (list->next == NULL)
5547 ent->last = list;
5548 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005549 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005550 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005551 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5552 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005553 }
5554 } else {
5555 while (list != NULL) {
5556 list->parent = (xmlNodePtr) ent;
5557 if (list->next == NULL)
5558 ent->last = list;
5559 list = list->next;
5560 }
Owen Taylor3473f882001-02-23 17:55:21 +00005561 }
5562 } else {
5563 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005564 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005565 }
5566 } else if (ret > 0) {
5567 ctxt->errNo = ret;
5568 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5569 ctxt->sax->error(ctxt->userData,
5570 "Entity value required\n");
5571 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005572 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005573 } else if (list != NULL) {
5574 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005575 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005576 }
5577 }
5578 }
5579 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5580 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5581 /*
5582 * Create a node.
5583 */
5584 ctxt->sax->reference(ctxt->userData, ent->name);
5585 return;
5586 } else if (ctxt->replaceEntities) {
5587 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5588 /*
5589 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005590 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005591 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005592 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005593 if (list == NULL) {
Daniel Veillard8107a222002-01-13 14:10:10 +00005594 xmlNodePtr new = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005595 cur = ent->children;
5596 while (cur != NULL) {
5597 new = xmlCopyNode(cur, 1);
Daniel Veillard8107a222002-01-13 14:10:10 +00005598 if (firstChild == NULL){
5599 firstChild = new;
5600 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005601 xmlAddChild(ctxt->node, new);
5602 if (cur == ent->last)
5603 break;
5604 cur = cur->next;
5605 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005606 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5607 xmlAddEntityReference(ent, firstChild, new);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005608 } else {
5609 /*
5610 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005611 * node with a possible previous text one which
5612 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005613 */
5614 if (ent->children->type == XML_TEXT_NODE)
5615 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5616 if ((ent->last != ent->children) &&
5617 (ent->last->type == XML_TEXT_NODE))
5618 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5619 xmlAddChildList(ctxt->node, ent->children);
5620 }
5621
Owen Taylor3473f882001-02-23 17:55:21 +00005622 /*
5623 * This is to avoid a nasty side effect, see
5624 * characters() in SAX.c
5625 */
5626 ctxt->nodemem = 0;
5627 ctxt->nodelen = 0;
5628 return;
5629 } else {
5630 /*
5631 * Probably running in SAX mode
5632 */
5633 xmlParserInputPtr input;
5634
5635 input = xmlNewEntityInputStream(ctxt, ent);
5636 xmlPushInput(ctxt, input);
5637 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5638 (RAW == '<') && (NXT(1) == '?') &&
5639 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5640 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5641 xmlParseTextDecl(ctxt);
5642 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5643 /*
5644 * The XML REC instructs us to stop parsing right here
5645 */
5646 ctxt->instate = XML_PARSER_EOF;
5647 return;
5648 }
5649 if (input->standalone == 1) {
5650 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5651 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5652 ctxt->sax->error(ctxt->userData,
5653 "external parsed entities cannot be standalone\n");
5654 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005655 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005656 }
5657 }
5658 return;
5659 }
5660 }
5661 } else {
5662 val = ent->content;
5663 if (val == NULL) return;
5664 /*
5665 * inline the entity.
5666 */
5667 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5668 (!ctxt->disableSAX))
5669 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5670 }
5671 }
5672}
5673
5674/**
5675 * xmlParseEntityRef:
5676 * @ctxt: an XML parser context
5677 *
5678 * parse ENTITY references declarations
5679 *
5680 * [68] EntityRef ::= '&' Name ';'
5681 *
5682 * [ WFC: Entity Declared ]
5683 * In a document without any DTD, a document with only an internal DTD
5684 * subset which contains no parameter entity references, or a document
5685 * with "standalone='yes'", the Name given in the entity reference
5686 * must match that in an entity declaration, except that well-formed
5687 * documents need not declare any of the following entities: amp, lt,
5688 * gt, apos, quot. The declaration of a parameter entity must precede
5689 * any reference to it. Similarly, the declaration of a general entity
5690 * must precede any reference to it which appears in a default value in an
5691 * attribute-list declaration. Note that if entities are declared in the
5692 * external subset or in external parameter entities, a non-validating
5693 * processor is not obligated to read and process their declarations;
5694 * for such documents, the rule that an entity must be declared is a
5695 * well-formedness constraint only if standalone='yes'.
5696 *
5697 * [ WFC: Parsed Entity ]
5698 * An entity reference must not contain the name of an unparsed entity
5699 *
5700 * Returns the xmlEntityPtr if found, or NULL otherwise.
5701 */
5702xmlEntityPtr
5703xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5704 xmlChar *name;
5705 xmlEntityPtr ent = NULL;
5706
5707 GROW;
5708
5709 if (RAW == '&') {
5710 NEXT;
5711 name = xmlParseName(ctxt);
5712 if (name == NULL) {
5713 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5715 ctxt->sax->error(ctxt->userData,
5716 "xmlParseEntityRef: no name\n");
5717 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005718 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005719 } else {
5720 if (RAW == ';') {
5721 NEXT;
5722 /*
5723 * Ask first SAX for entity resolution, otherwise try the
5724 * predefined set.
5725 */
5726 if (ctxt->sax != NULL) {
5727 if (ctxt->sax->getEntity != NULL)
5728 ent = ctxt->sax->getEntity(ctxt->userData, name);
5729 if (ent == NULL)
5730 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005731 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5732 ent = getEntity(ctxt, name);
5733 }
Owen Taylor3473f882001-02-23 17:55:21 +00005734 }
5735 /*
5736 * [ WFC: Entity Declared ]
5737 * In a document without any DTD, a document with only an
5738 * internal DTD subset which contains no parameter entity
5739 * references, or a document with "standalone='yes'", the
5740 * Name given in the entity reference must match that in an
5741 * entity declaration, except that well-formed documents
5742 * need not declare any of the following entities: amp, lt,
5743 * gt, apos, quot.
5744 * The declaration of a parameter entity must precede any
5745 * reference to it.
5746 * Similarly, the declaration of a general entity must
5747 * precede any reference to it which appears in a default
5748 * value in an attribute-list declaration. Note that if
5749 * entities are declared in the external subset or in
5750 * external parameter entities, a non-validating processor
5751 * is not obligated to read and process their declarations;
5752 * for such documents, the rule that an entity must be
5753 * declared is a well-formedness constraint only if
5754 * standalone='yes'.
5755 */
5756 if (ent == NULL) {
5757 if ((ctxt->standalone == 1) ||
5758 ((ctxt->hasExternalSubset == 0) &&
5759 (ctxt->hasPErefs == 0))) {
5760 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5761 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5762 ctxt->sax->error(ctxt->userData,
5763 "Entity '%s' not defined\n", name);
5764 ctxt->wellFormed = 0;
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005765 ctxt->valid = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005766 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005767 } else {
5768 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005769 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005770 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005771 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005772 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005773 }
5774 }
5775
5776 /*
5777 * [ WFC: Parsed Entity ]
5778 * An entity reference must not contain the name of an
5779 * unparsed entity
5780 */
5781 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5782 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5783 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5784 ctxt->sax->error(ctxt->userData,
5785 "Entity reference to unparsed entity %s\n", name);
5786 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005787 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005788 }
5789
5790 /*
5791 * [ WFC: No External Entity References ]
5792 * Attribute values cannot contain direct or indirect
5793 * entity references to external entities.
5794 */
5795 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5796 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5797 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5798 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5799 ctxt->sax->error(ctxt->userData,
5800 "Attribute references external entity '%s'\n", name);
5801 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005802 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005803 }
5804 /*
5805 * [ WFC: No < in Attribute Values ]
5806 * The replacement text of any entity referred to directly or
5807 * indirectly in an attribute value (other than "&lt;") must
5808 * not contain a <.
5809 */
5810 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5811 (ent != NULL) &&
5812 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5813 (ent->content != NULL) &&
5814 (xmlStrchr(ent->content, '<'))) {
5815 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5816 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5817 ctxt->sax->error(ctxt->userData,
5818 "'<' in entity '%s' is not allowed in attributes values\n", name);
5819 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005820 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005821 }
5822
5823 /*
5824 * Internal check, no parameter entities here ...
5825 */
5826 else {
5827 switch (ent->etype) {
5828 case XML_INTERNAL_PARAMETER_ENTITY:
5829 case XML_EXTERNAL_PARAMETER_ENTITY:
5830 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5831 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5832 ctxt->sax->error(ctxt->userData,
5833 "Attempt to reference the parameter entity '%s'\n", name);
5834 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005835 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005836 break;
5837 default:
5838 break;
5839 }
5840 }
5841
5842 /*
5843 * [ WFC: No Recursion ]
5844 * A parsed entity must not contain a recursive reference
5845 * to itself, either directly or indirectly.
5846 * Done somewhere else
5847 */
5848
5849 } else {
5850 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5851 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5852 ctxt->sax->error(ctxt->userData,
5853 "xmlParseEntityRef: expecting ';'\n");
5854 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005855 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005856 }
5857 xmlFree(name);
5858 }
5859 }
5860 return(ent);
5861}
5862
5863/**
5864 * xmlParseStringEntityRef:
5865 * @ctxt: an XML parser context
5866 * @str: a pointer to an index in the string
5867 *
5868 * parse ENTITY references declarations, but this version parses it from
5869 * a string value.
5870 *
5871 * [68] EntityRef ::= '&' Name ';'
5872 *
5873 * [ WFC: Entity Declared ]
5874 * In a document without any DTD, a document with only an internal DTD
5875 * subset which contains no parameter entity references, or a document
5876 * with "standalone='yes'", the Name given in the entity reference
5877 * must match that in an entity declaration, except that well-formed
5878 * documents need not declare any of the following entities: amp, lt,
5879 * gt, apos, quot. The declaration of a parameter entity must precede
5880 * any reference to it. Similarly, the declaration of a general entity
5881 * must precede any reference to it which appears in a default value in an
5882 * attribute-list declaration. Note that if entities are declared in the
5883 * external subset or in external parameter entities, a non-validating
5884 * processor is not obligated to read and process their declarations;
5885 * for such documents, the rule that an entity must be declared is a
5886 * well-formedness constraint only if standalone='yes'.
5887 *
5888 * [ WFC: Parsed Entity ]
5889 * An entity reference must not contain the name of an unparsed entity
5890 *
5891 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5892 * is updated to the current location in the string.
5893 */
5894xmlEntityPtr
5895xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5896 xmlChar *name;
5897 const xmlChar *ptr;
5898 xmlChar cur;
5899 xmlEntityPtr ent = NULL;
5900
5901 if ((str == NULL) || (*str == NULL))
5902 return(NULL);
5903 ptr = *str;
5904 cur = *ptr;
5905 if (cur == '&') {
5906 ptr++;
5907 cur = *ptr;
5908 name = xmlParseStringName(ctxt, &ptr);
5909 if (name == NULL) {
5910 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5911 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5912 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005913 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005914 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005915 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005916 } else {
5917 if (*ptr == ';') {
5918 ptr++;
5919 /*
5920 * Ask first SAX for entity resolution, otherwise try the
5921 * predefined set.
5922 */
5923 if (ctxt->sax != NULL) {
5924 if (ctxt->sax->getEntity != NULL)
5925 ent = ctxt->sax->getEntity(ctxt->userData, name);
5926 if (ent == NULL)
5927 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005928 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5929 ent = getEntity(ctxt, name);
5930 }
Owen Taylor3473f882001-02-23 17:55:21 +00005931 }
5932 /*
5933 * [ WFC: Entity Declared ]
5934 * In a document without any DTD, a document with only an
5935 * internal DTD subset which contains no parameter entity
5936 * references, or a document with "standalone='yes'", the
5937 * Name given in the entity reference must match that in an
5938 * entity declaration, except that well-formed documents
5939 * need not declare any of the following entities: amp, lt,
5940 * gt, apos, quot.
5941 * The declaration of a parameter entity must precede any
5942 * reference to it.
5943 * Similarly, the declaration of a general entity must
5944 * precede any reference to it which appears in a default
5945 * value in an attribute-list declaration. Note that if
5946 * entities are declared in the external subset or in
5947 * external parameter entities, a non-validating processor
5948 * is not obligated to read and process their declarations;
5949 * for such documents, the rule that an entity must be
5950 * declared is a well-formedness constraint only if
5951 * standalone='yes'.
5952 */
5953 if (ent == NULL) {
5954 if ((ctxt->standalone == 1) ||
5955 ((ctxt->hasExternalSubset == 0) &&
5956 (ctxt->hasPErefs == 0))) {
5957 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5958 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5959 ctxt->sax->error(ctxt->userData,
5960 "Entity '%s' not defined\n", name);
5961 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005962 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005963 } else {
5964 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5965 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5966 ctxt->sax->warning(ctxt->userData,
5967 "Entity '%s' not defined\n", name);
5968 }
5969 }
5970
5971 /*
5972 * [ WFC: Parsed Entity ]
5973 * An entity reference must not contain the name of an
5974 * unparsed entity
5975 */
5976 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5977 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5978 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5979 ctxt->sax->error(ctxt->userData,
5980 "Entity reference to unparsed entity %s\n", name);
5981 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005982 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005983 }
5984
5985 /*
5986 * [ WFC: No External Entity References ]
5987 * Attribute values cannot contain direct or indirect
5988 * entity references to external entities.
5989 */
5990 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5991 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5992 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5993 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5994 ctxt->sax->error(ctxt->userData,
5995 "Attribute references external entity '%s'\n", name);
5996 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005997 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005998 }
5999 /*
6000 * [ WFC: No < in Attribute Values ]
6001 * The replacement text of any entity referred to directly or
6002 * indirectly in an attribute value (other than "&lt;") must
6003 * not contain a <.
6004 */
6005 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6006 (ent != NULL) &&
6007 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6008 (ent->content != NULL) &&
6009 (xmlStrchr(ent->content, '<'))) {
6010 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6011 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6012 ctxt->sax->error(ctxt->userData,
6013 "'<' in entity '%s' is not allowed in attributes values\n", name);
6014 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006015 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006016 }
6017
6018 /*
6019 * Internal check, no parameter entities here ...
6020 */
6021 else {
6022 switch (ent->etype) {
6023 case XML_INTERNAL_PARAMETER_ENTITY:
6024 case XML_EXTERNAL_PARAMETER_ENTITY:
6025 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6026 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6027 ctxt->sax->error(ctxt->userData,
6028 "Attempt to reference the parameter entity '%s'\n", name);
6029 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006030 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006031 break;
6032 default:
6033 break;
6034 }
6035 }
6036
6037 /*
6038 * [ WFC: No Recursion ]
6039 * A parsed entity must not contain a recursive reference
6040 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006041 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006042 */
6043
6044 } else {
6045 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6046 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6047 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00006048 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006049 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006050 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006051 }
6052 xmlFree(name);
6053 }
6054 }
6055 *str = ptr;
6056 return(ent);
6057}
6058
6059/**
6060 * xmlParsePEReference:
6061 * @ctxt: an XML parser context
6062 *
6063 * parse PEReference declarations
6064 * The entity content is handled directly by pushing it's content as
6065 * a new input stream.
6066 *
6067 * [69] PEReference ::= '%' Name ';'
6068 *
6069 * [ WFC: No Recursion ]
6070 * A parsed entity must not contain a recursive
6071 * reference to itself, either directly or indirectly.
6072 *
6073 * [ WFC: Entity Declared ]
6074 * In a document without any DTD, a document with only an internal DTD
6075 * subset which contains no parameter entity references, or a document
6076 * with "standalone='yes'", ... ... The declaration of a parameter
6077 * entity must precede any reference to it...
6078 *
6079 * [ VC: Entity Declared ]
6080 * In a document with an external subset or external parameter entities
6081 * with "standalone='no'", ... ... The declaration of a parameter entity
6082 * must precede any reference to it...
6083 *
6084 * [ WFC: In DTD ]
6085 * Parameter-entity references may only appear in the DTD.
6086 * NOTE: misleading but this is handled.
6087 */
6088void
6089xmlParsePEReference(xmlParserCtxtPtr ctxt) {
6090 xmlChar *name;
6091 xmlEntityPtr entity = NULL;
6092 xmlParserInputPtr input;
6093
6094 if (RAW == '%') {
6095 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006096 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006097 if (name == NULL) {
6098 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6099 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6100 ctxt->sax->error(ctxt->userData,
6101 "xmlParsePEReference: no name\n");
6102 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006103 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006104 } else {
6105 if (RAW == ';') {
6106 NEXT;
6107 if ((ctxt->sax != NULL) &&
6108 (ctxt->sax->getParameterEntity != NULL))
6109 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6110 name);
6111 if (entity == NULL) {
6112 /*
6113 * [ WFC: Entity Declared ]
6114 * In a document without any DTD, a document with only an
6115 * internal DTD subset which contains no parameter entity
6116 * references, or a document with "standalone='yes'", ...
6117 * ... The declaration of a parameter entity must precede
6118 * any reference to it...
6119 */
6120 if ((ctxt->standalone == 1) ||
6121 ((ctxt->hasExternalSubset == 0) &&
6122 (ctxt->hasPErefs == 0))) {
6123 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6124 if ((!ctxt->disableSAX) &&
6125 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6126 ctxt->sax->error(ctxt->userData,
6127 "PEReference: %%%s; not found\n", name);
6128 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006129 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006130 } else {
6131 /*
6132 * [ VC: Entity Declared ]
6133 * In a document with an external subset or external
6134 * parameter entities with "standalone='no'", ...
6135 * ... The declaration of a parameter entity must precede
6136 * any reference to it...
6137 */
6138 if ((!ctxt->disableSAX) &&
6139 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6140 ctxt->sax->warning(ctxt->userData,
6141 "PEReference: %%%s; not found\n", name);
6142 ctxt->valid = 0;
6143 }
6144 } else {
6145 /*
6146 * Internal checking in case the entity quest barfed
6147 */
6148 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6149 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6150 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6151 ctxt->sax->warning(ctxt->userData,
6152 "Internal: %%%s; is not a parameter entity\n", name);
Daniel Veillardf5582f12002-06-11 10:08:16 +00006153 } else if (ctxt->input->free != deallocblankswrapper) {
6154 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
6155 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00006156 } else {
6157 /*
6158 * TODO !!!
6159 * handle the extra spaces added before and after
6160 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6161 */
6162 input = xmlNewEntityInputStream(ctxt, entity);
6163 xmlPushInput(ctxt, input);
6164 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6165 (RAW == '<') && (NXT(1) == '?') &&
6166 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6167 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6168 xmlParseTextDecl(ctxt);
6169 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6170 /*
6171 * The XML REC instructs us to stop parsing
6172 * right here
6173 */
6174 ctxt->instate = XML_PARSER_EOF;
6175 xmlFree(name);
6176 return;
6177 }
6178 }
Owen Taylor3473f882001-02-23 17:55:21 +00006179 }
6180 }
6181 ctxt->hasPErefs = 1;
6182 } else {
6183 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6184 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6185 ctxt->sax->error(ctxt->userData,
6186 "xmlParsePEReference: expecting ';'\n");
6187 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006188 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006189 }
6190 xmlFree(name);
6191 }
6192 }
6193}
6194
6195/**
6196 * xmlParseStringPEReference:
6197 * @ctxt: an XML parser context
6198 * @str: a pointer to an index in the string
6199 *
6200 * parse PEReference declarations
6201 *
6202 * [69] PEReference ::= '%' Name ';'
6203 *
6204 * [ WFC: No Recursion ]
6205 * A parsed entity must not contain a recursive
6206 * reference to itself, either directly or indirectly.
6207 *
6208 * [ WFC: Entity Declared ]
6209 * In a document without any DTD, a document with only an internal DTD
6210 * subset which contains no parameter entity references, or a document
6211 * with "standalone='yes'", ... ... The declaration of a parameter
6212 * entity must precede any reference to it...
6213 *
6214 * [ VC: Entity Declared ]
6215 * In a document with an external subset or external parameter entities
6216 * with "standalone='no'", ... ... The declaration of a parameter entity
6217 * must precede any reference to it...
6218 *
6219 * [ WFC: In DTD ]
6220 * Parameter-entity references may only appear in the DTD.
6221 * NOTE: misleading but this is handled.
6222 *
6223 * Returns the string of the entity content.
6224 * str is updated to the current value of the index
6225 */
6226xmlEntityPtr
6227xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6228 const xmlChar *ptr;
6229 xmlChar cur;
6230 xmlChar *name;
6231 xmlEntityPtr entity = NULL;
6232
6233 if ((str == NULL) || (*str == NULL)) return(NULL);
6234 ptr = *str;
6235 cur = *ptr;
6236 if (cur == '%') {
6237 ptr++;
6238 cur = *ptr;
6239 name = xmlParseStringName(ctxt, &ptr);
6240 if (name == NULL) {
6241 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6242 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6243 ctxt->sax->error(ctxt->userData,
6244 "xmlParseStringPEReference: no name\n");
6245 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006246 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006247 } else {
6248 cur = *ptr;
6249 if (cur == ';') {
6250 ptr++;
6251 cur = *ptr;
6252 if ((ctxt->sax != NULL) &&
6253 (ctxt->sax->getParameterEntity != NULL))
6254 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6255 name);
6256 if (entity == NULL) {
6257 /*
6258 * [ WFC: Entity Declared ]
6259 * In a document without any DTD, a document with only an
6260 * internal DTD subset which contains no parameter entity
6261 * references, or a document with "standalone='yes'", ...
6262 * ... The declaration of a parameter entity must precede
6263 * any reference to it...
6264 */
6265 if ((ctxt->standalone == 1) ||
6266 ((ctxt->hasExternalSubset == 0) &&
6267 (ctxt->hasPErefs == 0))) {
6268 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6269 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6270 ctxt->sax->error(ctxt->userData,
6271 "PEReference: %%%s; not found\n", name);
6272 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006273 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006274 } else {
6275 /*
6276 * [ VC: Entity Declared ]
6277 * In a document with an external subset or external
6278 * parameter entities with "standalone='no'", ...
6279 * ... The declaration of a parameter entity must
6280 * precede any reference to it...
6281 */
6282 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6283 ctxt->sax->warning(ctxt->userData,
6284 "PEReference: %%%s; not found\n", name);
6285 ctxt->valid = 0;
6286 }
6287 } else {
6288 /*
6289 * Internal checking in case the entity quest barfed
6290 */
6291 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6292 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6293 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6294 ctxt->sax->warning(ctxt->userData,
6295 "Internal: %%%s; is not a parameter entity\n", name);
6296 }
6297 }
6298 ctxt->hasPErefs = 1;
6299 } else {
6300 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6301 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6302 ctxt->sax->error(ctxt->userData,
6303 "xmlParseStringPEReference: expecting ';'\n");
6304 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006305 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006306 }
6307 xmlFree(name);
6308 }
6309 }
6310 *str = ptr;
6311 return(entity);
6312}
6313
6314/**
6315 * xmlParseDocTypeDecl:
6316 * @ctxt: an XML parser context
6317 *
6318 * parse a DOCTYPE declaration
6319 *
6320 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6321 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6322 *
6323 * [ VC: Root Element Type ]
6324 * The Name in the document type declaration must match the element
6325 * type of the root element.
6326 */
6327
6328void
6329xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6330 xmlChar *name = NULL;
6331 xmlChar *ExternalID = NULL;
6332 xmlChar *URI = NULL;
6333
6334 /*
6335 * We know that '<!DOCTYPE' has been detected.
6336 */
6337 SKIP(9);
6338
6339 SKIP_BLANKS;
6340
6341 /*
6342 * Parse the DOCTYPE name.
6343 */
6344 name = xmlParseName(ctxt);
6345 if (name == NULL) {
6346 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6347 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6348 ctxt->sax->error(ctxt->userData,
6349 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6350 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006351 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006352 }
6353 ctxt->intSubName = name;
6354
6355 SKIP_BLANKS;
6356
6357 /*
6358 * Check for SystemID and ExternalID
6359 */
6360 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6361
6362 if ((URI != NULL) || (ExternalID != NULL)) {
6363 ctxt->hasExternalSubset = 1;
6364 }
6365 ctxt->extSubURI = URI;
6366 ctxt->extSubSystem = ExternalID;
6367
6368 SKIP_BLANKS;
6369
6370 /*
6371 * Create and update the internal subset.
6372 */
6373 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6374 (!ctxt->disableSAX))
6375 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6376
6377 /*
6378 * Is there any internal subset declarations ?
6379 * they are handled separately in xmlParseInternalSubset()
6380 */
6381 if (RAW == '[')
6382 return;
6383
6384 /*
6385 * We should be at the end of the DOCTYPE declaration.
6386 */
6387 if (RAW != '>') {
6388 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6389 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006390 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006391 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006392 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006393 }
6394 NEXT;
6395}
6396
6397/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006398 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006399 * @ctxt: an XML parser context
6400 *
6401 * parse the internal subset declaration
6402 *
6403 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6404 */
6405
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006406static void
Owen Taylor3473f882001-02-23 17:55:21 +00006407xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6408 /*
6409 * Is there any DTD definition ?
6410 */
6411 if (RAW == '[') {
6412 ctxt->instate = XML_PARSER_DTD;
6413 NEXT;
6414 /*
6415 * Parse the succession of Markup declarations and
6416 * PEReferences.
6417 * Subsequence (markupdecl | PEReference | S)*
6418 */
6419 while (RAW != ']') {
6420 const xmlChar *check = CUR_PTR;
6421 int cons = ctxt->input->consumed;
6422
6423 SKIP_BLANKS;
6424 xmlParseMarkupDecl(ctxt);
6425 xmlParsePEReference(ctxt);
6426
6427 /*
6428 * Pop-up of finished entities.
6429 */
6430 while ((RAW == 0) && (ctxt->inputNr > 1))
6431 xmlPopInput(ctxt);
6432
6433 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6434 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6435 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6436 ctxt->sax->error(ctxt->userData,
6437 "xmlParseInternalSubset: error detected in Markup declaration\n");
6438 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006439 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006440 break;
6441 }
6442 }
6443 if (RAW == ']') {
6444 NEXT;
6445 SKIP_BLANKS;
6446 }
6447 }
6448
6449 /*
6450 * We should be at the end of the DOCTYPE declaration.
6451 */
6452 if (RAW != '>') {
6453 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6454 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006455 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006456 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006457 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006458 }
6459 NEXT;
6460}
6461
6462/**
6463 * xmlParseAttribute:
6464 * @ctxt: an XML parser context
6465 * @value: a xmlChar ** used to store the value of the attribute
6466 *
6467 * parse an attribute
6468 *
6469 * [41] Attribute ::= Name Eq AttValue
6470 *
6471 * [ WFC: No External Entity References ]
6472 * Attribute values cannot contain direct or indirect entity references
6473 * to external entities.
6474 *
6475 * [ WFC: No < in Attribute Values ]
6476 * The replacement text of any entity referred to directly or indirectly in
6477 * an attribute value (other than "&lt;") must not contain a <.
6478 *
6479 * [ VC: Attribute Value Type ]
6480 * The attribute must have been declared; the value must be of the type
6481 * declared for it.
6482 *
6483 * [25] Eq ::= S? '=' S?
6484 *
6485 * With namespace:
6486 *
6487 * [NS 11] Attribute ::= QName Eq AttValue
6488 *
6489 * Also the case QName == xmlns:??? is handled independently as a namespace
6490 * definition.
6491 *
6492 * Returns the attribute name, and the value in *value.
6493 */
6494
6495xmlChar *
6496xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6497 xmlChar *name, *val;
6498
6499 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006500 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006501 name = xmlParseName(ctxt);
6502 if (name == NULL) {
6503 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6504 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6505 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6506 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006507 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006508 return(NULL);
6509 }
6510
6511 /*
6512 * read the value
6513 */
6514 SKIP_BLANKS;
6515 if (RAW == '=') {
6516 NEXT;
6517 SKIP_BLANKS;
6518 val = xmlParseAttValue(ctxt);
6519 ctxt->instate = XML_PARSER_CONTENT;
6520 } else {
6521 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6522 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6523 ctxt->sax->error(ctxt->userData,
6524 "Specification mandate value for attribute %s\n", name);
6525 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006526 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006527 xmlFree(name);
6528 return(NULL);
6529 }
6530
6531 /*
6532 * Check that xml:lang conforms to the specification
6533 * No more registered as an error, just generate a warning now
6534 * since this was deprecated in XML second edition
6535 */
6536 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6537 if (!xmlCheckLanguageID(val)) {
6538 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6539 ctxt->sax->warning(ctxt->userData,
6540 "Malformed value for xml:lang : %s\n", val);
6541 }
6542 }
6543
6544 /*
6545 * Check that xml:space conforms to the specification
6546 */
6547 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6548 if (xmlStrEqual(val, BAD_CAST "default"))
6549 *(ctxt->space) = 0;
6550 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6551 *(ctxt->space) = 1;
6552 else {
6553 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6554 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6555 ctxt->sax->error(ctxt->userData,
6556"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6557 val);
6558 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006559 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006560 }
6561 }
6562
6563 *value = val;
6564 return(name);
6565}
6566
6567/**
6568 * xmlParseStartTag:
6569 * @ctxt: an XML parser context
6570 *
6571 * parse a start of tag either for rule element or
6572 * EmptyElement. In both case we don't parse the tag closing chars.
6573 *
6574 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6575 *
6576 * [ WFC: Unique Att Spec ]
6577 * No attribute name may appear more than once in the same start-tag or
6578 * empty-element tag.
6579 *
6580 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6581 *
6582 * [ WFC: Unique Att Spec ]
6583 * No attribute name may appear more than once in the same start-tag or
6584 * empty-element tag.
6585 *
6586 * With namespace:
6587 *
6588 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6589 *
6590 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6591 *
6592 * Returns the element name parsed
6593 */
6594
6595xmlChar *
6596xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6597 xmlChar *name;
6598 xmlChar *attname;
6599 xmlChar *attvalue;
6600 const xmlChar **atts = NULL;
6601 int nbatts = 0;
6602 int maxatts = 0;
6603 int i;
6604
6605 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006606 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006607
6608 name = xmlParseName(ctxt);
6609 if (name == NULL) {
6610 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6611 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6612 ctxt->sax->error(ctxt->userData,
6613 "xmlParseStartTag: invalid element name\n");
6614 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006615 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006616 return(NULL);
6617 }
6618
6619 /*
6620 * Now parse the attributes, it ends up with the ending
6621 *
6622 * (S Attribute)* S?
6623 */
6624 SKIP_BLANKS;
6625 GROW;
6626
Daniel Veillard21a0f912001-02-25 19:54:14 +00006627 while ((RAW != '>') &&
6628 ((RAW != '/') || (NXT(1) != '>')) &&
6629 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006630 const xmlChar *q = CUR_PTR;
6631 int cons = ctxt->input->consumed;
6632
6633 attname = xmlParseAttribute(ctxt, &attvalue);
6634 if ((attname != NULL) && (attvalue != NULL)) {
6635 /*
6636 * [ WFC: Unique Att Spec ]
6637 * No attribute name may appear more than once in the same
6638 * start-tag or empty-element tag.
6639 */
6640 for (i = 0; i < nbatts;i += 2) {
6641 if (xmlStrEqual(atts[i], attname)) {
6642 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6643 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6644 ctxt->sax->error(ctxt->userData,
6645 "Attribute %s redefined\n",
6646 attname);
6647 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006648 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006649 xmlFree(attname);
6650 xmlFree(attvalue);
6651 goto failed;
6652 }
6653 }
6654
6655 /*
6656 * Add the pair to atts
6657 */
6658 if (atts == NULL) {
6659 maxatts = 10;
6660 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6661 if (atts == NULL) {
6662 xmlGenericError(xmlGenericErrorContext,
6663 "malloc of %ld byte failed\n",
6664 maxatts * (long)sizeof(xmlChar *));
6665 return(NULL);
6666 }
6667 } else if (nbatts + 4 > maxatts) {
6668 maxatts *= 2;
6669 atts = (const xmlChar **) xmlRealloc((void *) atts,
6670 maxatts * sizeof(xmlChar *));
6671 if (atts == NULL) {
6672 xmlGenericError(xmlGenericErrorContext,
6673 "realloc of %ld byte failed\n",
6674 maxatts * (long)sizeof(xmlChar *));
6675 return(NULL);
6676 }
6677 }
6678 atts[nbatts++] = attname;
6679 atts[nbatts++] = attvalue;
6680 atts[nbatts] = NULL;
6681 atts[nbatts + 1] = NULL;
6682 } else {
6683 if (attname != NULL)
6684 xmlFree(attname);
6685 if (attvalue != NULL)
6686 xmlFree(attvalue);
6687 }
6688
6689failed:
6690
Daniel Veillard3772de32002-12-17 10:31:45 +00006691 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006692 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6693 break;
6694 if (!IS_BLANK(RAW)) {
6695 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6696 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6697 ctxt->sax->error(ctxt->userData,
6698 "attributes construct error\n");
6699 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006700 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006701 }
6702 SKIP_BLANKS;
6703 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6704 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6705 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6706 ctxt->sax->error(ctxt->userData,
6707 "xmlParseStartTag: problem parsing attributes\n");
6708 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006709 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006710 break;
6711 }
6712 GROW;
6713 }
6714
6715 /*
6716 * SAX: Start of Element !
6717 */
6718 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6719 (!ctxt->disableSAX))
6720 ctxt->sax->startElement(ctxt->userData, name, atts);
6721
6722 if (atts != NULL) {
6723 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6724 xmlFree((void *) atts);
6725 }
6726 return(name);
6727}
6728
6729/**
6730 * xmlParseEndTag:
6731 * @ctxt: an XML parser context
6732 *
6733 * parse an end of tag
6734 *
6735 * [42] ETag ::= '</' Name S? '>'
6736 *
6737 * With namespace
6738 *
6739 * [NS 9] ETag ::= '</' QName S? '>'
6740 */
6741
6742void
6743xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6744 xmlChar *name;
6745 xmlChar *oldname;
6746
6747 GROW;
6748 if ((RAW != '<') || (NXT(1) != '/')) {
6749 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6750 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6751 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6752 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006753 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006754 return;
6755 }
6756 SKIP(2);
6757
Daniel Veillard46de64e2002-05-29 08:21:33 +00006758 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006759
6760 /*
6761 * We should definitely be at the ending "S? '>'" part
6762 */
6763 GROW;
6764 SKIP_BLANKS;
6765 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6766 ctxt->errNo = XML_ERR_GT_REQUIRED;
6767 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6768 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6769 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006770 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006771 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006772 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006773
6774 /*
6775 * [ WFC: Element Type Match ]
6776 * The Name in an element's end-tag must match the element type in the
6777 * start-tag.
6778 *
6779 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006780 if (name != (xmlChar*)1) {
Owen Taylor3473f882001-02-23 17:55:21 +00006781 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6782 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00006783 if (name != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00006784 ctxt->sax->error(ctxt->userData,
6785 "Opening and ending tag mismatch: %s and %s\n",
6786 ctxt->name, name);
Daniel Veillard46de64e2002-05-29 08:21:33 +00006787 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006788 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006789 "Ending tag error for: %s\n", ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006790 }
6791
6792 }
6793 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006794 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6795#if 0
6796 else {
6797 /*
6798 * Recover in case of one missing close
6799 */
6800 if ((ctxt->nameNr > 2) &&
6801 (xmlStrEqual(ctxt->nameTab[ctxt->nameNr -2], name))) {
6802 namePop(ctxt);
6803 spacePop(ctxt);
6804 }
6805 }
6806#endif
6807 if (name != NULL)
6808 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00006809 }
6810
6811 /*
6812 * SAX: End of Tag
6813 */
6814 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6815 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006816 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006817
Owen Taylor3473f882001-02-23 17:55:21 +00006818 oldname = namePop(ctxt);
6819 spacePop(ctxt);
6820 if (oldname != NULL) {
6821#ifdef DEBUG_STACK
6822 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6823#endif
6824 xmlFree(oldname);
6825 }
6826 return;
6827}
6828
6829/**
6830 * xmlParseCDSect:
6831 * @ctxt: an XML parser context
6832 *
6833 * Parse escaped pure raw content.
6834 *
6835 * [18] CDSect ::= CDStart CData CDEnd
6836 *
6837 * [19] CDStart ::= '<![CDATA['
6838 *
6839 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6840 *
6841 * [21] CDEnd ::= ']]>'
6842 */
6843void
6844xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6845 xmlChar *buf = NULL;
6846 int len = 0;
6847 int size = XML_PARSER_BUFFER_SIZE;
6848 int r, rl;
6849 int s, sl;
6850 int cur, l;
6851 int count = 0;
6852
6853 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6854 (NXT(2) == '[') && (NXT(3) == 'C') &&
6855 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6856 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6857 (NXT(8) == '[')) {
6858 SKIP(9);
6859 } else
6860 return;
6861
6862 ctxt->instate = XML_PARSER_CDATA_SECTION;
6863 r = CUR_CHAR(rl);
6864 if (!IS_CHAR(r)) {
6865 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6866 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6867 ctxt->sax->error(ctxt->userData,
6868 "CData section not finished\n");
6869 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006870 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006871 ctxt->instate = XML_PARSER_CONTENT;
6872 return;
6873 }
6874 NEXTL(rl);
6875 s = CUR_CHAR(sl);
6876 if (!IS_CHAR(s)) {
6877 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6878 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6879 ctxt->sax->error(ctxt->userData,
6880 "CData section not finished\n");
6881 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006882 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006883 ctxt->instate = XML_PARSER_CONTENT;
6884 return;
6885 }
6886 NEXTL(sl);
6887 cur = CUR_CHAR(l);
6888 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6889 if (buf == NULL) {
6890 xmlGenericError(xmlGenericErrorContext,
6891 "malloc of %d byte failed\n", size);
6892 return;
6893 }
6894 while (IS_CHAR(cur) &&
6895 ((r != ']') || (s != ']') || (cur != '>'))) {
6896 if (len + 5 >= size) {
6897 size *= 2;
6898 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6899 if (buf == NULL) {
6900 xmlGenericError(xmlGenericErrorContext,
6901 "realloc of %d byte failed\n", size);
6902 return;
6903 }
6904 }
6905 COPY_BUF(rl,buf,len,r);
6906 r = s;
6907 rl = sl;
6908 s = cur;
6909 sl = l;
6910 count++;
6911 if (count > 50) {
6912 GROW;
6913 count = 0;
6914 }
6915 NEXTL(l);
6916 cur = CUR_CHAR(l);
6917 }
6918 buf[len] = 0;
6919 ctxt->instate = XML_PARSER_CONTENT;
6920 if (cur != '>') {
6921 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6922 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6923 ctxt->sax->error(ctxt->userData,
6924 "CData section not finished\n%.50s\n", buf);
6925 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006926 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006927 xmlFree(buf);
6928 return;
6929 }
6930 NEXTL(l);
6931
6932 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006933 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00006934 */
6935 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6936 if (ctxt->sax->cdataBlock != NULL)
6937 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006938 else if (ctxt->sax->characters != NULL)
6939 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006940 }
6941 xmlFree(buf);
6942}
6943
6944/**
6945 * xmlParseContent:
6946 * @ctxt: an XML parser context
6947 *
6948 * Parse a content:
6949 *
6950 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6951 */
6952
6953void
6954xmlParseContent(xmlParserCtxtPtr ctxt) {
6955 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00006956 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00006957 ((RAW != '<') || (NXT(1) != '/'))) {
6958 const xmlChar *test = CUR_PTR;
6959 int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006960 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006961
6962 /*
Owen Taylor3473f882001-02-23 17:55:21 +00006963 * First case : a Processing Instruction.
6964 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00006965 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006966 xmlParsePI(ctxt);
6967 }
6968
6969 /*
6970 * Second case : a CDSection
6971 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006972 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006973 (NXT(2) == '[') && (NXT(3) == 'C') &&
6974 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6975 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6976 (NXT(8) == '[')) {
6977 xmlParseCDSect(ctxt);
6978 }
6979
6980 /*
6981 * Third case : a comment
6982 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006983 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006984 (NXT(2) == '-') && (NXT(3) == '-')) {
6985 xmlParseComment(ctxt);
6986 ctxt->instate = XML_PARSER_CONTENT;
6987 }
6988
6989 /*
6990 * Fourth case : a sub-element.
6991 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006992 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006993 xmlParseElement(ctxt);
6994 }
6995
6996 /*
6997 * Fifth case : a reference. If if has not been resolved,
6998 * parsing returns it's Name, create the node
6999 */
7000
Daniel Veillard21a0f912001-02-25 19:54:14 +00007001 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00007002 xmlParseReference(ctxt);
7003 }
7004
7005 /*
7006 * Last case, text. Note that References are handled directly.
7007 */
7008 else {
7009 xmlParseCharData(ctxt, 0);
7010 }
7011
7012 GROW;
7013 /*
7014 * Pop-up of finished entities.
7015 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00007016 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00007017 xmlPopInput(ctxt);
7018 SHRINK;
7019
Daniel Veillardfdc91562002-07-01 21:52:03 +00007020 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007021 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
7022 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7023 ctxt->sax->error(ctxt->userData,
7024 "detected an error in element content\n");
7025 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007026 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007027 ctxt->instate = XML_PARSER_EOF;
7028 break;
7029 }
7030 }
7031}
7032
7033/**
7034 * xmlParseElement:
7035 * @ctxt: an XML parser context
7036 *
7037 * parse an XML element, this is highly recursive
7038 *
7039 * [39] element ::= EmptyElemTag | STag content ETag
7040 *
7041 * [ WFC: Element Type Match ]
7042 * The Name in an element's end-tag must match the element type in the
7043 * start-tag.
7044 *
7045 * [ VC: Element Valid ]
7046 * An element is valid if there is a declaration matching elementdecl
7047 * where the Name matches the element type and one of the following holds:
7048 * - The declaration matches EMPTY and the element has no content.
7049 * - The declaration matches children and the sequence of child elements
7050 * belongs to the language generated by the regular expression in the
7051 * content model, with optional white space (characters matching the
7052 * nonterminal S) between each pair of child elements.
7053 * - The declaration matches Mixed and the content consists of character
7054 * data and child elements whose types match names in the content model.
7055 * - The declaration matches ANY, and the types of any child elements have
7056 * been declared.
7057 */
7058
7059void
7060xmlParseElement(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00007061 xmlChar *name;
7062 xmlChar *oldname;
7063 xmlParserNodeInfo node_info;
7064 xmlNodePtr ret;
7065
7066 /* Capture start position */
7067 if (ctxt->record_info) {
7068 node_info.begin_pos = ctxt->input->consumed +
7069 (CUR_PTR - ctxt->input->base);
7070 node_info.begin_line = ctxt->input->line;
7071 }
7072
7073 if (ctxt->spaceNr == 0)
7074 spacePush(ctxt, -1);
7075 else
7076 spacePush(ctxt, *ctxt->space);
7077
7078 name = xmlParseStartTag(ctxt);
7079 if (name == NULL) {
7080 spacePop(ctxt);
7081 return;
7082 }
7083 namePush(ctxt, name);
7084 ret = ctxt->node;
7085
7086 /*
7087 * [ VC: Root Element Type ]
7088 * The Name in the document type declaration must match the element
7089 * type of the root element.
7090 */
7091 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7092 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7093 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7094
7095 /*
7096 * Check for an Empty Element.
7097 */
7098 if ((RAW == '/') && (NXT(1) == '>')) {
7099 SKIP(2);
7100 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7101 (!ctxt->disableSAX))
7102 ctxt->sax->endElement(ctxt->userData, name);
7103 oldname = namePop(ctxt);
7104 spacePop(ctxt);
7105 if (oldname != NULL) {
7106#ifdef DEBUG_STACK
7107 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7108#endif
7109 xmlFree(oldname);
7110 }
7111 if ( ret != NULL && ctxt->record_info ) {
7112 node_info.end_pos = ctxt->input->consumed +
7113 (CUR_PTR - ctxt->input->base);
7114 node_info.end_line = ctxt->input->line;
7115 node_info.node = ret;
7116 xmlParserAddNodeInfo(ctxt, &node_info);
7117 }
7118 return;
7119 }
7120 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00007121 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007122 } else {
7123 ctxt->errNo = XML_ERR_GT_REQUIRED;
7124 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7125 ctxt->sax->error(ctxt->userData,
Daniel Veillard38d80e22002-06-11 07:24:56 +00007126 "Couldn't find end of Start Tag %s\n",
7127 name);
Owen Taylor3473f882001-02-23 17:55:21 +00007128 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007129 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007130
7131 /*
7132 * end of parsing of this node.
7133 */
7134 nodePop(ctxt);
7135 oldname = namePop(ctxt);
7136 spacePop(ctxt);
7137 if (oldname != NULL) {
7138#ifdef DEBUG_STACK
7139 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7140#endif
7141 xmlFree(oldname);
7142 }
7143
7144 /*
7145 * Capture end position and add node
7146 */
7147 if ( ret != NULL && ctxt->record_info ) {
7148 node_info.end_pos = ctxt->input->consumed +
7149 (CUR_PTR - ctxt->input->base);
7150 node_info.end_line = ctxt->input->line;
7151 node_info.node = ret;
7152 xmlParserAddNodeInfo(ctxt, &node_info);
7153 }
7154 return;
7155 }
7156
7157 /*
7158 * Parse the content of the element:
7159 */
7160 xmlParseContent(ctxt);
7161 if (!IS_CHAR(RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00007162 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00007163 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7164 ctxt->sax->error(ctxt->userData,
Daniel Veillard38d80e22002-06-11 07:24:56 +00007165 "Premature end of data in tag %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007166 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007167 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007168
7169 /*
7170 * end of parsing of this node.
7171 */
7172 nodePop(ctxt);
7173 oldname = namePop(ctxt);
7174 spacePop(ctxt);
7175 if (oldname != NULL) {
7176#ifdef DEBUG_STACK
7177 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7178#endif
7179 xmlFree(oldname);
7180 }
7181 return;
7182 }
7183
7184 /*
7185 * parse the end of tag: '</' should be here.
7186 */
7187 xmlParseEndTag(ctxt);
7188
7189 /*
7190 * Capture end position and add node
7191 */
7192 if ( ret != NULL && ctxt->record_info ) {
7193 node_info.end_pos = ctxt->input->consumed +
7194 (CUR_PTR - ctxt->input->base);
7195 node_info.end_line = ctxt->input->line;
7196 node_info.node = ret;
7197 xmlParserAddNodeInfo(ctxt, &node_info);
7198 }
7199}
7200
7201/**
7202 * xmlParseVersionNum:
7203 * @ctxt: an XML parser context
7204 *
7205 * parse the XML version value.
7206 *
7207 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7208 *
7209 * Returns the string giving the XML version number, or NULL
7210 */
7211xmlChar *
7212xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7213 xmlChar *buf = NULL;
7214 int len = 0;
7215 int size = 10;
7216 xmlChar cur;
7217
7218 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7219 if (buf == NULL) {
7220 xmlGenericError(xmlGenericErrorContext,
7221 "malloc of %d byte failed\n", size);
7222 return(NULL);
7223 }
7224 cur = CUR;
7225 while (((cur >= 'a') && (cur <= 'z')) ||
7226 ((cur >= 'A') && (cur <= 'Z')) ||
7227 ((cur >= '0') && (cur <= '9')) ||
7228 (cur == '_') || (cur == '.') ||
7229 (cur == ':') || (cur == '-')) {
7230 if (len + 1 >= size) {
7231 size *= 2;
7232 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7233 if (buf == NULL) {
7234 xmlGenericError(xmlGenericErrorContext,
7235 "realloc of %d byte failed\n", size);
7236 return(NULL);
7237 }
7238 }
7239 buf[len++] = cur;
7240 NEXT;
7241 cur=CUR;
7242 }
7243 buf[len] = 0;
7244 return(buf);
7245}
7246
7247/**
7248 * xmlParseVersionInfo:
7249 * @ctxt: an XML parser context
7250 *
7251 * parse the XML version.
7252 *
7253 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7254 *
7255 * [25] Eq ::= S? '=' S?
7256 *
7257 * Returns the version string, e.g. "1.0"
7258 */
7259
7260xmlChar *
7261xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7262 xmlChar *version = NULL;
7263 const xmlChar *q;
7264
7265 if ((RAW == 'v') && (NXT(1) == 'e') &&
7266 (NXT(2) == 'r') && (NXT(3) == 's') &&
7267 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7268 (NXT(6) == 'n')) {
7269 SKIP(7);
7270 SKIP_BLANKS;
7271 if (RAW != '=') {
7272 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7273 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7274 ctxt->sax->error(ctxt->userData,
7275 "xmlParseVersionInfo : expected '='\n");
7276 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007277 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007278 return(NULL);
7279 }
7280 NEXT;
7281 SKIP_BLANKS;
7282 if (RAW == '"') {
7283 NEXT;
7284 q = CUR_PTR;
7285 version = xmlParseVersionNum(ctxt);
7286 if (RAW != '"') {
7287 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7288 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7289 ctxt->sax->error(ctxt->userData,
7290 "String not closed\n%.50s\n", q);
7291 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007292 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007293 } else
7294 NEXT;
7295 } else if (RAW == '\''){
7296 NEXT;
7297 q = CUR_PTR;
7298 version = xmlParseVersionNum(ctxt);
7299 if (RAW != '\'') {
7300 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7301 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7302 ctxt->sax->error(ctxt->userData,
7303 "String not closed\n%.50s\n", q);
7304 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007305 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007306 } else
7307 NEXT;
7308 } else {
7309 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7310 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7311 ctxt->sax->error(ctxt->userData,
7312 "xmlParseVersionInfo : expected ' or \"\n");
7313 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007314 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007315 }
7316 }
7317 return(version);
7318}
7319
7320/**
7321 * xmlParseEncName:
7322 * @ctxt: an XML parser context
7323 *
7324 * parse the XML encoding name
7325 *
7326 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7327 *
7328 * Returns the encoding name value or NULL
7329 */
7330xmlChar *
7331xmlParseEncName(xmlParserCtxtPtr ctxt) {
7332 xmlChar *buf = NULL;
7333 int len = 0;
7334 int size = 10;
7335 xmlChar cur;
7336
7337 cur = CUR;
7338 if (((cur >= 'a') && (cur <= 'z')) ||
7339 ((cur >= 'A') && (cur <= 'Z'))) {
7340 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7341 if (buf == NULL) {
7342 xmlGenericError(xmlGenericErrorContext,
7343 "malloc of %d byte failed\n", size);
7344 return(NULL);
7345 }
7346
7347 buf[len++] = cur;
7348 NEXT;
7349 cur = CUR;
7350 while (((cur >= 'a') && (cur <= 'z')) ||
7351 ((cur >= 'A') && (cur <= 'Z')) ||
7352 ((cur >= '0') && (cur <= '9')) ||
7353 (cur == '.') || (cur == '_') ||
7354 (cur == '-')) {
7355 if (len + 1 >= size) {
7356 size *= 2;
7357 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7358 if (buf == NULL) {
7359 xmlGenericError(xmlGenericErrorContext,
7360 "realloc of %d byte failed\n", size);
7361 return(NULL);
7362 }
7363 }
7364 buf[len++] = cur;
7365 NEXT;
7366 cur = CUR;
7367 if (cur == 0) {
7368 SHRINK;
7369 GROW;
7370 cur = CUR;
7371 }
7372 }
7373 buf[len] = 0;
7374 } else {
7375 ctxt->errNo = XML_ERR_ENCODING_NAME;
7376 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7377 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7378 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007379 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007380 }
7381 return(buf);
7382}
7383
7384/**
7385 * xmlParseEncodingDecl:
7386 * @ctxt: an XML parser context
7387 *
7388 * parse the XML encoding declaration
7389 *
7390 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7391 *
7392 * this setups the conversion filters.
7393 *
7394 * Returns the encoding value or NULL
7395 */
7396
7397xmlChar *
7398xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7399 xmlChar *encoding = NULL;
7400 const xmlChar *q;
7401
7402 SKIP_BLANKS;
7403 if ((RAW == 'e') && (NXT(1) == 'n') &&
7404 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7405 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7406 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7407 SKIP(8);
7408 SKIP_BLANKS;
7409 if (RAW != '=') {
7410 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7411 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7412 ctxt->sax->error(ctxt->userData,
7413 "xmlParseEncodingDecl : expected '='\n");
7414 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007415 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007416 return(NULL);
7417 }
7418 NEXT;
7419 SKIP_BLANKS;
7420 if (RAW == '"') {
7421 NEXT;
7422 q = CUR_PTR;
7423 encoding = xmlParseEncName(ctxt);
7424 if (RAW != '"') {
7425 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7426 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7427 ctxt->sax->error(ctxt->userData,
7428 "String not closed\n%.50s\n", q);
7429 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007430 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007431 } else
7432 NEXT;
7433 } else if (RAW == '\''){
7434 NEXT;
7435 q = CUR_PTR;
7436 encoding = xmlParseEncName(ctxt);
7437 if (RAW != '\'') {
7438 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7439 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7440 ctxt->sax->error(ctxt->userData,
7441 "String not closed\n%.50s\n", q);
7442 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007443 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007444 } else
7445 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00007446 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007447 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7448 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7449 ctxt->sax->error(ctxt->userData,
7450 "xmlParseEncodingDecl : expected ' or \"\n");
7451 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007452 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007453 }
7454 if (encoding != NULL) {
7455 xmlCharEncoding enc;
7456 xmlCharEncodingHandlerPtr handler;
7457
7458 if (ctxt->input->encoding != NULL)
7459 xmlFree((xmlChar *) ctxt->input->encoding);
7460 ctxt->input->encoding = encoding;
7461
7462 enc = xmlParseCharEncoding((const char *) encoding);
7463 /*
7464 * registered set of known encodings
7465 */
7466 if (enc != XML_CHAR_ENCODING_ERROR) {
7467 xmlSwitchEncoding(ctxt, enc);
7468 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
Daniel Veillard46d6c442002-04-09 16:10:39 +00007469 ctxt->input->encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007470 xmlFree(encoding);
7471 return(NULL);
7472 }
7473 } else {
7474 /*
7475 * fallback for unknown encodings
7476 */
7477 handler = xmlFindCharEncodingHandler((const char *) encoding);
7478 if (handler != NULL) {
7479 xmlSwitchToEncoding(ctxt, handler);
7480 } else {
7481 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7482 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7483 ctxt->sax->error(ctxt->userData,
7484 "Unsupported encoding %s\n", encoding);
7485 return(NULL);
7486 }
7487 }
7488 }
7489 }
7490 return(encoding);
7491}
7492
7493/**
7494 * xmlParseSDDecl:
7495 * @ctxt: an XML parser context
7496 *
7497 * parse the XML standalone declaration
7498 *
7499 * [32] SDDecl ::= S 'standalone' Eq
7500 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7501 *
7502 * [ VC: Standalone Document Declaration ]
7503 * TODO The standalone document declaration must have the value "no"
7504 * if any external markup declarations contain declarations of:
7505 * - attributes with default values, if elements to which these
7506 * attributes apply appear in the document without specifications
7507 * of values for these attributes, or
7508 * - entities (other than amp, lt, gt, apos, quot), if references
7509 * to those entities appear in the document, or
7510 * - attributes with values subject to normalization, where the
7511 * attribute appears in the document with a value which will change
7512 * as a result of normalization, or
7513 * - element types with element content, if white space occurs directly
7514 * within any instance of those types.
7515 *
7516 * Returns 1 if standalone, 0 otherwise
7517 */
7518
7519int
7520xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7521 int standalone = -1;
7522
7523 SKIP_BLANKS;
7524 if ((RAW == 's') && (NXT(1) == 't') &&
7525 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7526 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7527 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7528 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7529 SKIP(10);
7530 SKIP_BLANKS;
7531 if (RAW != '=') {
7532 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7533 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7534 ctxt->sax->error(ctxt->userData,
7535 "XML standalone declaration : expected '='\n");
7536 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007537 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007538 return(standalone);
7539 }
7540 NEXT;
7541 SKIP_BLANKS;
7542 if (RAW == '\''){
7543 NEXT;
7544 if ((RAW == 'n') && (NXT(1) == 'o')) {
7545 standalone = 0;
7546 SKIP(2);
7547 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7548 (NXT(2) == 's')) {
7549 standalone = 1;
7550 SKIP(3);
7551 } else {
7552 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7553 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7554 ctxt->sax->error(ctxt->userData,
7555 "standalone accepts only 'yes' or 'no'\n");
7556 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007557 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007558 }
7559 if (RAW != '\'') {
7560 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7561 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7562 ctxt->sax->error(ctxt->userData, "String not closed\n");
7563 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007564 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007565 } else
7566 NEXT;
7567 } else if (RAW == '"'){
7568 NEXT;
7569 if ((RAW == 'n') && (NXT(1) == 'o')) {
7570 standalone = 0;
7571 SKIP(2);
7572 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7573 (NXT(2) == 's')) {
7574 standalone = 1;
7575 SKIP(3);
7576 } else {
7577 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7578 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7579 ctxt->sax->error(ctxt->userData,
7580 "standalone accepts only 'yes' or 'no'\n");
7581 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007582 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007583 }
7584 if (RAW != '"') {
7585 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7586 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7587 ctxt->sax->error(ctxt->userData, "String not closed\n");
7588 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007589 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007590 } else
7591 NEXT;
7592 } else {
7593 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7594 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7595 ctxt->sax->error(ctxt->userData,
7596 "Standalone value not found\n");
7597 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007598 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007599 }
7600 }
7601 return(standalone);
7602}
7603
7604/**
7605 * xmlParseXMLDecl:
7606 * @ctxt: an XML parser context
7607 *
7608 * parse an XML declaration header
7609 *
7610 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7611 */
7612
7613void
7614xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7615 xmlChar *version;
7616
7617 /*
7618 * We know that '<?xml' is here.
7619 */
7620 SKIP(5);
7621
7622 if (!IS_BLANK(RAW)) {
7623 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7624 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7625 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7626 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007627 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007628 }
7629 SKIP_BLANKS;
7630
7631 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007632 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007633 */
7634 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007635 if (version == NULL) {
7636 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7637 ctxt->sax->error(ctxt->userData,
7638 "Malformed declaration expecting version\n");
7639 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007640 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard19840942001-11-29 16:11:38 +00007641 } else {
7642 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7643 /*
7644 * TODO: Blueberry should be detected here
7645 */
7646 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7647 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7648 version);
7649 }
7650 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007651 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007652 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007653 }
Owen Taylor3473f882001-02-23 17:55:21 +00007654
7655 /*
7656 * We may have the encoding declaration
7657 */
7658 if (!IS_BLANK(RAW)) {
7659 if ((RAW == '?') && (NXT(1) == '>')) {
7660 SKIP(2);
7661 return;
7662 }
7663 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7664 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7665 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7666 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007667 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007668 }
7669 xmlParseEncodingDecl(ctxt);
7670 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7671 /*
7672 * The XML REC instructs us to stop parsing right here
7673 */
7674 return;
7675 }
7676
7677 /*
7678 * We may have the standalone status.
7679 */
7680 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7681 if ((RAW == '?') && (NXT(1) == '>')) {
7682 SKIP(2);
7683 return;
7684 }
7685 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7686 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7687 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7688 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007689 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007690 }
7691 SKIP_BLANKS;
7692 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7693
7694 SKIP_BLANKS;
7695 if ((RAW == '?') && (NXT(1) == '>')) {
7696 SKIP(2);
7697 } else if (RAW == '>') {
7698 /* Deprecated old WD ... */
7699 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7700 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7701 ctxt->sax->error(ctxt->userData,
7702 "XML declaration must end-up with '?>'\n");
7703 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007704 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007705 NEXT;
7706 } else {
7707 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7708 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7709 ctxt->sax->error(ctxt->userData,
7710 "parsing XML declaration: '?>' expected\n");
7711 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007712 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007713 MOVETO_ENDTAG(CUR_PTR);
7714 NEXT;
7715 }
7716}
7717
7718/**
7719 * xmlParseMisc:
7720 * @ctxt: an XML parser context
7721 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007722 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007723 *
7724 * [27] Misc ::= Comment | PI | S
7725 */
7726
7727void
7728xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00007729 while (((RAW == '<') && (NXT(1) == '?')) ||
7730 ((RAW == '<') && (NXT(1) == '!') &&
7731 (NXT(2) == '-') && (NXT(3) == '-')) ||
7732 IS_BLANK(CUR)) {
7733 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007734 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00007735 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007736 NEXT;
7737 } else
7738 xmlParseComment(ctxt);
7739 }
7740}
7741
7742/**
7743 * xmlParseDocument:
7744 * @ctxt: an XML parser context
7745 *
7746 * parse an XML document (and build a tree if using the standard SAX
7747 * interface).
7748 *
7749 * [1] document ::= prolog element Misc*
7750 *
7751 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7752 *
7753 * Returns 0, -1 in case of error. the parser context is augmented
7754 * as a result of the parsing.
7755 */
7756
7757int
7758xmlParseDocument(xmlParserCtxtPtr ctxt) {
7759 xmlChar start[4];
7760 xmlCharEncoding enc;
7761
7762 xmlInitParser();
7763
7764 GROW;
7765
7766 /*
7767 * SAX: beginning of the document processing.
7768 */
7769 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7770 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7771
Daniel Veillard50f34372001-08-03 12:06:36 +00007772 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007773 /*
7774 * Get the 4 first bytes and decode the charset
7775 * if enc != XML_CHAR_ENCODING_NONE
7776 * plug some encoding conversion routines.
7777 */
7778 start[0] = RAW;
7779 start[1] = NXT(1);
7780 start[2] = NXT(2);
7781 start[3] = NXT(3);
7782 enc = xmlDetectCharEncoding(start, 4);
7783 if (enc != XML_CHAR_ENCODING_NONE) {
7784 xmlSwitchEncoding(ctxt, enc);
7785 }
Owen Taylor3473f882001-02-23 17:55:21 +00007786 }
7787
7788
7789 if (CUR == 0) {
7790 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7791 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7792 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7793 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007794 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007795 }
7796
7797 /*
7798 * Check for the XMLDecl in the Prolog.
7799 */
7800 GROW;
7801 if ((RAW == '<') && (NXT(1) == '?') &&
7802 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7803 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7804
7805 /*
7806 * Note that we will switch encoding on the fly.
7807 */
7808 xmlParseXMLDecl(ctxt);
7809 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7810 /*
7811 * The XML REC instructs us to stop parsing right here
7812 */
7813 return(-1);
7814 }
7815 ctxt->standalone = ctxt->input->standalone;
7816 SKIP_BLANKS;
7817 } else {
7818 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7819 }
7820 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7821 ctxt->sax->startDocument(ctxt->userData);
7822
7823 /*
7824 * The Misc part of the Prolog
7825 */
7826 GROW;
7827 xmlParseMisc(ctxt);
7828
7829 /*
7830 * Then possibly doc type declaration(s) and more Misc
7831 * (doctypedecl Misc*)?
7832 */
7833 GROW;
7834 if ((RAW == '<') && (NXT(1) == '!') &&
7835 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7836 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7837 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7838 (NXT(8) == 'E')) {
7839
7840 ctxt->inSubset = 1;
7841 xmlParseDocTypeDecl(ctxt);
7842 if (RAW == '[') {
7843 ctxt->instate = XML_PARSER_DTD;
7844 xmlParseInternalSubset(ctxt);
7845 }
7846
7847 /*
7848 * Create and update the external subset.
7849 */
7850 ctxt->inSubset = 2;
7851 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7852 (!ctxt->disableSAX))
7853 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7854 ctxt->extSubSystem, ctxt->extSubURI);
7855 ctxt->inSubset = 0;
7856
7857
7858 ctxt->instate = XML_PARSER_PROLOG;
7859 xmlParseMisc(ctxt);
7860 }
7861
7862 /*
7863 * Time to start parsing the tree itself
7864 */
7865 GROW;
7866 if (RAW != '<') {
7867 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7868 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7869 ctxt->sax->error(ctxt->userData,
7870 "Start tag expected, '<' not found\n");
7871 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007872 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007873 ctxt->instate = XML_PARSER_EOF;
7874 } else {
7875 ctxt->instate = XML_PARSER_CONTENT;
7876 xmlParseElement(ctxt);
7877 ctxt->instate = XML_PARSER_EPILOG;
7878
7879
7880 /*
7881 * The Misc part at the end
7882 */
7883 xmlParseMisc(ctxt);
7884
Daniel Veillard561b7f82002-03-20 21:55:57 +00007885 if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00007886 ctxt->errNo = XML_ERR_DOCUMENT_END;
7887 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7888 ctxt->sax->error(ctxt->userData,
7889 "Extra content at the end of the document\n");
7890 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007891 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007892 }
7893 ctxt->instate = XML_PARSER_EOF;
7894 }
7895
7896 /*
7897 * SAX: end of the document processing.
7898 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007899 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007900 ctxt->sax->endDocument(ctxt->userData);
7901
Daniel Veillard5997aca2002-03-18 18:36:20 +00007902 /*
7903 * Remove locally kept entity definitions if the tree was not built
7904 */
7905 if ((ctxt->myDoc != NULL) &&
7906 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
7907 xmlFreeDoc(ctxt->myDoc);
7908 ctxt->myDoc = NULL;
7909 }
7910
Daniel Veillardc7612992002-02-17 22:47:37 +00007911 if (! ctxt->wellFormed) {
7912 ctxt->valid = 0;
7913 return(-1);
7914 }
Owen Taylor3473f882001-02-23 17:55:21 +00007915 return(0);
7916}
7917
7918/**
7919 * xmlParseExtParsedEnt:
7920 * @ctxt: an XML parser context
7921 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007922 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00007923 * An external general parsed entity is well-formed if it matches the
7924 * production labeled extParsedEnt.
7925 *
7926 * [78] extParsedEnt ::= TextDecl? content
7927 *
7928 * Returns 0, -1 in case of error. the parser context is augmented
7929 * as a result of the parsing.
7930 */
7931
7932int
7933xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7934 xmlChar start[4];
7935 xmlCharEncoding enc;
7936
7937 xmlDefaultSAXHandlerInit();
7938
7939 GROW;
7940
7941 /*
7942 * SAX: beginning of the document processing.
7943 */
7944 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7945 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7946
7947 /*
7948 * Get the 4 first bytes and decode the charset
7949 * if enc != XML_CHAR_ENCODING_NONE
7950 * plug some encoding conversion routines.
7951 */
7952 start[0] = RAW;
7953 start[1] = NXT(1);
7954 start[2] = NXT(2);
7955 start[3] = NXT(3);
7956 enc = xmlDetectCharEncoding(start, 4);
7957 if (enc != XML_CHAR_ENCODING_NONE) {
7958 xmlSwitchEncoding(ctxt, enc);
7959 }
7960
7961
7962 if (CUR == 0) {
7963 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7964 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7965 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7966 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007967 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007968 }
7969
7970 /*
7971 * Check for the XMLDecl in the Prolog.
7972 */
7973 GROW;
7974 if ((RAW == '<') && (NXT(1) == '?') &&
7975 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7976 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7977
7978 /*
7979 * Note that we will switch encoding on the fly.
7980 */
7981 xmlParseXMLDecl(ctxt);
7982 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7983 /*
7984 * The XML REC instructs us to stop parsing right here
7985 */
7986 return(-1);
7987 }
7988 SKIP_BLANKS;
7989 } else {
7990 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7991 }
7992 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7993 ctxt->sax->startDocument(ctxt->userData);
7994
7995 /*
7996 * Doing validity checking on chunk doesn't make sense
7997 */
7998 ctxt->instate = XML_PARSER_CONTENT;
7999 ctxt->validate = 0;
8000 ctxt->loadsubset = 0;
8001 ctxt->depth = 0;
8002
8003 xmlParseContent(ctxt);
8004
8005 if ((RAW == '<') && (NXT(1) == '/')) {
8006 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8007 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8008 ctxt->sax->error(ctxt->userData,
8009 "chunk is not well balanced\n");
8010 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008011 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008012 } else if (RAW != 0) {
8013 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8014 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8015 ctxt->sax->error(ctxt->userData,
8016 "extra content at the end of well balanced chunk\n");
8017 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008018 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008019 }
8020
8021 /*
8022 * SAX: end of the document processing.
8023 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008024 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008025 ctxt->sax->endDocument(ctxt->userData);
8026
8027 if (! ctxt->wellFormed) return(-1);
8028 return(0);
8029}
8030
8031/************************************************************************
8032 * *
8033 * Progressive parsing interfaces *
8034 * *
8035 ************************************************************************/
8036
8037/**
8038 * xmlParseLookupSequence:
8039 * @ctxt: an XML parser context
8040 * @first: the first char to lookup
8041 * @next: the next char to lookup or zero
8042 * @third: the next char to lookup or zero
8043 *
8044 * Try to find if a sequence (first, next, third) or just (first next) or
8045 * (first) is available in the input stream.
8046 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8047 * to avoid rescanning sequences of bytes, it DOES change the state of the
8048 * parser, do not use liberally.
8049 *
8050 * Returns the index to the current parsing point if the full sequence
8051 * is available, -1 otherwise.
8052 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008053static int
Owen Taylor3473f882001-02-23 17:55:21 +00008054xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8055 xmlChar next, xmlChar third) {
8056 int base, len;
8057 xmlParserInputPtr in;
8058 const xmlChar *buf;
8059
8060 in = ctxt->input;
8061 if (in == NULL) return(-1);
8062 base = in->cur - in->base;
8063 if (base < 0) return(-1);
8064 if (ctxt->checkIndex > base)
8065 base = ctxt->checkIndex;
8066 if (in->buf == NULL) {
8067 buf = in->base;
8068 len = in->length;
8069 } else {
8070 buf = in->buf->buffer->content;
8071 len = in->buf->buffer->use;
8072 }
8073 /* take into account the sequence length */
8074 if (third) len -= 2;
8075 else if (next) len --;
8076 for (;base < len;base++) {
8077 if (buf[base] == first) {
8078 if (third != 0) {
8079 if ((buf[base + 1] != next) ||
8080 (buf[base + 2] != third)) continue;
8081 } else if (next != 0) {
8082 if (buf[base + 1] != next) continue;
8083 }
8084 ctxt->checkIndex = 0;
8085#ifdef DEBUG_PUSH
8086 if (next == 0)
8087 xmlGenericError(xmlGenericErrorContext,
8088 "PP: lookup '%c' found at %d\n",
8089 first, base);
8090 else if (third == 0)
8091 xmlGenericError(xmlGenericErrorContext,
8092 "PP: lookup '%c%c' found at %d\n",
8093 first, next, base);
8094 else
8095 xmlGenericError(xmlGenericErrorContext,
8096 "PP: lookup '%c%c%c' found at %d\n",
8097 first, next, third, base);
8098#endif
8099 return(base - (in->cur - in->base));
8100 }
8101 }
8102 ctxt->checkIndex = base;
8103#ifdef DEBUG_PUSH
8104 if (next == 0)
8105 xmlGenericError(xmlGenericErrorContext,
8106 "PP: lookup '%c' failed\n", first);
8107 else if (third == 0)
8108 xmlGenericError(xmlGenericErrorContext,
8109 "PP: lookup '%c%c' failed\n", first, next);
8110 else
8111 xmlGenericError(xmlGenericErrorContext,
8112 "PP: lookup '%c%c%c' failed\n", first, next, third);
8113#endif
8114 return(-1);
8115}
8116
8117/**
8118 * xmlParseTryOrFinish:
8119 * @ctxt: an XML parser context
8120 * @terminate: last chunk indicator
8121 *
8122 * Try to progress on parsing
8123 *
8124 * Returns zero if no parsing was possible
8125 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008126static int
Owen Taylor3473f882001-02-23 17:55:21 +00008127xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8128 int ret = 0;
8129 int avail;
8130 xmlChar cur, next;
8131
8132#ifdef DEBUG_PUSH
8133 switch (ctxt->instate) {
8134 case XML_PARSER_EOF:
8135 xmlGenericError(xmlGenericErrorContext,
8136 "PP: try EOF\n"); break;
8137 case XML_PARSER_START:
8138 xmlGenericError(xmlGenericErrorContext,
8139 "PP: try START\n"); break;
8140 case XML_PARSER_MISC:
8141 xmlGenericError(xmlGenericErrorContext,
8142 "PP: try MISC\n");break;
8143 case XML_PARSER_COMMENT:
8144 xmlGenericError(xmlGenericErrorContext,
8145 "PP: try COMMENT\n");break;
8146 case XML_PARSER_PROLOG:
8147 xmlGenericError(xmlGenericErrorContext,
8148 "PP: try PROLOG\n");break;
8149 case XML_PARSER_START_TAG:
8150 xmlGenericError(xmlGenericErrorContext,
8151 "PP: try START_TAG\n");break;
8152 case XML_PARSER_CONTENT:
8153 xmlGenericError(xmlGenericErrorContext,
8154 "PP: try CONTENT\n");break;
8155 case XML_PARSER_CDATA_SECTION:
8156 xmlGenericError(xmlGenericErrorContext,
8157 "PP: try CDATA_SECTION\n");break;
8158 case XML_PARSER_END_TAG:
8159 xmlGenericError(xmlGenericErrorContext,
8160 "PP: try END_TAG\n");break;
8161 case XML_PARSER_ENTITY_DECL:
8162 xmlGenericError(xmlGenericErrorContext,
8163 "PP: try ENTITY_DECL\n");break;
8164 case XML_PARSER_ENTITY_VALUE:
8165 xmlGenericError(xmlGenericErrorContext,
8166 "PP: try ENTITY_VALUE\n");break;
8167 case XML_PARSER_ATTRIBUTE_VALUE:
8168 xmlGenericError(xmlGenericErrorContext,
8169 "PP: try ATTRIBUTE_VALUE\n");break;
8170 case XML_PARSER_DTD:
8171 xmlGenericError(xmlGenericErrorContext,
8172 "PP: try DTD\n");break;
8173 case XML_PARSER_EPILOG:
8174 xmlGenericError(xmlGenericErrorContext,
8175 "PP: try EPILOG\n");break;
8176 case XML_PARSER_PI:
8177 xmlGenericError(xmlGenericErrorContext,
8178 "PP: try PI\n");break;
8179 case XML_PARSER_IGNORE:
8180 xmlGenericError(xmlGenericErrorContext,
8181 "PP: try IGNORE\n");break;
8182 }
8183#endif
8184
8185 while (1) {
Aleksey Sanine48a3182002-05-09 18:20:01 +00008186 SHRINK;
8187
Owen Taylor3473f882001-02-23 17:55:21 +00008188 /*
8189 * Pop-up of finished entities.
8190 */
8191 while ((RAW == 0) && (ctxt->inputNr > 1))
8192 xmlPopInput(ctxt);
8193
8194 if (ctxt->input ==NULL) break;
8195 if (ctxt->input->buf == NULL)
8196 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00008197 else {
8198 /*
8199 * If we are operating on converted input, try to flush
8200 * remainng chars to avoid them stalling in the non-converted
8201 * buffer.
8202 */
8203 if ((ctxt->input->buf->raw != NULL) &&
8204 (ctxt->input->buf->raw->use > 0)) {
8205 int base = ctxt->input->base -
8206 ctxt->input->buf->buffer->content;
8207 int current = ctxt->input->cur - ctxt->input->base;
8208
8209 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8210 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8211 ctxt->input->cur = ctxt->input->base + current;
8212 ctxt->input->end =
8213 &ctxt->input->buf->buffer->content[
8214 ctxt->input->buf->buffer->use];
8215 }
8216 avail = ctxt->input->buf->buffer->use -
8217 (ctxt->input->cur - ctxt->input->base);
8218 }
Owen Taylor3473f882001-02-23 17:55:21 +00008219 if (avail < 1)
8220 goto done;
8221 switch (ctxt->instate) {
8222 case XML_PARSER_EOF:
8223 /*
8224 * Document parsing is done !
8225 */
8226 goto done;
8227 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008228 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8229 xmlChar start[4];
8230 xmlCharEncoding enc;
8231
8232 /*
8233 * Very first chars read from the document flow.
8234 */
8235 if (avail < 4)
8236 goto done;
8237
8238 /*
8239 * Get the 4 first bytes and decode the charset
8240 * if enc != XML_CHAR_ENCODING_NONE
8241 * plug some encoding conversion routines.
8242 */
8243 start[0] = RAW;
8244 start[1] = NXT(1);
8245 start[2] = NXT(2);
8246 start[3] = NXT(3);
8247 enc = xmlDetectCharEncoding(start, 4);
8248 if (enc != XML_CHAR_ENCODING_NONE) {
8249 xmlSwitchEncoding(ctxt, enc);
8250 }
8251 break;
8252 }
Owen Taylor3473f882001-02-23 17:55:21 +00008253
8254 cur = ctxt->input->cur[0];
8255 next = ctxt->input->cur[1];
8256 if (cur == 0) {
8257 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8258 ctxt->sax->setDocumentLocator(ctxt->userData,
8259 &xmlDefaultSAXLocator);
8260 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8261 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8262 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8263 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008264 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008265 ctxt->instate = XML_PARSER_EOF;
8266#ifdef DEBUG_PUSH
8267 xmlGenericError(xmlGenericErrorContext,
8268 "PP: entering EOF\n");
8269#endif
8270 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8271 ctxt->sax->endDocument(ctxt->userData);
8272 goto done;
8273 }
8274 if ((cur == '<') && (next == '?')) {
8275 /* PI or XML decl */
8276 if (avail < 5) return(ret);
8277 if ((!terminate) &&
8278 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8279 return(ret);
8280 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8281 ctxt->sax->setDocumentLocator(ctxt->userData,
8282 &xmlDefaultSAXLocator);
8283 if ((ctxt->input->cur[2] == 'x') &&
8284 (ctxt->input->cur[3] == 'm') &&
8285 (ctxt->input->cur[4] == 'l') &&
8286 (IS_BLANK(ctxt->input->cur[5]))) {
8287 ret += 5;
8288#ifdef DEBUG_PUSH
8289 xmlGenericError(xmlGenericErrorContext,
8290 "PP: Parsing XML Decl\n");
8291#endif
8292 xmlParseXMLDecl(ctxt);
8293 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8294 /*
8295 * The XML REC instructs us to stop parsing right
8296 * here
8297 */
8298 ctxt->instate = XML_PARSER_EOF;
8299 return(0);
8300 }
8301 ctxt->standalone = ctxt->input->standalone;
8302 if ((ctxt->encoding == NULL) &&
8303 (ctxt->input->encoding != NULL))
8304 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8305 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8306 (!ctxt->disableSAX))
8307 ctxt->sax->startDocument(ctxt->userData);
8308 ctxt->instate = XML_PARSER_MISC;
8309#ifdef DEBUG_PUSH
8310 xmlGenericError(xmlGenericErrorContext,
8311 "PP: entering MISC\n");
8312#endif
8313 } else {
8314 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8315 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8316 (!ctxt->disableSAX))
8317 ctxt->sax->startDocument(ctxt->userData);
8318 ctxt->instate = XML_PARSER_MISC;
8319#ifdef DEBUG_PUSH
8320 xmlGenericError(xmlGenericErrorContext,
8321 "PP: entering MISC\n");
8322#endif
8323 }
8324 } else {
8325 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8326 ctxt->sax->setDocumentLocator(ctxt->userData,
8327 &xmlDefaultSAXLocator);
8328 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8329 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8330 (!ctxt->disableSAX))
8331 ctxt->sax->startDocument(ctxt->userData);
8332 ctxt->instate = XML_PARSER_MISC;
8333#ifdef DEBUG_PUSH
8334 xmlGenericError(xmlGenericErrorContext,
8335 "PP: entering MISC\n");
8336#endif
8337 }
8338 break;
8339 case XML_PARSER_MISC:
8340 SKIP_BLANKS;
8341 if (ctxt->input->buf == NULL)
8342 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8343 else
8344 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8345 if (avail < 2)
8346 goto done;
8347 cur = ctxt->input->cur[0];
8348 next = ctxt->input->cur[1];
8349 if ((cur == '<') && (next == '?')) {
8350 if ((!terminate) &&
8351 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8352 goto done;
8353#ifdef DEBUG_PUSH
8354 xmlGenericError(xmlGenericErrorContext,
8355 "PP: Parsing PI\n");
8356#endif
8357 xmlParsePI(ctxt);
8358 } else if ((cur == '<') && (next == '!') &&
8359 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8360 if ((!terminate) &&
8361 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8362 goto done;
8363#ifdef DEBUG_PUSH
8364 xmlGenericError(xmlGenericErrorContext,
8365 "PP: Parsing Comment\n");
8366#endif
8367 xmlParseComment(ctxt);
8368 ctxt->instate = XML_PARSER_MISC;
8369 } else if ((cur == '<') && (next == '!') &&
8370 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8371 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8372 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8373 (ctxt->input->cur[8] == 'E')) {
8374 if ((!terminate) &&
8375 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8376 goto done;
8377#ifdef DEBUG_PUSH
8378 xmlGenericError(xmlGenericErrorContext,
8379 "PP: Parsing internal subset\n");
8380#endif
8381 ctxt->inSubset = 1;
8382 xmlParseDocTypeDecl(ctxt);
8383 if (RAW == '[') {
8384 ctxt->instate = XML_PARSER_DTD;
8385#ifdef DEBUG_PUSH
8386 xmlGenericError(xmlGenericErrorContext,
8387 "PP: entering DTD\n");
8388#endif
8389 } else {
8390 /*
8391 * Create and update the external subset.
8392 */
8393 ctxt->inSubset = 2;
8394 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8395 (ctxt->sax->externalSubset != NULL))
8396 ctxt->sax->externalSubset(ctxt->userData,
8397 ctxt->intSubName, ctxt->extSubSystem,
8398 ctxt->extSubURI);
8399 ctxt->inSubset = 0;
8400 ctxt->instate = XML_PARSER_PROLOG;
8401#ifdef DEBUG_PUSH
8402 xmlGenericError(xmlGenericErrorContext,
8403 "PP: entering PROLOG\n");
8404#endif
8405 }
8406 } else if ((cur == '<') && (next == '!') &&
8407 (avail < 9)) {
8408 goto done;
8409 } else {
8410 ctxt->instate = XML_PARSER_START_TAG;
8411#ifdef DEBUG_PUSH
8412 xmlGenericError(xmlGenericErrorContext,
8413 "PP: entering START_TAG\n");
8414#endif
8415 }
8416 break;
8417 case XML_PARSER_IGNORE:
8418 xmlGenericError(xmlGenericErrorContext,
8419 "PP: internal error, state == IGNORE");
8420 ctxt->instate = XML_PARSER_DTD;
8421#ifdef DEBUG_PUSH
8422 xmlGenericError(xmlGenericErrorContext,
8423 "PP: entering DTD\n");
8424#endif
8425 break;
8426 case XML_PARSER_PROLOG:
8427 SKIP_BLANKS;
8428 if (ctxt->input->buf == NULL)
8429 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8430 else
8431 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8432 if (avail < 2)
8433 goto done;
8434 cur = ctxt->input->cur[0];
8435 next = ctxt->input->cur[1];
8436 if ((cur == '<') && (next == '?')) {
8437 if ((!terminate) &&
8438 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8439 goto done;
8440#ifdef DEBUG_PUSH
8441 xmlGenericError(xmlGenericErrorContext,
8442 "PP: Parsing PI\n");
8443#endif
8444 xmlParsePI(ctxt);
8445 } else if ((cur == '<') && (next == '!') &&
8446 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8447 if ((!terminate) &&
8448 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8449 goto done;
8450#ifdef DEBUG_PUSH
8451 xmlGenericError(xmlGenericErrorContext,
8452 "PP: Parsing Comment\n");
8453#endif
8454 xmlParseComment(ctxt);
8455 ctxt->instate = XML_PARSER_PROLOG;
8456 } else if ((cur == '<') && (next == '!') &&
8457 (avail < 4)) {
8458 goto done;
8459 } else {
8460 ctxt->instate = XML_PARSER_START_TAG;
8461#ifdef DEBUG_PUSH
8462 xmlGenericError(xmlGenericErrorContext,
8463 "PP: entering START_TAG\n");
8464#endif
8465 }
8466 break;
8467 case XML_PARSER_EPILOG:
8468 SKIP_BLANKS;
8469 if (ctxt->input->buf == NULL)
8470 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8471 else
8472 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8473 if (avail < 2)
8474 goto done;
8475 cur = ctxt->input->cur[0];
8476 next = ctxt->input->cur[1];
8477 if ((cur == '<') && (next == '?')) {
8478 if ((!terminate) &&
8479 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8480 goto done;
8481#ifdef DEBUG_PUSH
8482 xmlGenericError(xmlGenericErrorContext,
8483 "PP: Parsing PI\n");
8484#endif
8485 xmlParsePI(ctxt);
8486 ctxt->instate = XML_PARSER_EPILOG;
8487 } else if ((cur == '<') && (next == '!') &&
8488 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8489 if ((!terminate) &&
8490 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8491 goto done;
8492#ifdef DEBUG_PUSH
8493 xmlGenericError(xmlGenericErrorContext,
8494 "PP: Parsing Comment\n");
8495#endif
8496 xmlParseComment(ctxt);
8497 ctxt->instate = XML_PARSER_EPILOG;
8498 } else if ((cur == '<') && (next == '!') &&
8499 (avail < 4)) {
8500 goto done;
8501 } else {
8502 ctxt->errNo = XML_ERR_DOCUMENT_END;
8503 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8504 ctxt->sax->error(ctxt->userData,
8505 "Extra content at the end of the document\n");
8506 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008507 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008508 ctxt->instate = XML_PARSER_EOF;
8509#ifdef DEBUG_PUSH
8510 xmlGenericError(xmlGenericErrorContext,
8511 "PP: entering EOF\n");
8512#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008513 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008514 ctxt->sax->endDocument(ctxt->userData);
8515 goto done;
8516 }
8517 break;
8518 case XML_PARSER_START_TAG: {
8519 xmlChar *name, *oldname;
8520
8521 if ((avail < 2) && (ctxt->inputNr == 1))
8522 goto done;
8523 cur = ctxt->input->cur[0];
8524 if (cur != '<') {
8525 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8526 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8527 ctxt->sax->error(ctxt->userData,
8528 "Start tag expect, '<' not found\n");
8529 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008530 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008531 ctxt->instate = XML_PARSER_EOF;
8532#ifdef DEBUG_PUSH
8533 xmlGenericError(xmlGenericErrorContext,
8534 "PP: entering EOF\n");
8535#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008536 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008537 ctxt->sax->endDocument(ctxt->userData);
8538 goto done;
8539 }
8540 if ((!terminate) &&
8541 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8542 goto done;
8543 if (ctxt->spaceNr == 0)
8544 spacePush(ctxt, -1);
8545 else
8546 spacePush(ctxt, *ctxt->space);
8547 name = xmlParseStartTag(ctxt);
8548 if (name == NULL) {
8549 spacePop(ctxt);
8550 ctxt->instate = XML_PARSER_EOF;
8551#ifdef DEBUG_PUSH
8552 xmlGenericError(xmlGenericErrorContext,
8553 "PP: entering EOF\n");
8554#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008555 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008556 ctxt->sax->endDocument(ctxt->userData);
8557 goto done;
8558 }
8559 namePush(ctxt, xmlStrdup(name));
8560
8561 /*
8562 * [ VC: Root Element Type ]
8563 * The Name in the document type declaration must match
8564 * the element type of the root element.
8565 */
8566 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8567 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8568 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8569
8570 /*
8571 * Check for an Empty Element.
8572 */
8573 if ((RAW == '/') && (NXT(1) == '>')) {
8574 SKIP(2);
8575 if ((ctxt->sax != NULL) &&
8576 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8577 ctxt->sax->endElement(ctxt->userData, name);
8578 xmlFree(name);
8579 oldname = namePop(ctxt);
8580 spacePop(ctxt);
8581 if (oldname != NULL) {
8582#ifdef DEBUG_STACK
8583 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8584#endif
8585 xmlFree(oldname);
8586 }
8587 if (ctxt->name == NULL) {
8588 ctxt->instate = XML_PARSER_EPILOG;
8589#ifdef DEBUG_PUSH
8590 xmlGenericError(xmlGenericErrorContext,
8591 "PP: entering EPILOG\n");
8592#endif
8593 } else {
8594 ctxt->instate = XML_PARSER_CONTENT;
8595#ifdef DEBUG_PUSH
8596 xmlGenericError(xmlGenericErrorContext,
8597 "PP: entering CONTENT\n");
8598#endif
8599 }
8600 break;
8601 }
8602 if (RAW == '>') {
8603 NEXT;
8604 } else {
8605 ctxt->errNo = XML_ERR_GT_REQUIRED;
8606 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8607 ctxt->sax->error(ctxt->userData,
8608 "Couldn't find end of Start Tag %s\n",
8609 name);
8610 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008611 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008612
8613 /*
8614 * end of parsing of this node.
8615 */
8616 nodePop(ctxt);
8617 oldname = namePop(ctxt);
8618 spacePop(ctxt);
8619 if (oldname != NULL) {
8620#ifdef DEBUG_STACK
8621 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8622#endif
8623 xmlFree(oldname);
8624 }
8625 }
8626 xmlFree(name);
8627 ctxt->instate = XML_PARSER_CONTENT;
8628#ifdef DEBUG_PUSH
8629 xmlGenericError(xmlGenericErrorContext,
8630 "PP: entering CONTENT\n");
8631#endif
8632 break;
8633 }
8634 case XML_PARSER_CONTENT: {
8635 const xmlChar *test;
8636 int cons;
Owen Taylor3473f882001-02-23 17:55:21 +00008637 if ((avail < 2) && (ctxt->inputNr == 1))
8638 goto done;
8639 cur = ctxt->input->cur[0];
8640 next = ctxt->input->cur[1];
8641
8642 test = CUR_PTR;
8643 cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008644 if ((cur == '<') && (next == '?')) {
8645 if ((!terminate) &&
8646 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8647 goto done;
8648#ifdef DEBUG_PUSH
8649 xmlGenericError(xmlGenericErrorContext,
8650 "PP: Parsing PI\n");
8651#endif
8652 xmlParsePI(ctxt);
8653 } else if ((cur == '<') && (next == '!') &&
8654 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8655 if ((!terminate) &&
8656 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8657 goto done;
8658#ifdef DEBUG_PUSH
8659 xmlGenericError(xmlGenericErrorContext,
8660 "PP: Parsing Comment\n");
8661#endif
8662 xmlParseComment(ctxt);
8663 ctxt->instate = XML_PARSER_CONTENT;
8664 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8665 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8666 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8667 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8668 (ctxt->input->cur[8] == '[')) {
8669 SKIP(9);
8670 ctxt->instate = XML_PARSER_CDATA_SECTION;
8671#ifdef DEBUG_PUSH
8672 xmlGenericError(xmlGenericErrorContext,
8673 "PP: entering CDATA_SECTION\n");
8674#endif
8675 break;
8676 } else if ((cur == '<') && (next == '!') &&
8677 (avail < 9)) {
8678 goto done;
8679 } else if ((cur == '<') && (next == '/')) {
8680 ctxt->instate = XML_PARSER_END_TAG;
8681#ifdef DEBUG_PUSH
8682 xmlGenericError(xmlGenericErrorContext,
8683 "PP: entering END_TAG\n");
8684#endif
8685 break;
8686 } else if (cur == '<') {
8687 ctxt->instate = XML_PARSER_START_TAG;
8688#ifdef DEBUG_PUSH
8689 xmlGenericError(xmlGenericErrorContext,
8690 "PP: entering START_TAG\n");
8691#endif
8692 break;
8693 } else if (cur == '&') {
8694 if ((!terminate) &&
8695 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8696 goto done;
8697#ifdef DEBUG_PUSH
8698 xmlGenericError(xmlGenericErrorContext,
8699 "PP: Parsing Reference\n");
8700#endif
8701 xmlParseReference(ctxt);
8702 } else {
8703 /* TODO Avoid the extra copy, handle directly !!! */
8704 /*
8705 * Goal of the following test is:
8706 * - minimize calls to the SAX 'character' callback
8707 * when they are mergeable
8708 * - handle an problem for isBlank when we only parse
8709 * a sequence of blank chars and the next one is
8710 * not available to check against '<' presence.
8711 * - tries to homogenize the differences in SAX
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008712 * callbacks between the push and pull versions
Owen Taylor3473f882001-02-23 17:55:21 +00008713 * of the parser.
8714 */
8715 if ((ctxt->inputNr == 1) &&
8716 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8717 if ((!terminate) &&
8718 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8719 goto done;
8720 }
8721 ctxt->checkIndex = 0;
8722#ifdef DEBUG_PUSH
8723 xmlGenericError(xmlGenericErrorContext,
8724 "PP: Parsing char data\n");
8725#endif
8726 xmlParseCharData(ctxt, 0);
8727 }
8728 /*
8729 * Pop-up of finished entities.
8730 */
8731 while ((RAW == 0) && (ctxt->inputNr > 1))
8732 xmlPopInput(ctxt);
Daniel Veillardfdc91562002-07-01 21:52:03 +00008733 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008734 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8735 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8736 ctxt->sax->error(ctxt->userData,
8737 "detected an error in element content\n");
8738 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008739 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008740 ctxt->instate = XML_PARSER_EOF;
8741 break;
8742 }
8743 break;
8744 }
8745 case XML_PARSER_CDATA_SECTION: {
8746 /*
8747 * The Push mode need to have the SAX callback for
8748 * cdataBlock merge back contiguous callbacks.
8749 */
8750 int base;
8751
8752 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8753 if (base < 0) {
8754 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8755 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8756 if (ctxt->sax->cdataBlock != NULL)
8757 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8758 XML_PARSER_BIG_BUFFER_SIZE);
8759 }
8760 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8761 ctxt->checkIndex = 0;
8762 }
8763 goto done;
8764 } else {
8765 if ((ctxt->sax != NULL) && (base > 0) &&
8766 (!ctxt->disableSAX)) {
8767 if (ctxt->sax->cdataBlock != NULL)
8768 ctxt->sax->cdataBlock(ctxt->userData,
8769 ctxt->input->cur, base);
8770 }
8771 SKIP(base + 3);
8772 ctxt->checkIndex = 0;
8773 ctxt->instate = XML_PARSER_CONTENT;
8774#ifdef DEBUG_PUSH
8775 xmlGenericError(xmlGenericErrorContext,
8776 "PP: entering CONTENT\n");
8777#endif
8778 }
8779 break;
8780 }
8781 case XML_PARSER_END_TAG:
8782 if (avail < 2)
8783 goto done;
8784 if ((!terminate) &&
8785 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8786 goto done;
8787 xmlParseEndTag(ctxt);
8788 if (ctxt->name == NULL) {
8789 ctxt->instate = XML_PARSER_EPILOG;
8790#ifdef DEBUG_PUSH
8791 xmlGenericError(xmlGenericErrorContext,
8792 "PP: entering EPILOG\n");
8793#endif
8794 } else {
8795 ctxt->instate = XML_PARSER_CONTENT;
8796#ifdef DEBUG_PUSH
8797 xmlGenericError(xmlGenericErrorContext,
8798 "PP: entering CONTENT\n");
8799#endif
8800 }
8801 break;
8802 case XML_PARSER_DTD: {
8803 /*
8804 * Sorry but progressive parsing of the internal subset
8805 * is not expected to be supported. We first check that
8806 * the full content of the internal subset is available and
8807 * the parsing is launched only at that point.
8808 * Internal subset ends up with "']' S? '>'" in an unescaped
8809 * section and not in a ']]>' sequence which are conditional
8810 * sections (whoever argued to keep that crap in XML deserve
8811 * a place in hell !).
8812 */
8813 int base, i;
8814 xmlChar *buf;
8815 xmlChar quote = 0;
8816
8817 base = ctxt->input->cur - ctxt->input->base;
8818 if (base < 0) return(0);
8819 if (ctxt->checkIndex > base)
8820 base = ctxt->checkIndex;
8821 buf = ctxt->input->buf->buffer->content;
8822 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8823 base++) {
8824 if (quote != 0) {
8825 if (buf[base] == quote)
8826 quote = 0;
8827 continue;
8828 }
8829 if (buf[base] == '"') {
8830 quote = '"';
8831 continue;
8832 }
8833 if (buf[base] == '\'') {
8834 quote = '\'';
8835 continue;
8836 }
8837 if (buf[base] == ']') {
8838 if ((unsigned int) base +1 >=
8839 ctxt->input->buf->buffer->use)
8840 break;
8841 if (buf[base + 1] == ']') {
8842 /* conditional crap, skip both ']' ! */
8843 base++;
8844 continue;
8845 }
8846 for (i = 0;
8847 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8848 i++) {
8849 if (buf[base + i] == '>')
8850 goto found_end_int_subset;
8851 }
8852 break;
8853 }
8854 }
8855 /*
8856 * We didn't found the end of the Internal subset
8857 */
8858 if (quote == 0)
8859 ctxt->checkIndex = base;
8860#ifdef DEBUG_PUSH
8861 if (next == 0)
8862 xmlGenericError(xmlGenericErrorContext,
8863 "PP: lookup of int subset end filed\n");
8864#endif
8865 goto done;
8866
8867found_end_int_subset:
8868 xmlParseInternalSubset(ctxt);
8869 ctxt->inSubset = 2;
8870 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8871 (ctxt->sax->externalSubset != NULL))
8872 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8873 ctxt->extSubSystem, ctxt->extSubURI);
8874 ctxt->inSubset = 0;
8875 ctxt->instate = XML_PARSER_PROLOG;
8876 ctxt->checkIndex = 0;
8877#ifdef DEBUG_PUSH
8878 xmlGenericError(xmlGenericErrorContext,
8879 "PP: entering PROLOG\n");
8880#endif
8881 break;
8882 }
8883 case XML_PARSER_COMMENT:
8884 xmlGenericError(xmlGenericErrorContext,
8885 "PP: internal error, state == COMMENT\n");
8886 ctxt->instate = XML_PARSER_CONTENT;
8887#ifdef DEBUG_PUSH
8888 xmlGenericError(xmlGenericErrorContext,
8889 "PP: entering CONTENT\n");
8890#endif
8891 break;
8892 case XML_PARSER_PI:
8893 xmlGenericError(xmlGenericErrorContext,
8894 "PP: internal error, state == PI\n");
8895 ctxt->instate = XML_PARSER_CONTENT;
8896#ifdef DEBUG_PUSH
8897 xmlGenericError(xmlGenericErrorContext,
8898 "PP: entering CONTENT\n");
8899#endif
8900 break;
8901 case XML_PARSER_ENTITY_DECL:
8902 xmlGenericError(xmlGenericErrorContext,
8903 "PP: internal error, state == ENTITY_DECL\n");
8904 ctxt->instate = XML_PARSER_DTD;
8905#ifdef DEBUG_PUSH
8906 xmlGenericError(xmlGenericErrorContext,
8907 "PP: entering DTD\n");
8908#endif
8909 break;
8910 case XML_PARSER_ENTITY_VALUE:
8911 xmlGenericError(xmlGenericErrorContext,
8912 "PP: internal error, state == ENTITY_VALUE\n");
8913 ctxt->instate = XML_PARSER_CONTENT;
8914#ifdef DEBUG_PUSH
8915 xmlGenericError(xmlGenericErrorContext,
8916 "PP: entering DTD\n");
8917#endif
8918 break;
8919 case XML_PARSER_ATTRIBUTE_VALUE:
8920 xmlGenericError(xmlGenericErrorContext,
8921 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8922 ctxt->instate = XML_PARSER_START_TAG;
8923#ifdef DEBUG_PUSH
8924 xmlGenericError(xmlGenericErrorContext,
8925 "PP: entering START_TAG\n");
8926#endif
8927 break;
8928 case XML_PARSER_SYSTEM_LITERAL:
8929 xmlGenericError(xmlGenericErrorContext,
8930 "PP: internal error, state == SYSTEM_LITERAL\n");
8931 ctxt->instate = XML_PARSER_START_TAG;
8932#ifdef DEBUG_PUSH
8933 xmlGenericError(xmlGenericErrorContext,
8934 "PP: entering START_TAG\n");
8935#endif
8936 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00008937 case XML_PARSER_PUBLIC_LITERAL:
8938 xmlGenericError(xmlGenericErrorContext,
8939 "PP: internal error, state == PUBLIC_LITERAL\n");
8940 ctxt->instate = XML_PARSER_START_TAG;
8941#ifdef DEBUG_PUSH
8942 xmlGenericError(xmlGenericErrorContext,
8943 "PP: entering START_TAG\n");
8944#endif
8945 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008946 }
8947 }
8948done:
8949#ifdef DEBUG_PUSH
8950 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8951#endif
8952 return(ret);
8953}
8954
8955/**
Owen Taylor3473f882001-02-23 17:55:21 +00008956 * xmlParseChunk:
8957 * @ctxt: an XML parser context
8958 * @chunk: an char array
8959 * @size: the size in byte of the chunk
8960 * @terminate: last chunk indicator
8961 *
8962 * Parse a Chunk of memory
8963 *
8964 * Returns zero if no error, the xmlParserErrors otherwise.
8965 */
8966int
8967xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8968 int terminate) {
8969 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8970 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8971 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8972 int cur = ctxt->input->cur - ctxt->input->base;
8973
8974 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8975 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8976 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008977 ctxt->input->end =
8978 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008979#ifdef DEBUG_PUSH
8980 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8981#endif
8982
8983 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8984 xmlParseTryOrFinish(ctxt, terminate);
8985 } else if (ctxt->instate != XML_PARSER_EOF) {
8986 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8987 xmlParserInputBufferPtr in = ctxt->input->buf;
8988 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8989 (in->raw != NULL)) {
8990 int nbchars;
8991
8992 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8993 if (nbchars < 0) {
8994 xmlGenericError(xmlGenericErrorContext,
8995 "xmlParseChunk: encoder error\n");
8996 return(XML_ERR_INVALID_ENCODING);
8997 }
8998 }
8999 }
9000 }
9001 xmlParseTryOrFinish(ctxt, terminate);
9002 if (terminate) {
9003 /*
9004 * Check for termination
9005 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009006 int avail = 0;
9007 if (ctxt->input->buf == NULL)
9008 avail = ctxt->input->length -
9009 (ctxt->input->cur - ctxt->input->base);
9010 else
9011 avail = ctxt->input->buf->buffer->use -
9012 (ctxt->input->cur - ctxt->input->base);
9013
Owen Taylor3473f882001-02-23 17:55:21 +00009014 if ((ctxt->instate != XML_PARSER_EOF) &&
9015 (ctxt->instate != XML_PARSER_EPILOG)) {
9016 ctxt->errNo = XML_ERR_DOCUMENT_END;
9017 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9018 ctxt->sax->error(ctxt->userData,
9019 "Extra content at the end of the document\n");
9020 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009021 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009022 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009023 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
9024 ctxt->errNo = XML_ERR_DOCUMENT_END;
9025 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9026 ctxt->sax->error(ctxt->userData,
9027 "Extra content at the end of the document\n");
9028 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009029 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009030
9031 }
Owen Taylor3473f882001-02-23 17:55:21 +00009032 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009033 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009034 ctxt->sax->endDocument(ctxt->userData);
9035 }
9036 ctxt->instate = XML_PARSER_EOF;
9037 }
9038 return((xmlParserErrors) ctxt->errNo);
9039}
9040
9041/************************************************************************
9042 * *
9043 * I/O front end functions to the parser *
9044 * *
9045 ************************************************************************/
9046
9047/**
9048 * xmlStopParser:
9049 * @ctxt: an XML parser context
9050 *
9051 * Blocks further parser processing
9052 */
9053void
9054xmlStopParser(xmlParserCtxtPtr ctxt) {
9055 ctxt->instate = XML_PARSER_EOF;
9056 if (ctxt->input != NULL)
9057 ctxt->input->cur = BAD_CAST"";
9058}
9059
9060/**
9061 * xmlCreatePushParserCtxt:
9062 * @sax: a SAX handler
9063 * @user_data: The user data returned on SAX callbacks
9064 * @chunk: a pointer to an array of chars
9065 * @size: number of chars in the array
9066 * @filename: an optional file name or URI
9067 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00009068 * Create a parser context for using the XML parser in push mode.
9069 * If @buffer and @size are non-NULL, the data is used to detect
9070 * the encoding. The remaining characters will be parsed so they
9071 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00009072 * To allow content encoding detection, @size should be >= 4
9073 * The value of @filename is used for fetching external entities
9074 * and error/warning reports.
9075 *
9076 * Returns the new parser context or NULL
9077 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00009078
Owen Taylor3473f882001-02-23 17:55:21 +00009079xmlParserCtxtPtr
9080xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9081 const char *chunk, int size, const char *filename) {
9082 xmlParserCtxtPtr ctxt;
9083 xmlParserInputPtr inputStream;
9084 xmlParserInputBufferPtr buf;
9085 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9086
9087 /*
9088 * plug some encoding conversion routines
9089 */
9090 if ((chunk != NULL) && (size >= 4))
9091 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9092
9093 buf = xmlAllocParserInputBuffer(enc);
9094 if (buf == NULL) return(NULL);
9095
9096 ctxt = xmlNewParserCtxt();
9097 if (ctxt == NULL) {
9098 xmlFree(buf);
9099 return(NULL);
9100 }
9101 if (sax != NULL) {
9102 if (ctxt->sax != &xmlDefaultSAXHandler)
9103 xmlFree(ctxt->sax);
9104 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9105 if (ctxt->sax == NULL) {
9106 xmlFree(buf);
9107 xmlFree(ctxt);
9108 return(NULL);
9109 }
9110 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9111 if (user_data != NULL)
9112 ctxt->userData = user_data;
9113 }
9114 if (filename == NULL) {
9115 ctxt->directory = NULL;
9116 } else {
9117 ctxt->directory = xmlParserGetDirectory(filename);
9118 }
9119
9120 inputStream = xmlNewInputStream(ctxt);
9121 if (inputStream == NULL) {
9122 xmlFreeParserCtxt(ctxt);
9123 return(NULL);
9124 }
9125
9126 if (filename == NULL)
9127 inputStream->filename = NULL;
9128 else
Daniel Veillardf4862f02002-09-10 11:13:43 +00009129 inputStream->filename = (char *)
9130 xmlNormalizeWindowsPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +00009131 inputStream->buf = buf;
9132 inputStream->base = inputStream->buf->buffer->content;
9133 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009134 inputStream->end =
9135 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009136
9137 inputPush(ctxt, inputStream);
9138
9139 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9140 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009141 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9142 int cur = ctxt->input->cur - ctxt->input->base;
9143
Owen Taylor3473f882001-02-23 17:55:21 +00009144 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009145
9146 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9147 ctxt->input->cur = ctxt->input->base + cur;
9148 ctxt->input->end =
9149 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009150#ifdef DEBUG_PUSH
9151 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9152#endif
9153 }
9154
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009155 if (enc != XML_CHAR_ENCODING_NONE) {
9156 xmlSwitchEncoding(ctxt, enc);
9157 }
9158
Owen Taylor3473f882001-02-23 17:55:21 +00009159 return(ctxt);
9160}
9161
9162/**
9163 * xmlCreateIOParserCtxt:
9164 * @sax: a SAX handler
9165 * @user_data: The user data returned on SAX callbacks
9166 * @ioread: an I/O read function
9167 * @ioclose: an I/O close function
9168 * @ioctx: an I/O handler
9169 * @enc: the charset encoding if known
9170 *
9171 * Create a parser context for using the XML parser with an existing
9172 * I/O stream
9173 *
9174 * Returns the new parser context or NULL
9175 */
9176xmlParserCtxtPtr
9177xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9178 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9179 void *ioctx, xmlCharEncoding enc) {
9180 xmlParserCtxtPtr ctxt;
9181 xmlParserInputPtr inputStream;
9182 xmlParserInputBufferPtr buf;
9183
9184 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9185 if (buf == NULL) return(NULL);
9186
9187 ctxt = xmlNewParserCtxt();
9188 if (ctxt == NULL) {
9189 xmlFree(buf);
9190 return(NULL);
9191 }
9192 if (sax != NULL) {
9193 if (ctxt->sax != &xmlDefaultSAXHandler)
9194 xmlFree(ctxt->sax);
9195 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9196 if (ctxt->sax == NULL) {
9197 xmlFree(buf);
9198 xmlFree(ctxt);
9199 return(NULL);
9200 }
9201 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9202 if (user_data != NULL)
9203 ctxt->userData = user_data;
9204 }
9205
9206 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9207 if (inputStream == NULL) {
9208 xmlFreeParserCtxt(ctxt);
9209 return(NULL);
9210 }
9211 inputPush(ctxt, inputStream);
9212
9213 return(ctxt);
9214}
9215
9216/************************************************************************
9217 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009218 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00009219 * *
9220 ************************************************************************/
9221
9222/**
9223 * xmlIOParseDTD:
9224 * @sax: the SAX handler block or NULL
9225 * @input: an Input Buffer
9226 * @enc: the charset encoding if known
9227 *
9228 * Load and parse a DTD
9229 *
9230 * Returns the resulting xmlDtdPtr or NULL in case of error.
9231 * @input will be freed at parsing end.
9232 */
9233
9234xmlDtdPtr
9235xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9236 xmlCharEncoding enc) {
9237 xmlDtdPtr ret = NULL;
9238 xmlParserCtxtPtr ctxt;
9239 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009240 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00009241
9242 if (input == NULL)
9243 return(NULL);
9244
9245 ctxt = xmlNewParserCtxt();
9246 if (ctxt == NULL) {
9247 return(NULL);
9248 }
9249
9250 /*
9251 * Set-up the SAX context
9252 */
9253 if (sax != NULL) {
9254 if (ctxt->sax != NULL)
9255 xmlFree(ctxt->sax);
9256 ctxt->sax = sax;
9257 ctxt->userData = NULL;
9258 }
9259
9260 /*
9261 * generate a parser input from the I/O handler
9262 */
9263
9264 pinput = xmlNewIOInputStream(ctxt, input, enc);
9265 if (pinput == NULL) {
9266 if (sax != NULL) ctxt->sax = NULL;
9267 xmlFreeParserCtxt(ctxt);
9268 return(NULL);
9269 }
9270
9271 /*
9272 * plug some encoding conversion routines here.
9273 */
9274 xmlPushInput(ctxt, pinput);
9275
9276 pinput->filename = NULL;
9277 pinput->line = 1;
9278 pinput->col = 1;
9279 pinput->base = ctxt->input->cur;
9280 pinput->cur = ctxt->input->cur;
9281 pinput->free = NULL;
9282
9283 /*
9284 * let's parse that entity knowing it's an external subset.
9285 */
9286 ctxt->inSubset = 2;
9287 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9288 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9289 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009290
9291 if (enc == XML_CHAR_ENCODING_NONE) {
9292 /*
9293 * Get the 4 first bytes and decode the charset
9294 * if enc != XML_CHAR_ENCODING_NONE
9295 * plug some encoding conversion routines.
9296 */
9297 start[0] = RAW;
9298 start[1] = NXT(1);
9299 start[2] = NXT(2);
9300 start[3] = NXT(3);
9301 enc = xmlDetectCharEncoding(start, 4);
9302 if (enc != XML_CHAR_ENCODING_NONE) {
9303 xmlSwitchEncoding(ctxt, enc);
9304 }
9305 }
9306
Owen Taylor3473f882001-02-23 17:55:21 +00009307 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9308
9309 if (ctxt->myDoc != NULL) {
9310 if (ctxt->wellFormed) {
9311 ret = ctxt->myDoc->extSubset;
9312 ctxt->myDoc->extSubset = NULL;
9313 } else {
9314 ret = NULL;
9315 }
9316 xmlFreeDoc(ctxt->myDoc);
9317 ctxt->myDoc = NULL;
9318 }
9319 if (sax != NULL) ctxt->sax = NULL;
9320 xmlFreeParserCtxt(ctxt);
9321
9322 return(ret);
9323}
9324
9325/**
9326 * xmlSAXParseDTD:
9327 * @sax: the SAX handler block
9328 * @ExternalID: a NAME* containing the External ID of the DTD
9329 * @SystemID: a NAME* containing the URL to the DTD
9330 *
9331 * Load and parse an external subset.
9332 *
9333 * Returns the resulting xmlDtdPtr or NULL in case of error.
9334 */
9335
9336xmlDtdPtr
9337xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9338 const xmlChar *SystemID) {
9339 xmlDtdPtr ret = NULL;
9340 xmlParserCtxtPtr ctxt;
9341 xmlParserInputPtr input = NULL;
9342 xmlCharEncoding enc;
9343
9344 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9345
9346 ctxt = xmlNewParserCtxt();
9347 if (ctxt == NULL) {
9348 return(NULL);
9349 }
9350
9351 /*
9352 * Set-up the SAX context
9353 */
9354 if (sax != NULL) {
9355 if (ctxt->sax != NULL)
9356 xmlFree(ctxt->sax);
9357 ctxt->sax = sax;
9358 ctxt->userData = NULL;
9359 }
9360
9361 /*
9362 * Ask the Entity resolver to load the damn thing
9363 */
9364
9365 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9366 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9367 if (input == NULL) {
9368 if (sax != NULL) ctxt->sax = NULL;
9369 xmlFreeParserCtxt(ctxt);
9370 return(NULL);
9371 }
9372
9373 /*
9374 * plug some encoding conversion routines here.
9375 */
9376 xmlPushInput(ctxt, input);
9377 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9378 xmlSwitchEncoding(ctxt, enc);
9379
9380 if (input->filename == NULL)
9381 input->filename = (char *) xmlStrdup(SystemID);
9382 input->line = 1;
9383 input->col = 1;
9384 input->base = ctxt->input->cur;
9385 input->cur = ctxt->input->cur;
9386 input->free = NULL;
9387
9388 /*
9389 * let's parse that entity knowing it's an external subset.
9390 */
9391 ctxt->inSubset = 2;
9392 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9393 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9394 ExternalID, SystemID);
9395 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9396
9397 if (ctxt->myDoc != NULL) {
9398 if (ctxt->wellFormed) {
9399 ret = ctxt->myDoc->extSubset;
9400 ctxt->myDoc->extSubset = NULL;
9401 } else {
9402 ret = NULL;
9403 }
9404 xmlFreeDoc(ctxt->myDoc);
9405 ctxt->myDoc = NULL;
9406 }
9407 if (sax != NULL) ctxt->sax = NULL;
9408 xmlFreeParserCtxt(ctxt);
9409
9410 return(ret);
9411}
9412
9413/**
9414 * xmlParseDTD:
9415 * @ExternalID: a NAME* containing the External ID of the DTD
9416 * @SystemID: a NAME* containing the URL to the DTD
9417 *
9418 * Load and parse an external subset.
9419 *
9420 * Returns the resulting xmlDtdPtr or NULL in case of error.
9421 */
9422
9423xmlDtdPtr
9424xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9425 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9426}
9427
/************************************************************************
 *									*
 *		Front ends when parsing an Entity			*
 *									*
 ************************************************************************/
9433
9434/**
Owen Taylor3473f882001-02-23 17:55:21 +00009435 * xmlParseCtxtExternalEntity:
9436 * @ctx: the existing parsing context
9437 * @URL: the URL for the entity to load
9438 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009439 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009440 *
9441 * Parse an external general entity within an existing parsing context
9442 * An external general parsed entity is well-formed if it matches the
9443 * production labeled extParsedEnt.
9444 *
9445 * [78] extParsedEnt ::= TextDecl? content
9446 *
9447 * Returns 0 if the entity is well formed, -1 in case of args problem and
9448 * the parser error code otherwise
9449 */
9450
9451int
9452xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009453 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009454 xmlParserCtxtPtr ctxt;
9455 xmlDocPtr newDoc;
9456 xmlSAXHandlerPtr oldsax = NULL;
9457 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009458 xmlChar start[4];
9459 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009460
9461 if (ctx->depth > 40) {
9462 return(XML_ERR_ENTITY_LOOP);
9463 }
9464
Daniel Veillardcda96922001-08-21 10:56:31 +00009465 if (lst != NULL)
9466 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009467 if ((URL == NULL) && (ID == NULL))
9468 return(-1);
9469 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9470 return(-1);
9471
9472
9473 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9474 if (ctxt == NULL) return(-1);
9475 ctxt->userData = ctxt;
9476 oldsax = ctxt->sax;
9477 ctxt->sax = ctx->sax;
9478 newDoc = xmlNewDoc(BAD_CAST "1.0");
9479 if (newDoc == NULL) {
9480 xmlFreeParserCtxt(ctxt);
9481 return(-1);
9482 }
9483 if (ctx->myDoc != NULL) {
9484 newDoc->intSubset = ctx->myDoc->intSubset;
9485 newDoc->extSubset = ctx->myDoc->extSubset;
9486 }
9487 if (ctx->myDoc->URL != NULL) {
9488 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9489 }
9490 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9491 if (newDoc->children == NULL) {
9492 ctxt->sax = oldsax;
9493 xmlFreeParserCtxt(ctxt);
9494 newDoc->intSubset = NULL;
9495 newDoc->extSubset = NULL;
9496 xmlFreeDoc(newDoc);
9497 return(-1);
9498 }
9499 nodePush(ctxt, newDoc->children);
9500 if (ctx->myDoc == NULL) {
9501 ctxt->myDoc = newDoc;
9502 } else {
9503 ctxt->myDoc = ctx->myDoc;
9504 newDoc->children->doc = ctx->myDoc;
9505 }
9506
Daniel Veillard87a764e2001-06-20 17:41:10 +00009507 /*
9508 * Get the 4 first bytes and decode the charset
9509 * if enc != XML_CHAR_ENCODING_NONE
9510 * plug some encoding conversion routines.
9511 */
9512 GROW
9513 start[0] = RAW;
9514 start[1] = NXT(1);
9515 start[2] = NXT(2);
9516 start[3] = NXT(3);
9517 enc = xmlDetectCharEncoding(start, 4);
9518 if (enc != XML_CHAR_ENCODING_NONE) {
9519 xmlSwitchEncoding(ctxt, enc);
9520 }
9521
Owen Taylor3473f882001-02-23 17:55:21 +00009522 /*
9523 * Parse a possible text declaration first
9524 */
Owen Taylor3473f882001-02-23 17:55:21 +00009525 if ((RAW == '<') && (NXT(1) == '?') &&
9526 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9527 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9528 xmlParseTextDecl(ctxt);
9529 }
9530
9531 /*
9532 * Doing validity checking on chunk doesn't make sense
9533 */
9534 ctxt->instate = XML_PARSER_CONTENT;
9535 ctxt->validate = ctx->validate;
9536 ctxt->loadsubset = ctx->loadsubset;
9537 ctxt->depth = ctx->depth + 1;
9538 ctxt->replaceEntities = ctx->replaceEntities;
9539 if (ctxt->validate) {
9540 ctxt->vctxt.error = ctx->vctxt.error;
9541 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009542 } else {
9543 ctxt->vctxt.error = NULL;
9544 ctxt->vctxt.warning = NULL;
9545 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009546 ctxt->vctxt.nodeTab = NULL;
9547 ctxt->vctxt.nodeNr = 0;
9548 ctxt->vctxt.nodeMax = 0;
9549 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009550
9551 xmlParseContent(ctxt);
9552
9553 if ((RAW == '<') && (NXT(1) == '/')) {
9554 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9555 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9556 ctxt->sax->error(ctxt->userData,
9557 "chunk is not well balanced\n");
9558 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009559 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009560 } else if (RAW != 0) {
9561 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9562 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9563 ctxt->sax->error(ctxt->userData,
9564 "extra content at the end of well balanced chunk\n");
9565 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009566 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009567 }
9568 if (ctxt->node != newDoc->children) {
9569 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9570 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9571 ctxt->sax->error(ctxt->userData,
9572 "chunk is not well balanced\n");
9573 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009574 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009575 }
9576
9577 if (!ctxt->wellFormed) {
9578 if (ctxt->errNo == 0)
9579 ret = 1;
9580 else
9581 ret = ctxt->errNo;
9582 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009583 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009584 xmlNodePtr cur;
9585
9586 /*
9587 * Return the newly created nodeset after unlinking it from
9588 * they pseudo parent.
9589 */
9590 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009591 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009592 while (cur != NULL) {
9593 cur->parent = NULL;
9594 cur = cur->next;
9595 }
9596 newDoc->children->children = NULL;
9597 }
9598 ret = 0;
9599 }
9600 ctxt->sax = oldsax;
9601 xmlFreeParserCtxt(ctxt);
9602 newDoc->intSubset = NULL;
9603 newDoc->extSubset = NULL;
9604 xmlFreeDoc(newDoc);
9605
9606 return(ret);
9607}
9608
9609/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009610 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009611 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009612 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009613 * @sax: the SAX handler bloc (possibly NULL)
9614 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9615 * @depth: Used for loop detection, use 0
9616 * @URL: the URL for the entity to load
9617 * @ID: the System ID for the entity to load
9618 * @list: the return value for the set of parsed nodes
9619 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009620 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009621 *
9622 * Returns 0 if the entity is well formed, -1 in case of args problem and
9623 * the parser error code otherwise
9624 */
9625
Daniel Veillard257d9102001-05-08 10:41:44 +00009626static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009627xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9628 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009629 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009630 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009631 xmlParserCtxtPtr ctxt;
9632 xmlDocPtr newDoc;
9633 xmlSAXHandlerPtr oldsax = NULL;
9634 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009635 xmlChar start[4];
9636 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009637
9638 if (depth > 40) {
9639 return(XML_ERR_ENTITY_LOOP);
9640 }
9641
9642
9643
9644 if (list != NULL)
9645 *list = NULL;
9646 if ((URL == NULL) && (ID == NULL))
9647 return(-1);
9648 if (doc == NULL) /* @@ relax but check for dereferences */
9649 return(-1);
9650
9651
9652 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9653 if (ctxt == NULL) return(-1);
9654 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009655 if (oldctxt != NULL) {
9656 ctxt->_private = oldctxt->_private;
9657 ctxt->loadsubset = oldctxt->loadsubset;
9658 ctxt->validate = oldctxt->validate;
9659 ctxt->external = oldctxt->external;
9660 } else {
9661 /*
9662 * Doing validity checking on chunk without context
9663 * doesn't make sense
9664 */
9665 ctxt->_private = NULL;
9666 ctxt->validate = 0;
9667 ctxt->external = 2;
9668 ctxt->loadsubset = 0;
9669 }
Owen Taylor3473f882001-02-23 17:55:21 +00009670 if (sax != NULL) {
9671 oldsax = ctxt->sax;
9672 ctxt->sax = sax;
9673 if (user_data != NULL)
9674 ctxt->userData = user_data;
9675 }
9676 newDoc = xmlNewDoc(BAD_CAST "1.0");
9677 if (newDoc == NULL) {
9678 xmlFreeParserCtxt(ctxt);
9679 return(-1);
9680 }
9681 if (doc != NULL) {
9682 newDoc->intSubset = doc->intSubset;
9683 newDoc->extSubset = doc->extSubset;
9684 }
9685 if (doc->URL != NULL) {
9686 newDoc->URL = xmlStrdup(doc->URL);
9687 }
9688 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9689 if (newDoc->children == NULL) {
9690 if (sax != NULL)
9691 ctxt->sax = oldsax;
9692 xmlFreeParserCtxt(ctxt);
9693 newDoc->intSubset = NULL;
9694 newDoc->extSubset = NULL;
9695 xmlFreeDoc(newDoc);
9696 return(-1);
9697 }
9698 nodePush(ctxt, newDoc->children);
9699 if (doc == NULL) {
9700 ctxt->myDoc = newDoc;
9701 } else {
9702 ctxt->myDoc = doc;
9703 newDoc->children->doc = doc;
9704 }
9705
Daniel Veillard87a764e2001-06-20 17:41:10 +00009706 /*
9707 * Get the 4 first bytes and decode the charset
9708 * if enc != XML_CHAR_ENCODING_NONE
9709 * plug some encoding conversion routines.
9710 */
9711 GROW;
9712 start[0] = RAW;
9713 start[1] = NXT(1);
9714 start[2] = NXT(2);
9715 start[3] = NXT(3);
9716 enc = xmlDetectCharEncoding(start, 4);
9717 if (enc != XML_CHAR_ENCODING_NONE) {
9718 xmlSwitchEncoding(ctxt, enc);
9719 }
9720
Owen Taylor3473f882001-02-23 17:55:21 +00009721 /*
9722 * Parse a possible text declaration first
9723 */
Owen Taylor3473f882001-02-23 17:55:21 +00009724 if ((RAW == '<') && (NXT(1) == '?') &&
9725 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9726 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9727 xmlParseTextDecl(ctxt);
9728 }
9729
Owen Taylor3473f882001-02-23 17:55:21 +00009730 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009731 ctxt->depth = depth;
9732
9733 xmlParseContent(ctxt);
9734
Daniel Veillard561b7f82002-03-20 21:55:57 +00009735 if ((RAW == '<') && (NXT(1) == '/')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009736 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9737 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9738 ctxt->sax->error(ctxt->userData,
9739 "chunk is not well balanced\n");
9740 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009741 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00009742 } else if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00009743 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9744 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9745 ctxt->sax->error(ctxt->userData,
9746 "extra content at the end of well balanced chunk\n");
9747 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009748 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009749 }
9750 if (ctxt->node != newDoc->children) {
9751 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9752 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9753 ctxt->sax->error(ctxt->userData,
9754 "chunk is not well balanced\n");
9755 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009756 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009757 }
9758
9759 if (!ctxt->wellFormed) {
9760 if (ctxt->errNo == 0)
9761 ret = 1;
9762 else
9763 ret = ctxt->errNo;
9764 } else {
9765 if (list != NULL) {
9766 xmlNodePtr cur;
9767
9768 /*
9769 * Return the newly created nodeset after unlinking it from
9770 * they pseudo parent.
9771 */
9772 cur = newDoc->children->children;
9773 *list = cur;
9774 while (cur != NULL) {
9775 cur->parent = NULL;
9776 cur = cur->next;
9777 }
9778 newDoc->children->children = NULL;
9779 }
9780 ret = 0;
9781 }
9782 if (sax != NULL)
9783 ctxt->sax = oldsax;
9784 xmlFreeParserCtxt(ctxt);
9785 newDoc->intSubset = NULL;
9786 newDoc->extSubset = NULL;
9787 xmlFreeDoc(newDoc);
9788
9789 return(ret);
9790}
9791
9792/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009793 * xmlParseExternalEntity:
9794 * @doc: the document the chunk pertains to
9795 * @sax: the SAX handler bloc (possibly NULL)
9796 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9797 * @depth: Used for loop detection, use 0
9798 * @URL: the URL for the entity to load
9799 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009800 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009801 *
9802 * Parse an external general entity
9803 * An external general parsed entity is well-formed if it matches the
9804 * production labeled extParsedEnt.
9805 *
9806 * [78] extParsedEnt ::= TextDecl? content
9807 *
9808 * Returns 0 if the entity is well formed, -1 in case of args problem and
9809 * the parser error code otherwise
9810 */
9811
9812int
9813xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009814 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009815 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009816 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009817}
9818
9819/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009820 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009821 * @doc: the document the chunk pertains to
9822 * @sax: the SAX handler bloc (possibly NULL)
9823 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9824 * @depth: Used for loop detection, use 0
9825 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009826 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009827 *
9828 * Parse a well-balanced chunk of an XML document
9829 * called by the parser
9830 * The allowed sequence for the Well Balanced Chunk is the one defined by
9831 * the content production in the XML grammar:
9832 *
9833 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9834 *
9835 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9836 * the parser error code otherwise
9837 */
9838
9839int
9840xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009841 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +00009842 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
9843 depth, string, lst, 0 );
9844}
9845
9846/**
Daniel Veillard328f48c2002-11-15 15:24:34 +00009847 * xmlParseBalancedChunkMemoryInternal:
9848 * @oldctxt: the existing parsing context
9849 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9850 * @user_data: the user data field for the parser context
9851 * @lst: the return value for the set of parsed nodes
9852 *
9853 *
9854 * Parse a well-balanced chunk of an XML document
9855 * called by the parser
9856 * The allowed sequence for the Well Balanced Chunk is the one defined by
9857 * the content production in the XML grammar:
9858 *
9859 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9860 *
9861 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9862 * the parser error code otherwise
9863 *
9864 * In case recover is set to 1, the nodelist will not be empty even if
9865 * the parsed chunk is not well balanced.
9866 */
9867static int
9868xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
9869 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
9870 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009871 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009872 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009873 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009874 int size;
9875 int ret = 0;
9876
9877 if (oldctxt->depth > 40) {
9878 return(XML_ERR_ENTITY_LOOP);
9879 }
9880
9881
9882 if (lst != NULL)
9883 *lst = NULL;
9884 if (string == NULL)
9885 return(-1);
9886
9887 size = xmlStrlen(string);
9888
9889 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9890 if (ctxt == NULL) return(-1);
9891 if (user_data != NULL)
9892 ctxt->userData = user_data;
9893 else
9894 ctxt->userData = ctxt;
9895
9896 oldsax = ctxt->sax;
9897 ctxt->sax = oldctxt->sax;
Daniel Veillarde1ca5032002-12-09 14:13:43 +00009898 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009899 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +00009900 newDoc = xmlNewDoc(BAD_CAST "1.0");
9901 if (newDoc == NULL) {
9902 ctxt->sax = oldsax;
9903 xmlFreeParserCtxt(ctxt);
9904 return(-1);
9905 }
Daniel Veillard328f48c2002-11-15 15:24:34 +00009906 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009907 } else {
9908 ctxt->myDoc = oldctxt->myDoc;
9909 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009910 }
Daniel Veillard9bc53102002-11-25 13:20:04 +00009911 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +00009912 BAD_CAST "pseudoroot", NULL);
9913 if (ctxt->myDoc->children == NULL) {
9914 ctxt->sax = oldsax;
9915 xmlFreeParserCtxt(ctxt);
9916 if (newDoc != NULL)
9917 xmlFreeDoc(newDoc);
9918 return(-1);
9919 }
9920 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +00009921 ctxt->instate = XML_PARSER_CONTENT;
9922 ctxt->depth = oldctxt->depth + 1;
9923
9924 /*
9925 * Doing validity checking on chunk doesn't make sense
9926 */
9927 ctxt->validate = 0;
9928 ctxt->loadsubset = oldctxt->loadsubset;
9929
Daniel Veillard68e9e742002-11-16 15:35:11 +00009930 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +00009931 if ((RAW == '<') && (NXT(1) == '/')) {
9932 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9933 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9934 ctxt->sax->error(ctxt->userData,
9935 "chunk is not well balanced\n");
9936 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009937 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009938 } else if (RAW != 0) {
9939 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9940 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9941 ctxt->sax->error(ctxt->userData,
9942 "extra content at the end of well balanced chunk\n");
9943 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009944 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009945 }
Daniel Veillard68e9e742002-11-16 15:35:11 +00009946 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard328f48c2002-11-15 15:24:34 +00009947 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9948 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9949 ctxt->sax->error(ctxt->userData,
9950 "chunk is not well balanced\n");
9951 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009952 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009953 }
9954
9955 if (!ctxt->wellFormed) {
9956 if (ctxt->errNo == 0)
9957 ret = 1;
9958 else
9959 ret = ctxt->errNo;
9960 } else {
9961 ret = 0;
9962 }
9963
9964 if ((lst != NULL) && (ret == 0)) {
9965 xmlNodePtr cur;
9966
9967 /*
9968 * Return the newly created nodeset after unlinking it from
9969 * they pseudo parent.
9970 */
Daniel Veillard68e9e742002-11-16 15:35:11 +00009971 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009972 *lst = cur;
9973 while (cur != NULL) {
9974 cur->parent = NULL;
9975 cur = cur->next;
9976 }
Daniel Veillard68e9e742002-11-16 15:35:11 +00009977 ctxt->myDoc->children->children = NULL;
9978 }
9979 if (ctxt->myDoc != NULL) {
9980 xmlFreeNode(ctxt->myDoc->children);
9981 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009982 }
9983
9984 ctxt->sax = oldsax;
9985 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +00009986 if (newDoc != NULL)
9987 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +00009988
9989 return(ret);
9990}
9991
9992/**
Daniel Veillard58e44c92002-08-02 22:19:49 +00009993 * xmlParseBalancedChunkMemoryRecover:
9994 * @doc: the document the chunk pertains to
9995 * @sax: the SAX handler bloc (possibly NULL)
9996 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9997 * @depth: Used for loop detection, use 0
9998 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9999 * @lst: the return value for the set of parsed nodes
10000 * @recover: return nodes even if the data is broken (use 0)
10001 *
10002 *
10003 * Parse a well-balanced chunk of an XML document
10004 * called by the parser
10005 * The allowed sequence for the Well Balanced Chunk is the one defined by
10006 * the content production in the XML grammar:
10007 *
10008 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10009 *
10010 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10011 * the parser error code otherwise
10012 *
10013 * In case recover is set to 1, the nodelist will not be empty even if
10014 * the parsed chunk is not well balanced.
10015 */
10016int
10017xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
10018 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
10019 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000010020 xmlParserCtxtPtr ctxt;
10021 xmlDocPtr newDoc;
10022 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000010023 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000010024 int size;
10025 int ret = 0;
10026
10027 if (depth > 40) {
10028 return(XML_ERR_ENTITY_LOOP);
10029 }
10030
10031
Daniel Veillardcda96922001-08-21 10:56:31 +000010032 if (lst != NULL)
10033 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010034 if (string == NULL)
10035 return(-1);
10036
10037 size = xmlStrlen(string);
10038
10039 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10040 if (ctxt == NULL) return(-1);
10041 ctxt->userData = ctxt;
10042 if (sax != NULL) {
10043 oldsax = ctxt->sax;
10044 ctxt->sax = sax;
10045 if (user_data != NULL)
10046 ctxt->userData = user_data;
10047 }
10048 newDoc = xmlNewDoc(BAD_CAST "1.0");
10049 if (newDoc == NULL) {
10050 xmlFreeParserCtxt(ctxt);
10051 return(-1);
10052 }
10053 if (doc != NULL) {
10054 newDoc->intSubset = doc->intSubset;
10055 newDoc->extSubset = doc->extSubset;
10056 }
10057 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10058 if (newDoc->children == NULL) {
10059 if (sax != NULL)
10060 ctxt->sax = oldsax;
10061 xmlFreeParserCtxt(ctxt);
10062 newDoc->intSubset = NULL;
10063 newDoc->extSubset = NULL;
10064 xmlFreeDoc(newDoc);
10065 return(-1);
10066 }
10067 nodePush(ctxt, newDoc->children);
10068 if (doc == NULL) {
10069 ctxt->myDoc = newDoc;
10070 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000010071 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000010072 newDoc->children->doc = doc;
10073 }
10074 ctxt->instate = XML_PARSER_CONTENT;
10075 ctxt->depth = depth;
10076
10077 /*
10078 * Doing validity checking on chunk doesn't make sense
10079 */
10080 ctxt->validate = 0;
10081 ctxt->loadsubset = 0;
10082
Daniel Veillardb39bc392002-10-26 19:29:51 +000010083 if ( doc != NULL ){
10084 content = doc->children;
10085 doc->children = NULL;
10086 xmlParseContent(ctxt);
10087 doc->children = content;
10088 }
10089 else {
10090 xmlParseContent(ctxt);
10091 }
Owen Taylor3473f882001-02-23 17:55:21 +000010092 if ((RAW == '<') && (NXT(1) == '/')) {
10093 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10094 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10095 ctxt->sax->error(ctxt->userData,
10096 "chunk is not well balanced\n");
10097 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010098 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010099 } else if (RAW != 0) {
10100 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10101 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10102 ctxt->sax->error(ctxt->userData,
10103 "extra content at the end of well balanced chunk\n");
10104 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010105 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010106 }
10107 if (ctxt->node != newDoc->children) {
10108 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10109 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10110 ctxt->sax->error(ctxt->userData,
10111 "chunk is not well balanced\n");
10112 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010113 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010114 }
10115
10116 if (!ctxt->wellFormed) {
10117 if (ctxt->errNo == 0)
10118 ret = 1;
10119 else
10120 ret = ctxt->errNo;
10121 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010122 ret = 0;
10123 }
10124
10125 if (lst != NULL && (ret == 0 || recover == 1)) {
10126 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010127
10128 /*
10129 * Return the newly created nodeset after unlinking it from
10130 * they pseudo parent.
10131 */
10132 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010133 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010134 while (cur != NULL) {
10135 cur->parent = NULL;
10136 cur = cur->next;
10137 }
10138 newDoc->children->children = NULL;
10139 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000010140
Owen Taylor3473f882001-02-23 17:55:21 +000010141 if (sax != NULL)
10142 ctxt->sax = oldsax;
10143 xmlFreeParserCtxt(ctxt);
10144 newDoc->intSubset = NULL;
10145 newDoc->extSubset = NULL;
10146 xmlFreeDoc(newDoc);
10147
10148 return(ret);
10149}
10150
10151/**
10152 * xmlSAXParseEntity:
10153 * @sax: the SAX handler block
10154 * @filename: the filename
10155 *
10156 * parse an XML external entity out of context and build a tree.
10157 * It use the given SAX function block to handle the parsing callback.
10158 * If sax is NULL, fallback to the default DOM tree building routines.
10159 *
10160 * [78] extParsedEnt ::= TextDecl? content
10161 *
10162 * This correspond to a "Well Balanced" chunk
10163 *
10164 * Returns the resulting document tree
10165 */
10166
10167xmlDocPtr
10168xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
10169 xmlDocPtr ret;
10170 xmlParserCtxtPtr ctxt;
10171 char *directory = NULL;
10172
10173 ctxt = xmlCreateFileParserCtxt(filename);
10174 if (ctxt == NULL) {
10175 return(NULL);
10176 }
10177 if (sax != NULL) {
10178 if (ctxt->sax != NULL)
10179 xmlFree(ctxt->sax);
10180 ctxt->sax = sax;
10181 ctxt->userData = NULL;
10182 }
10183
10184 if ((ctxt->directory == NULL) && (directory == NULL))
10185 directory = xmlParserGetDirectory(filename);
10186
10187 xmlParseExtParsedEnt(ctxt);
10188
10189 if (ctxt->wellFormed)
10190 ret = ctxt->myDoc;
10191 else {
10192 ret = NULL;
10193 xmlFreeDoc(ctxt->myDoc);
10194 ctxt->myDoc = NULL;
10195 }
10196 if (sax != NULL)
10197 ctxt->sax = NULL;
10198 xmlFreeParserCtxt(ctxt);
10199
10200 return(ret);
10201}
10202
10203/**
10204 * xmlParseEntity:
10205 * @filename: the filename
10206 *
10207 * parse an XML external entity out of context and build a tree.
10208 *
10209 * [78] extParsedEnt ::= TextDecl? content
10210 *
10211 * This correspond to a "Well Balanced" chunk
10212 *
10213 * Returns the resulting document tree
10214 */
10215
10216xmlDocPtr
10217xmlParseEntity(const char *filename) {
10218 return(xmlSAXParseEntity(NULL, filename));
10219}
10220
10221/**
10222 * xmlCreateEntityParserCtxt:
10223 * @URL: the entity URL
10224 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010225 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000010226 *
10227 * Create a parser context for an external entity
10228 * Automatic support for ZLIB/Compress compressed document is provided
10229 * by default if found at compile-time.
10230 *
10231 * Returns the new parser context or NULL
10232 */
10233xmlParserCtxtPtr
10234xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10235 const xmlChar *base) {
10236 xmlParserCtxtPtr ctxt;
10237 xmlParserInputPtr inputStream;
10238 char *directory = NULL;
10239 xmlChar *uri;
10240
10241 ctxt = xmlNewParserCtxt();
10242 if (ctxt == NULL) {
10243 return(NULL);
10244 }
10245
10246 uri = xmlBuildURI(URL, base);
10247
10248 if (uri == NULL) {
10249 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
10250 if (inputStream == NULL) {
10251 xmlFreeParserCtxt(ctxt);
10252 return(NULL);
10253 }
10254
10255 inputPush(ctxt, inputStream);
10256
10257 if ((ctxt->directory == NULL) && (directory == NULL))
10258 directory = xmlParserGetDirectory((char *)URL);
10259 if ((ctxt->directory == NULL) && (directory != NULL))
10260 ctxt->directory = directory;
10261 } else {
10262 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
10263 if (inputStream == NULL) {
10264 xmlFree(uri);
10265 xmlFreeParserCtxt(ctxt);
10266 return(NULL);
10267 }
10268
10269 inputPush(ctxt, inputStream);
10270
10271 if ((ctxt->directory == NULL) && (directory == NULL))
10272 directory = xmlParserGetDirectory((char *)uri);
10273 if ((ctxt->directory == NULL) && (directory != NULL))
10274 ctxt->directory = directory;
10275 xmlFree(uri);
10276 }
10277
10278 return(ctxt);
10279}
10280
10281/************************************************************************
10282 * *
10283 * Front ends when parsing from a file *
10284 * *
10285 ************************************************************************/
10286
10287/**
10288 * xmlCreateFileParserCtxt:
10289 * @filename: the filename
10290 *
10291 * Create a parser context for a file content.
10292 * Automatic support for ZLIB/Compress compressed document is provided
10293 * by default if found at compile-time.
10294 *
10295 * Returns the new parser context or NULL
10296 */
10297xmlParserCtxtPtr
10298xmlCreateFileParserCtxt(const char *filename)
10299{
10300 xmlParserCtxtPtr ctxt;
10301 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000010302 char *directory = NULL;
Daniel Veillardf4862f02002-09-10 11:13:43 +000010303 xmlChar *normalized;
Owen Taylor3473f882001-02-23 17:55:21 +000010304
Owen Taylor3473f882001-02-23 17:55:21 +000010305 ctxt = xmlNewParserCtxt();
10306 if (ctxt == NULL) {
10307 if (xmlDefaultSAXHandler.error != NULL) {
10308 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
10309 }
10310 return(NULL);
10311 }
10312
Daniel Veillardf4862f02002-09-10 11:13:43 +000010313 normalized = xmlNormalizeWindowsPath((const xmlChar *) filename);
10314 if (normalized == NULL) {
10315 xmlFreeParserCtxt(ctxt);
10316 return(NULL);
10317 }
10318 inputStream = xmlLoadExternalEntity((char *) normalized, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010319 if (inputStream == NULL) {
10320 xmlFreeParserCtxt(ctxt);
Daniel Veillardf4862f02002-09-10 11:13:43 +000010321 xmlFree(normalized);
Owen Taylor3473f882001-02-23 17:55:21 +000010322 return(NULL);
10323 }
10324
Owen Taylor3473f882001-02-23 17:55:21 +000010325 inputPush(ctxt, inputStream);
10326 if ((ctxt->directory == NULL) && (directory == NULL))
Daniel Veillardf4862f02002-09-10 11:13:43 +000010327 directory = xmlParserGetDirectory((char *) normalized);
Owen Taylor3473f882001-02-23 17:55:21 +000010328 if ((ctxt->directory == NULL) && (directory != NULL))
10329 ctxt->directory = directory;
10330
Daniel Veillardf4862f02002-09-10 11:13:43 +000010331 xmlFree(normalized);
10332
Owen Taylor3473f882001-02-23 17:55:21 +000010333 return(ctxt);
10334}
10335
10336/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010337 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000010338 * @sax: the SAX handler block
10339 * @filename: the filename
10340 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10341 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000010342 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000010343 *
10344 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10345 * compressed document is provided by default if found at compile-time.
10346 * It use the given SAX function block to handle the parsing callback.
10347 * If sax is NULL, fallback to the default DOM tree building routines.
10348 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000010349 * User data (void *) is stored within the parser context in the
10350 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000010351 *
Owen Taylor3473f882001-02-23 17:55:21 +000010352 * Returns the resulting document tree
10353 */
10354
10355xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000010356xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
10357 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000010358 xmlDocPtr ret;
10359 xmlParserCtxtPtr ctxt;
10360 char *directory = NULL;
10361
Daniel Veillard635ef722001-10-29 11:48:19 +000010362 xmlInitParser();
10363
Owen Taylor3473f882001-02-23 17:55:21 +000010364 ctxt = xmlCreateFileParserCtxt(filename);
10365 if (ctxt == NULL) {
10366 return(NULL);
10367 }
10368 if (sax != NULL) {
10369 if (ctxt->sax != NULL)
10370 xmlFree(ctxt->sax);
10371 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010372 }
Daniel Veillarda293c322001-10-02 13:54:14 +000010373 if (data!=NULL) {
10374 ctxt->_private=data;
10375 }
Owen Taylor3473f882001-02-23 17:55:21 +000010376
10377 if ((ctxt->directory == NULL) && (directory == NULL))
10378 directory = xmlParserGetDirectory(filename);
10379 if ((ctxt->directory == NULL) && (directory != NULL))
10380 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
10381
Daniel Veillarddad3f682002-11-17 16:47:27 +000010382 ctxt->recovery = recovery;
10383
Owen Taylor3473f882001-02-23 17:55:21 +000010384 xmlParseDocument(ctxt);
10385
10386 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10387 else {
10388 ret = NULL;
10389 xmlFreeDoc(ctxt->myDoc);
10390 ctxt->myDoc = NULL;
10391 }
10392 if (sax != NULL)
10393 ctxt->sax = NULL;
10394 xmlFreeParserCtxt(ctxt);
10395
10396 return(ret);
10397}
10398
10399/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010400 * xmlSAXParseFile:
10401 * @sax: the SAX handler block
10402 * @filename: the filename
10403 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10404 * documents
10405 *
10406 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10407 * compressed document is provided by default if found at compile-time.
10408 * It use the given SAX function block to handle the parsing callback.
10409 * If sax is NULL, fallback to the default DOM tree building routines.
10410 *
10411 * Returns the resulting document tree
10412 */
10413
10414xmlDocPtr
10415xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
10416 int recovery) {
10417 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
10418}
10419
10420/**
Owen Taylor3473f882001-02-23 17:55:21 +000010421 * xmlRecoverDoc:
10422 * @cur: a pointer to an array of xmlChar
10423 *
10424 * parse an XML in-memory document and build a tree.
10425 * In the case the document is not Well Formed, a tree is built anyway
10426 *
10427 * Returns the resulting document tree
10428 */
10429
10430xmlDocPtr
10431xmlRecoverDoc(xmlChar *cur) {
10432 return(xmlSAXParseDoc(NULL, cur, 1));
10433}
10434
10435/**
10436 * xmlParseFile:
10437 * @filename: the filename
10438 *
10439 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10440 * compressed document is provided by default if found at compile-time.
10441 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000010442 * Returns the resulting document tree if the file was wellformed,
10443 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000010444 */
10445
10446xmlDocPtr
10447xmlParseFile(const char *filename) {
10448 return(xmlSAXParseFile(NULL, filename, 0));
10449}
10450
10451/**
10452 * xmlRecoverFile:
10453 * @filename: the filename
10454 *
10455 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10456 * compressed document is provided by default if found at compile-time.
10457 * In the case the document is not Well Formed, a tree is built anyway
10458 *
10459 * Returns the resulting document tree
10460 */
10461
10462xmlDocPtr
10463xmlRecoverFile(const char *filename) {
10464 return(xmlSAXParseFile(NULL, filename, 1));
10465}
10466
10467
10468/**
10469 * xmlSetupParserForBuffer:
10470 * @ctxt: an XML parser context
10471 * @buffer: a xmlChar * buffer
10472 * @filename: a file name
10473 *
10474 * Setup the parser context to parse a new buffer; Clears any prior
10475 * contents from the parser context. The buffer parameter must not be
10476 * NULL, but the filename parameter can be
10477 */
10478void
10479xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
10480 const char* filename)
10481{
10482 xmlParserInputPtr input;
10483
10484 input = xmlNewInputStream(ctxt);
10485 if (input == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +000010486 xmlGenericError(xmlGenericErrorContext,
10487 "malloc");
Owen Taylor3473f882001-02-23 17:55:21 +000010488 xmlFree(ctxt);
10489 return;
10490 }
10491
10492 xmlClearParserCtxt(ctxt);
10493 if (filename != NULL)
10494 input->filename = xmlMemStrdup(filename);
10495 input->base = buffer;
10496 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010497 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000010498 inputPush(ctxt, input);
10499}
10500
10501/**
10502 * xmlSAXUserParseFile:
10503 * @sax: a SAX handler
10504 * @user_data: The user data returned on SAX callbacks
10505 * @filename: a file name
10506 *
10507 * parse an XML file and call the given SAX handler routines.
10508 * Automatic support for ZLIB/Compress compressed document is provided
10509 *
10510 * Returns 0 in case of success or a error number otherwise
10511 */
10512int
10513xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10514 const char *filename) {
10515 int ret = 0;
10516 xmlParserCtxtPtr ctxt;
10517
10518 ctxt = xmlCreateFileParserCtxt(filename);
10519 if (ctxt == NULL) return -1;
10520 if (ctxt->sax != &xmlDefaultSAXHandler)
10521 xmlFree(ctxt->sax);
10522 ctxt->sax = sax;
10523 if (user_data != NULL)
10524 ctxt->userData = user_data;
10525
10526 xmlParseDocument(ctxt);
10527
10528 if (ctxt->wellFormed)
10529 ret = 0;
10530 else {
10531 if (ctxt->errNo != 0)
10532 ret = ctxt->errNo;
10533 else
10534 ret = -1;
10535 }
10536 if (sax != NULL)
10537 ctxt->sax = NULL;
10538 xmlFreeParserCtxt(ctxt);
10539
10540 return ret;
10541}
10542
10543/************************************************************************
10544 * *
10545 * Front ends when parsing from memory *
10546 * *
10547 ************************************************************************/
10548
10549/**
10550 * xmlCreateMemoryParserCtxt:
10551 * @buffer: a pointer to a char array
10552 * @size: the size of the array
10553 *
10554 * Create a parser context for an XML in-memory document.
10555 *
10556 * Returns the new parser context or NULL
10557 */
10558xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010559xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010560 xmlParserCtxtPtr ctxt;
10561 xmlParserInputPtr input;
10562 xmlParserInputBufferPtr buf;
10563
10564 if (buffer == NULL)
10565 return(NULL);
10566 if (size <= 0)
10567 return(NULL);
10568
10569 ctxt = xmlNewParserCtxt();
10570 if (ctxt == NULL)
10571 return(NULL);
10572
10573 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010574 if (buf == NULL) {
10575 xmlFreeParserCtxt(ctxt);
10576 return(NULL);
10577 }
Owen Taylor3473f882001-02-23 17:55:21 +000010578
10579 input = xmlNewInputStream(ctxt);
10580 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010581 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010582 xmlFreeParserCtxt(ctxt);
10583 return(NULL);
10584 }
10585
10586 input->filename = NULL;
10587 input->buf = buf;
10588 input->base = input->buf->buffer->content;
10589 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010590 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010591
10592 inputPush(ctxt, input);
10593 return(ctxt);
10594}
10595
10596/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010597 * xmlSAXParseMemoryWithData:
10598 * @sax: the SAX handler block
10599 * @buffer: an pointer to a char array
10600 * @size: the size of the array
10601 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10602 * documents
10603 * @data: the userdata
10604 *
10605 * parse an XML in-memory block and use the given SAX function block
10606 * to handle the parsing callback. If sax is NULL, fallback to the default
10607 * DOM tree building routines.
10608 *
10609 * User data (void *) is stored within the parser context in the
10610 * context's _private member, so it is available nearly everywhere in libxml
10611 *
10612 * Returns the resulting document tree
10613 */
10614
10615xmlDocPtr
10616xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
10617 int size, int recovery, void *data) {
10618 xmlDocPtr ret;
10619 xmlParserCtxtPtr ctxt;
10620
10621 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10622 if (ctxt == NULL) return(NULL);
10623 if (sax != NULL) {
10624 if (ctxt->sax != NULL)
10625 xmlFree(ctxt->sax);
10626 ctxt->sax = sax;
10627 }
10628 if (data!=NULL) {
10629 ctxt->_private=data;
10630 }
10631
10632 xmlParseDocument(ctxt);
10633
10634 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10635 else {
10636 ret = NULL;
10637 xmlFreeDoc(ctxt->myDoc);
10638 ctxt->myDoc = NULL;
10639 }
10640 if (sax != NULL)
10641 ctxt->sax = NULL;
10642 xmlFreeParserCtxt(ctxt);
10643
10644 return(ret);
10645}
10646
10647/**
Owen Taylor3473f882001-02-23 17:55:21 +000010648 * xmlSAXParseMemory:
10649 * @sax: the SAX handler block
10650 * @buffer: an pointer to a char array
10651 * @size: the size of the array
10652 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10653 * documents
10654 *
10655 * parse an XML in-memory block and use the given SAX function block
10656 * to handle the parsing callback. If sax is NULL, fallback to the default
10657 * DOM tree building routines.
10658 *
10659 * Returns the resulting document tree
10660 */
10661xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010662xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10663 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010664 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010665}
10666
10667/**
10668 * xmlParseMemory:
10669 * @buffer: an pointer to a char array
10670 * @size: the size of the array
10671 *
10672 * parse an XML in-memory block and build a tree.
10673 *
10674 * Returns the resulting document tree
10675 */
10676
Daniel Veillard50822cb2001-07-26 20:05:51 +000010677xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010678 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10679}
10680
10681/**
10682 * xmlRecoverMemory:
10683 * @buffer: an pointer to a char array
10684 * @size: the size of the array
10685 *
10686 * parse an XML in-memory block and build a tree.
10687 * In the case the document is not Well Formed, a tree is built anyway
10688 *
10689 * Returns the resulting document tree
10690 */
10691
Daniel Veillard50822cb2001-07-26 20:05:51 +000010692xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010693 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10694}
10695
10696/**
10697 * xmlSAXUserParseMemory:
10698 * @sax: a SAX handler
10699 * @user_data: The user data returned on SAX callbacks
10700 * @buffer: an in-memory XML document input
10701 * @size: the length of the XML document in bytes
10702 *
10703 * A better SAX parsing routine.
10704 * parse an XML in-memory buffer and call the given SAX handler routines.
10705 *
10706 * Returns 0 in case of success or a error number otherwise
10707 */
10708int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010709 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010710 int ret = 0;
10711 xmlParserCtxtPtr ctxt;
10712 xmlSAXHandlerPtr oldsax = NULL;
10713
Daniel Veillard9e923512002-08-14 08:48:52 +000010714 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000010715 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10716 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000010717 oldsax = ctxt->sax;
10718 ctxt->sax = sax;
Daniel Veillard30211a02001-04-26 09:33:18 +000010719 if (user_data != NULL)
10720 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010721
10722 xmlParseDocument(ctxt);
10723
10724 if (ctxt->wellFormed)
10725 ret = 0;
10726 else {
10727 if (ctxt->errNo != 0)
10728 ret = ctxt->errNo;
10729 else
10730 ret = -1;
10731 }
Daniel Veillard9e923512002-08-14 08:48:52 +000010732 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000010733 xmlFreeParserCtxt(ctxt);
10734
10735 return ret;
10736}
10737
10738/**
10739 * xmlCreateDocParserCtxt:
10740 * @cur: a pointer to an array of xmlChar
10741 *
10742 * Creates a parser context for an XML in-memory document.
10743 *
10744 * Returns the new parser context or NULL
10745 */
10746xmlParserCtxtPtr
10747xmlCreateDocParserCtxt(xmlChar *cur) {
10748 int len;
10749
10750 if (cur == NULL)
10751 return(NULL);
10752 len = xmlStrlen(cur);
10753 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10754}
10755
10756/**
10757 * xmlSAXParseDoc:
10758 * @sax: the SAX handler block
10759 * @cur: a pointer to an array of xmlChar
10760 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10761 * documents
10762 *
10763 * parse an XML in-memory document and build a tree.
10764 * It use the given SAX function block to handle the parsing callback.
10765 * If sax is NULL, fallback to the default DOM tree building routines.
10766 *
10767 * Returns the resulting document tree
10768 */
10769
10770xmlDocPtr
10771xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10772 xmlDocPtr ret;
10773 xmlParserCtxtPtr ctxt;
10774
10775 if (cur == NULL) return(NULL);
10776
10777
10778 ctxt = xmlCreateDocParserCtxt(cur);
10779 if (ctxt == NULL) return(NULL);
10780 if (sax != NULL) {
10781 ctxt->sax = sax;
10782 ctxt->userData = NULL;
10783 }
10784
10785 xmlParseDocument(ctxt);
10786 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10787 else {
10788 ret = NULL;
10789 xmlFreeDoc(ctxt->myDoc);
10790 ctxt->myDoc = NULL;
10791 }
10792 if (sax != NULL)
10793 ctxt->sax = NULL;
10794 xmlFreeParserCtxt(ctxt);
10795
10796 return(ret);
10797}
10798
10799/**
10800 * xmlParseDoc:
10801 * @cur: a pointer to an array of xmlChar
10802 *
10803 * parse an XML in-memory document and build a tree.
10804 *
10805 * Returns the resulting document tree
10806 */
10807
10808xmlDocPtr
10809xmlParseDoc(xmlChar *cur) {
10810 return(xmlSAXParseDoc(NULL, cur, 0));
10811}
10812
Daniel Veillard8107a222002-01-13 14:10:10 +000010813/************************************************************************
10814 * *
10815 * Specific function to keep track of entities references *
10816 * and used by the XSLT debugger *
10817 * *
10818 ************************************************************************/
10819
10820static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
10821
10822/**
10823 * xmlAddEntityReference:
10824 * @ent : A valid entity
10825 * @firstNode : A valid first node for children of entity
10826 * @lastNode : A valid last node of children entity
10827 *
10828 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
10829 */
10830static void
10831xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
10832 xmlNodePtr lastNode)
10833{
10834 if (xmlEntityRefFunc != NULL) {
10835 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
10836 }
10837}
10838
10839
10840/**
10841 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000010842 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000010843 *
10844 * Set the function to call call back when a xml reference has been made
10845 */
10846void
10847xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
10848{
10849 xmlEntityRefFunc = func;
10850}
Owen Taylor3473f882001-02-23 17:55:21 +000010851
10852/************************************************************************
10853 * *
10854 * Miscellaneous *
10855 * *
10856 ************************************************************************/
10857
10858#ifdef LIBXML_XPATH_ENABLED
10859#include <libxml/xpath.h>
10860#endif
10861
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010862extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000010863static int xmlParserInitialized = 0;
10864
10865/**
10866 * xmlInitParser:
10867 *
10868 * Initialization function for the XML parser.
10869 * This is not reentrant. Call once before processing in case of
10870 * use in multithreaded programs.
10871 */
10872
10873void
10874xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000010875 if (xmlParserInitialized != 0)
10876 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010877
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010878 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
10879 (xmlGenericError == NULL))
10880 initGenericErrorDefaultFunc(NULL);
Daniel Veillardd0463562001-10-13 09:15:48 +000010881 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000010882 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000010883 xmlInitCharEncodingHandlers();
10884 xmlInitializePredefinedEntities();
10885 xmlDefaultSAXHandlerInit();
10886 xmlRegisterDefaultInputCallbacks();
10887 xmlRegisterDefaultOutputCallbacks();
10888#ifdef LIBXML_HTML_ENABLED
10889 htmlInitAutoClose();
10890 htmlDefaultSAXHandlerInit();
10891#endif
10892#ifdef LIBXML_XPATH_ENABLED
10893 xmlXPathInit();
10894#endif
10895 xmlParserInitialized = 1;
10896}
10897
10898/**
10899 * xmlCleanupParser:
10900 *
10901 * Cleanup function for the XML parser. It tries to reclaim all
10902 * parsing related global memory allocated for the parser processing.
10903 * It doesn't deallocate any document related memory. Calling this
10904 * function should not prevent reusing the parser.
10905 */
10906
10907void
10908xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000010909 xmlCleanupCharEncodingHandlers();
10910 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000010911#ifdef LIBXML_CATALOG_ENABLED
10912 xmlCatalogCleanup();
10913#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000010914 xmlCleanupThreads();
10915 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010916}