blob: 9078c0ca38390bbac5b138064c836c4e21fef9d8 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
44#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000045#include <libxml/threads.h>
46#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000056#ifdef LIBXML_CATALOG_ENABLED
57#include <libxml/catalog.h>
58#endif
Owen Taylor3473f882001-02-23 17:55:21 +000059
60#ifdef HAVE_CTYPE_H
61#include <ctype.h>
62#endif
63#ifdef HAVE_STDLIB_H
64#include <stdlib.h>
65#endif
66#ifdef HAVE_SYS_STAT_H
67#include <sys/stat.h>
68#endif
69#ifdef HAVE_FCNTL_H
70#include <fcntl.h>
71#endif
72#ifdef HAVE_UNISTD_H
73#include <unistd.h>
74#endif
75#ifdef HAVE_ZLIB_H
76#include <zlib.h>
77#endif
78
/**
 * MAX_DEPTH:
 *
 * Arbitrary ceiling on the nesting depth of the XML documents that we
 * accept to process. This is not a limitation of the parser itself but a
 * safety boundary feature.
 */
#define MAX_DEPTH 1024

/* Sizes of the parser work buffers; the users are outside this section. */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/* Marker string identifying a document built in SAX compatibility mode. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/*
 * List of XML prefixed PI allowed by W3C specs (NULL terminated).
 */

static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
101
102/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000103xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
104 const xmlChar **str);
105
Daniel Veillard257d9102001-05-08 10:41:44 +0000106static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000107xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
108 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000109 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000110 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000111
Daniel Veillard8107a222002-01-13 14:10:10 +0000112static void
113xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
114 xmlNodePtr lastNode);
115
Daniel Veillard328f48c2002-11-15 15:24:34 +0000116static int
117xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
118 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Owen Taylor3473f882001-02-23 17:55:21 +0000119/************************************************************************
120 * *
121 * Parser stacks related functions and macros *
122 * *
123 ************************************************************************/
124
125xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
126 const xmlChar ** str);
127
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000128/**
129 * inputPush:
130 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000131 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000132 *
133 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000134 *
135 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000136 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000137extern int
138inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
139{
140 if (ctxt->inputNr >= ctxt->inputMax) {
141 ctxt->inputMax *= 2;
142 ctxt->inputTab =
143 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
144 ctxt->inputMax *
145 sizeof(ctxt->inputTab[0]));
146 if (ctxt->inputTab == NULL) {
147 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
148 return (0);
149 }
150 }
151 ctxt->inputTab[ctxt->inputNr] = value;
152 ctxt->input = value;
153 return (ctxt->inputNr++);
154}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000155/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000156 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000157 * @ctxt: an XML parser context
158 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000159 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000160 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000161 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000162 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000163extern xmlParserInputPtr
164inputPop(xmlParserCtxtPtr ctxt)
165{
166 xmlParserInputPtr ret;
167
168 if (ctxt->inputNr <= 0)
169 return (0);
170 ctxt->inputNr--;
171 if (ctxt->inputNr > 0)
172 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
173 else
174 ctxt->input = NULL;
175 ret = ctxt->inputTab[ctxt->inputNr];
176 ctxt->inputTab[ctxt->inputNr] = 0;
177 return (ret);
178}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000179/**
180 * nodePush:
181 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000182 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000183 *
184 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000185 *
186 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000187 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000188extern int
189nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
190{
191 if (ctxt->nodeNr >= ctxt->nodeMax) {
192 ctxt->nodeMax *= 2;
193 ctxt->nodeTab =
194 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
195 ctxt->nodeMax *
196 sizeof(ctxt->nodeTab[0]));
197 if (ctxt->nodeTab == NULL) {
198 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
199 return (0);
200 }
201 }
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000202#ifdef MAX_DEPTH
203 if (ctxt->nodeNr > MAX_DEPTH) {
204 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
205 ctxt->sax->error(ctxt->userData,
206 "Excessive depth in document: change MAX_DEPTH = %d\n",
207 MAX_DEPTH);
208 ctxt->wellFormed = 0;
209 ctxt->instate = XML_PARSER_EOF;
210 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
211 return(0);
212 }
213#endif
Daniel Veillard1c732d22002-11-30 11:22:59 +0000214 ctxt->nodeTab[ctxt->nodeNr] = value;
215 ctxt->node = value;
216 return (ctxt->nodeNr++);
217}
218/**
219 * nodePop:
220 * @ctxt: an XML parser context
221 *
222 * Pops the top element node from the node stack
223 *
224 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +0000225 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000226extern xmlNodePtr
227nodePop(xmlParserCtxtPtr ctxt)
228{
229 xmlNodePtr ret;
230
231 if (ctxt->nodeNr <= 0)
232 return (0);
233 ctxt->nodeNr--;
234 if (ctxt->nodeNr > 0)
235 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
236 else
237 ctxt->node = NULL;
238 ret = ctxt->nodeTab[ctxt->nodeNr];
239 ctxt->nodeTab[ctxt->nodeNr] = 0;
240 return (ret);
241}
242/**
243 * namePush:
244 * @ctxt: an XML parser context
245 * @value: the element name
246 *
247 * Pushes a new element name on top of the name stack
248 *
249 * Returns 0 in case of error, the index in the stack otherwise
250 */
251extern int
252namePush(xmlParserCtxtPtr ctxt, xmlChar * value)
253{
254 if (ctxt->nameNr >= ctxt->nameMax) {
255 ctxt->nameMax *= 2;
256 ctxt->nameTab =
257 (xmlChar * *)xmlRealloc(ctxt->nameTab,
258 ctxt->nameMax *
259 sizeof(ctxt->nameTab[0]));
260 if (ctxt->nameTab == NULL) {
261 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
262 return (0);
263 }
264 }
265 ctxt->nameTab[ctxt->nameNr] = value;
266 ctxt->name = value;
267 return (ctxt->nameNr++);
268}
269/**
270 * namePop:
271 * @ctxt: an XML parser context
272 *
273 * Pops the top element name from the name stack
274 *
275 * Returns the name just removed
276 */
277extern xmlChar *
278namePop(xmlParserCtxtPtr ctxt)
279{
280 xmlChar *ret;
281
282 if (ctxt->nameNr <= 0)
283 return (0);
284 ctxt->nameNr--;
285 if (ctxt->nameNr > 0)
286 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
287 else
288 ctxt->name = NULL;
289 ret = ctxt->nameTab[ctxt->nameNr];
290 ctxt->nameTab[ctxt->nameNr] = 0;
291 return (ret);
292}
Owen Taylor3473f882001-02-23 17:55:21 +0000293
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000294static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000295 if (ctxt->spaceNr >= ctxt->spaceMax) {
296 ctxt->spaceMax *= 2;
297 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
298 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
299 if (ctxt->spaceTab == NULL) {
300 xmlGenericError(xmlGenericErrorContext,
301 "realloc failed !\n");
302 return(0);
303 }
304 }
305 ctxt->spaceTab[ctxt->spaceNr] = val;
306 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
307 return(ctxt->spaceNr++);
308}
309
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000310static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000311 int ret;
312 if (ctxt->spaceNr <= 0) return(0);
313 ctxt->spaceNr--;
314 if (ctxt->spaceNr > 0)
315 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
316 else
317 ctxt->space = NULL;
318 ret = ctxt->spaceTab[ctxt->spaceNr];
319 ctxt->spaceTab[ctxt->spaceNr] = -1;
320 return(ret);
321}
322
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip l xmlChar in the input buffer
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
356
/*
 * Raw accessors on the current input. All of them expect a variable named
 * ctxt to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * SKIP(val): advance the character count and the input cursor by val
 * (val is evaluated twice), hand a '%' found at the new position to
 * xmlParserHandlePEReference(), then pop the input if it is exhausted and
 * cannot be grown any further.
 */
#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val);                                             \
    ctxt->input->cur += (val);                                          \
    if (*ctxt->input->cur == '%')                                       \
        xmlParserHandlePEReference(ctxt);                               \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
        xmlPopInput(ctxt);                                              \
  } while (0)
369
Daniel Veillard46de64e2002-05-29 08:21:33 +0000370#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) \
371 xmlSHRINK (ctxt);
372
373static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
374 xmlParserInputShrink(ctxt->input);
375 if ((*ctxt->input->cur == 0) &&
376 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
377 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000378 }
Owen Taylor3473f882001-02-23 17:55:21 +0000379
Daniel Veillard46de64e2002-05-29 08:21:33 +0000380#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) \
381 xmlGROW (ctxt);
382
383static void xmlGROW (xmlParserCtxtPtr ctxt) {
384 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
385 if ((*ctxt->input->cur == 0) &&
386 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
387 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000388 }
Owen Taylor3473f882001-02-23 17:55:21 +0000389
/* Shorthands for the character-level helpers, bound to the local ctxt. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one xmlChar and try to refill the buffer when
 * the cursor lands on a 0 byte. Unlike SKIP it neither handles '%' nor
 * pops the input. The expansion is a plain brace block, not do/while.
 */
#define NEXT1 {                                                         \
        ctxt->input->cur++;                                             \
        ctxt->nbChars++;                                                \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

/*
 * NEXTL(l): update line/col from the character under the cursor, advance
 * the cursor by l xmlChars, then hand a '%' found at the new position to
 * xmlParserHandlePEReference().
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++;                                            \
        ctxt->input->col = 1;                                           \
    } else {                                                            \
        ctxt->input->col++;                                             \
    }                                                                   \
    ctxt->input->cur += l;                                              \
    if (*ctxt->input->cur == '%')                                       \
        xmlParserHandlePEReference(ctxt);                               \
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i, as a single
 * xmlChar when l is 1 and through xmlCopyCharMultiByte() otherwise, and
 * advance i accordingly.
 * NOTE: expands to an unbraced if/else without a trailing ';'.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if (l == 1) b[i++] = (xmlChar) v;                                   \
    else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +0000415
416/**
417 * xmlSkipBlankChars:
418 * @ctxt: the XML parser context
419 *
420 * skip all blanks character found at that point in the input streams.
421 * It pops up finished entities in the process if allowable at that point.
422 *
423 * Returns the number of space chars skipped
424 */
425
426int
427xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000428 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000429
430 /*
431 * It's Okay to use CUR/NEXT here since all the blanks are on
432 * the ASCII range.
433 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000434 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
435 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000436 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000437 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000438 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000439 cur = ctxt->input->cur;
440 while (IS_BLANK(*cur)) {
441 if (*cur == '\n') {
442 ctxt->input->line++; ctxt->input->col = 1;
443 }
444 cur++;
445 res++;
446 if (*cur == 0) {
447 ctxt->input->cur = cur;
448 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
449 cur = ctxt->input->cur;
450 }
451 }
452 ctxt->input->cur = cur;
453 } else {
454 int cur;
455 do {
456 cur = CUR;
457 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
458 NEXT;
459 cur = CUR;
460 res++;
461 }
462 while ((cur == 0) && (ctxt->inputNr > 1) &&
463 (ctxt->instate != XML_PARSER_COMMENT)) {
464 xmlPopInput(ctxt);
465 cur = CUR;
466 }
467 /*
468 * Need to handle support of entities branching here
469 */
470 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
471 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
472 }
Owen Taylor3473f882001-02-23 17:55:21 +0000473 return(res);
474}
475
476/************************************************************************
477 * *
478 * Commodity functions to handle entities *
479 * *
480 ************************************************************************/
481
482/**
483 * xmlPopInput:
484 * @ctxt: an XML parser context
485 *
486 * xmlPopInput: the current input pointed by ctxt->input came to an end
487 * pop it and return the next char.
488 *
489 * Returns the current xmlChar in the parser context
490 */
491xmlChar
492xmlPopInput(xmlParserCtxtPtr ctxt) {
493 if (ctxt->inputNr == 1) return(0); /* End of main Input */
494 if (xmlParserDebugEntities)
495 xmlGenericError(xmlGenericErrorContext,
496 "Popping input %d\n", ctxt->inputNr);
497 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +0000498 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000499 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
500 return(xmlPopInput(ctxt));
501 return(CUR);
502}
503
504/**
505 * xmlPushInput:
506 * @ctxt: an XML parser context
507 * @input: an XML parser input fragment (entity, XML fragment ...).
508 *
509 * xmlPushInput: switch to a new input stream which is stacked on top
510 * of the previous one(s).
511 */
512void
513xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
514 if (input == NULL) return;
515
516 if (xmlParserDebugEntities) {
517 if ((ctxt->input != NULL) && (ctxt->input->filename))
518 xmlGenericError(xmlGenericErrorContext,
519 "%s(%d): ", ctxt->input->filename,
520 ctxt->input->line);
521 xmlGenericError(xmlGenericErrorContext,
522 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
523 }
524 inputPush(ctxt, input);
525 GROW;
526}
527
528/**
529 * xmlParseCharRef:
530 * @ctxt: an XML parser context
531 *
532 * parse Reference declarations
533 *
534 * [66] CharRef ::= '&#' [0-9]+ ';' |
535 * '&#x' [0-9a-fA-F]+ ';'
536 *
537 * [ WFC: Legal Character ]
538 * Characters referred to using character references must match the
539 * production for Char.
540 *
541 * Returns the value parsed (as an int), 0 in case of error
542 */
543int
544xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000545 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000546 int count = 0;
547
Owen Taylor3473f882001-02-23 17:55:21 +0000548 /*
549 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
550 */
Daniel Veillard561b7f82002-03-20 21:55:57 +0000551 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +0000552 (NXT(2) == 'x')) {
553 SKIP(3);
554 GROW;
555 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000556 if (count++ > 20) {
557 count = 0;
558 GROW;
559 }
560 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000561 val = val * 16 + (CUR - '0');
562 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
563 val = val * 16 + (CUR - 'a') + 10;
564 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
565 val = val * 16 + (CUR - 'A') + 10;
566 else {
567 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
568 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
569 ctxt->sax->error(ctxt->userData,
570 "xmlParseCharRef: invalid hexadecimal value\n");
571 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000572 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000573 val = 0;
574 break;
575 }
576 NEXT;
577 count++;
578 }
579 if (RAW == ';') {
580 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
581 ctxt->nbChars ++;
582 ctxt->input->cur++;
583 }
Daniel Veillard561b7f82002-03-20 21:55:57 +0000584 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +0000585 SKIP(2);
586 GROW;
587 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000588 if (count++ > 20) {
589 count = 0;
590 GROW;
591 }
592 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000593 val = val * 10 + (CUR - '0');
594 else {
595 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
596 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
597 ctxt->sax->error(ctxt->userData,
598 "xmlParseCharRef: invalid decimal value\n");
599 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000600 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000601 val = 0;
602 break;
603 }
604 NEXT;
605 count++;
606 }
607 if (RAW == ';') {
608 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
609 ctxt->nbChars ++;
610 ctxt->input->cur++;
611 }
612 } else {
613 ctxt->errNo = XML_ERR_INVALID_CHARREF;
614 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
615 ctxt->sax->error(ctxt->userData,
616 "xmlParseCharRef: invalid value\n");
617 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000618 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000619 }
620
621 /*
622 * [ WFC: Legal Character ]
623 * Characters referred to using character references must match the
624 * production for Char.
625 */
626 if (IS_CHAR(val)) {
627 return(val);
628 } else {
629 ctxt->errNo = XML_ERR_INVALID_CHAR;
630 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000631 ctxt->sax->error(ctxt->userData,
632 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000633 val);
634 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000635 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000636 }
637 return(0);
638}
639
640/**
641 * xmlParseStringCharRef:
642 * @ctxt: an XML parser context
643 * @str: a pointer to an index in the string
644 *
645 * parse Reference declarations, variant parsing from a string rather
646 * than an an input flow.
647 *
648 * [66] CharRef ::= '&#' [0-9]+ ';' |
649 * '&#x' [0-9a-fA-F]+ ';'
650 *
651 * [ WFC: Legal Character ]
652 * Characters referred to using character references must match the
653 * production for Char.
654 *
655 * Returns the value parsed (as an int), 0 in case of error, str will be
656 * updated to the current value of the index
657 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000658static int
Owen Taylor3473f882001-02-23 17:55:21 +0000659xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
660 const xmlChar *ptr;
661 xmlChar cur;
662 int val = 0;
663
664 if ((str == NULL) || (*str == NULL)) return(0);
665 ptr = *str;
666 cur = *ptr;
667 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
668 ptr += 3;
669 cur = *ptr;
670 while (cur != ';') { /* Non input consuming loop */
671 if ((cur >= '0') && (cur <= '9'))
672 val = val * 16 + (cur - '0');
673 else if ((cur >= 'a') && (cur <= 'f'))
674 val = val * 16 + (cur - 'a') + 10;
675 else if ((cur >= 'A') && (cur <= 'F'))
676 val = val * 16 + (cur - 'A') + 10;
677 else {
678 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
679 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
680 ctxt->sax->error(ctxt->userData,
681 "xmlParseStringCharRef: invalid hexadecimal value\n");
682 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000683 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000684 val = 0;
685 break;
686 }
687 ptr++;
688 cur = *ptr;
689 }
690 if (cur == ';')
691 ptr++;
692 } else if ((cur == '&') && (ptr[1] == '#')){
693 ptr += 2;
694 cur = *ptr;
695 while (cur != ';') { /* Non input consuming loops */
696 if ((cur >= '0') && (cur <= '9'))
697 val = val * 10 + (cur - '0');
698 else {
699 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
700 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
701 ctxt->sax->error(ctxt->userData,
702 "xmlParseStringCharRef: invalid decimal value\n");
703 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000704 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000705 val = 0;
706 break;
707 }
708 ptr++;
709 cur = *ptr;
710 }
711 if (cur == ';')
712 ptr++;
713 } else {
714 ctxt->errNo = XML_ERR_INVALID_CHARREF;
715 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
716 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000717 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000718 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000719 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000720 return(0);
721 }
722 *str = ptr;
723
724 /*
725 * [ WFC: Legal Character ]
726 * Characters referred to using character references must match the
727 * production for Char.
728 */
729 if (IS_CHAR(val)) {
730 return(val);
731 } else {
732 ctxt->errNo = XML_ERR_INVALID_CHAR;
733 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
734 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000735 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000736 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000737 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000738 }
739 return(0);
740}
741
742/**
Daniel Veillardf5582f12002-06-11 10:08:16 +0000743 * xmlNewBlanksWrapperInputStream:
744 * @ctxt: an XML parser context
745 * @entity: an Entity pointer
746 *
747 * Create a new input stream for wrapping
748 * blanks around a PEReference
749 *
750 * Returns the new input stream or NULL
751 */
752
753static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
754
Daniel Veillardf4862f02002-09-10 11:13:43 +0000755static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +0000756xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
757 xmlParserInputPtr input;
758 xmlChar *buffer;
759 size_t length;
760 if (entity == NULL) {
761 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
762 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
763 ctxt->sax->error(ctxt->userData,
764 "internal: xmlNewBlanksWrapperInputStream entity = NULL\n");
765 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
766 return(NULL);
767 }
768 if (xmlParserDebugEntities)
769 xmlGenericError(xmlGenericErrorContext,
770 "new blanks wrapper for entity: %s\n", entity->name);
771 input = xmlNewInputStream(ctxt);
772 if (input == NULL) {
773 return(NULL);
774 }
775 length = xmlStrlen(entity->name) + 5;
776 buffer = xmlMalloc(length);
777 if (buffer == NULL) {
778 return(NULL);
779 }
780 buffer [0] = ' ';
781 buffer [1] = '%';
782 buffer [length-3] = ';';
783 buffer [length-2] = ' ';
784 buffer [length-1] = 0;
785 memcpy(buffer + 2, entity->name, length - 5);
786 input->free = deallocblankswrapper;
787 input->base = buffer;
788 input->cur = buffer;
789 input->length = length;
790 input->end = &buffer[length];
791 return(input);
792}
793
794/**
Owen Taylor3473f882001-02-23 17:55:21 +0000795 * xmlParserHandlePEReference:
796 * @ctxt: the parser context
797 *
798 * [69] PEReference ::= '%' Name ';'
799 *
800 * [ WFC: No Recursion ]
801 * A parsed entity must not contain a recursive
802 * reference to itself, either directly or indirectly.
803 *
804 * [ WFC: Entity Declared ]
805 * In a document without any DTD, a document with only an internal DTD
806 * subset which contains no parameter entity references, or a document
807 * with "standalone='yes'", ... ... The declaration of a parameter
808 * entity must precede any reference to it...
809 *
810 * [ VC: Entity Declared ]
811 * In a document with an external subset or external parameter entities
812 * with "standalone='no'", ... ... The declaration of a parameter entity
813 * must precede any reference to it...
814 *
815 * [ WFC: In DTD ]
816 * Parameter-entity references may only appear in the DTD.
817 * NOTE: misleading but this is handled.
818 *
819 * A PEReference may have been detected in the current input stream
820 * the handling is done accordingly to
821 * http://www.w3.org/TR/REC-xml#entproc
822 * i.e.
823 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000824 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000825 */
826void
827xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
828 xmlChar *name;
829 xmlEntityPtr entity = NULL;
830 xmlParserInputPtr input;
831
Owen Taylor3473f882001-02-23 17:55:21 +0000832 if (RAW != '%') return;
833 switch(ctxt->instate) {
834 case XML_PARSER_CDATA_SECTION:
835 return;
836 case XML_PARSER_COMMENT:
837 return;
838 case XML_PARSER_START_TAG:
839 return;
840 case XML_PARSER_END_TAG:
841 return;
842 case XML_PARSER_EOF:
843 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
844 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
845 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
846 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000847 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000848 return;
849 case XML_PARSER_PROLOG:
850 case XML_PARSER_START:
851 case XML_PARSER_MISC:
852 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
853 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
854 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
855 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000856 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000857 return;
858 case XML_PARSER_ENTITY_DECL:
859 case XML_PARSER_CONTENT:
860 case XML_PARSER_ATTRIBUTE_VALUE:
861 case XML_PARSER_PI:
862 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +0000863 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +0000864 /* we just ignore it there */
865 return;
866 case XML_PARSER_EPILOG:
867 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
868 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
869 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
870 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000871 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000872 return;
873 case XML_PARSER_ENTITY_VALUE:
874 /*
875 * NOTE: in the case of entity values, we don't do the
876 * substitution here since we need the literal
877 * entity value to be able to save the internal
878 * subset of the document.
879 * This will be handled by xmlStringDecodeEntities
880 */
881 return;
882 case XML_PARSER_DTD:
883 /*
884 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
885 * In the internal DTD subset, parameter-entity references
886 * can occur only where markup declarations can occur, not
887 * within markup declarations.
888 * In that case this is handled in xmlParseMarkupDecl
889 */
890 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
891 return;
Daniel Veillardf5582f12002-06-11 10:08:16 +0000892 if (IS_BLANK(NXT(1)) || NXT(1) == 0)
893 return;
Owen Taylor3473f882001-02-23 17:55:21 +0000894 break;
895 case XML_PARSER_IGNORE:
896 return;
897 }
898
899 NEXT;
900 name = xmlParseName(ctxt);
901 if (xmlParserDebugEntities)
902 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000903 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000904 if (name == NULL) {
905 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
906 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000907 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000908 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000909 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000910 } else {
911 if (RAW == ';') {
912 NEXT;
913 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
914 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
915 if (entity == NULL) {
916
917 /*
918 * [ WFC: Entity Declared ]
919 * In a document without any DTD, a document with only an
920 * internal DTD subset which contains no parameter entity
921 * references, or a document with "standalone='yes'", ...
922 * ... The declaration of a parameter entity must precede
923 * any reference to it...
924 */
925 if ((ctxt->standalone == 1) ||
926 ((ctxt->hasExternalSubset == 0) &&
927 (ctxt->hasPErefs == 0))) {
928 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
929 ctxt->sax->error(ctxt->userData,
930 "PEReference: %%%s; not found\n", name);
931 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000932 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000933 } else {
934 /*
935 * [ VC: Entity Declared ]
936 * In a document with an external subset or external
937 * parameter entities with "standalone='no'", ...
938 * ... The declaration of a parameter entity must precede
939 * any reference to it...
940 */
941 if ((!ctxt->disableSAX) &&
942 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
943 ctxt->vctxt.error(ctxt->vctxt.userData,
944 "PEReference: %%%s; not found\n", name);
945 } else if ((!ctxt->disableSAX) &&
946 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
947 ctxt->sax->warning(ctxt->userData,
948 "PEReference: %%%s; not found\n", name);
949 ctxt->valid = 0;
950 }
Daniel Veillardf5582f12002-06-11 10:08:16 +0000951 } else if (ctxt->input->free != deallocblankswrapper) {
952 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
953 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +0000954 } else {
955 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
956 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000957 xmlChar start[4];
958 xmlCharEncoding enc;
959
Owen Taylor3473f882001-02-23 17:55:21 +0000960 /*
961 * handle the extra spaces added before and after
962 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000963 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000964 */
965 input = xmlNewEntityInputStream(ctxt, entity);
966 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000967
968 /*
969 * Get the 4 first bytes and decode the charset
970 * if enc != XML_CHAR_ENCODING_NONE
971 * plug some encoding conversion routines.
972 */
973 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +0000974 if (entity->length >= 4) {
975 start[0] = RAW;
976 start[1] = NXT(1);
977 start[2] = NXT(2);
978 start[3] = NXT(3);
979 enc = xmlDetectCharEncoding(start, 4);
980 if (enc != XML_CHAR_ENCODING_NONE) {
981 xmlSwitchEncoding(ctxt, enc);
982 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000983 }
984
Owen Taylor3473f882001-02-23 17:55:21 +0000985 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
986 (RAW == '<') && (NXT(1) == '?') &&
987 (NXT(2) == 'x') && (NXT(3) == 'm') &&
988 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
989 xmlParseTextDecl(ctxt);
990 }
Owen Taylor3473f882001-02-23 17:55:21 +0000991 } else {
992 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
993 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000994 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000995 name);
996 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000997 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000998 }
999 }
1000 } else {
1001 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1002 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1003 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001004 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001005 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001006 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001007 }
1008 xmlFree(name);
1009 }
1010}
1011
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates buffer to that size. The result
 * of xmlRealloc is kept in a temporary so that, when the reallocation
 * fails, the previous allocation is still reachable: it is released, an
 * error is reported and the enclosing function returns NULL.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_tmp;						\
    buffer##_size *= 2;							\
    buffer##_tmp = (xmlChar *)						\
        xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));		\
    if (buffer##_tmp == NULL) {						\
        xmlGenericError(xmlGenericErrorContext, "realloc failed");	\
        xmlFree(buffer);						\
        buffer = NULL;							\
        return(NULL);							\
    }									\
    buffer = buffer##_tmp;						\
}
1024
1025/**
1026 * xmlStringDecodeEntities:
1027 * @ctxt: the parser context
1028 * @str: the input string
1029 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1030 * @end: an end marker xmlChar, 0 if none
1031 * @end2: an end marker xmlChar, 0 if none
1032 * @end3: an end marker xmlChar, 0 if none
1033 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001034 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001035 *
1036 * [67] Reference ::= EntityRef | CharRef
1037 *
1038 * [69] PEReference ::= '%' Name ';'
1039 *
1040 * Returns A newly allocated string with the substitution done. The caller
1041 * must deallocate it !
1042 */
1043xmlChar *
1044xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1045 xmlChar end, xmlChar end2, xmlChar end3) {
1046 xmlChar *buffer = NULL;
1047 int buffer_size = 0;
1048
1049 xmlChar *current = NULL;
1050 xmlEntityPtr ent;
1051 int c,l;
1052 int nbchars = 0;
1053
1054 if (str == NULL)
1055 return(NULL);
1056
1057 if (ctxt->depth > 40) {
1058 ctxt->errNo = XML_ERR_ENTITY_LOOP;
1059 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1060 ctxt->sax->error(ctxt->userData,
1061 "Detected entity reference loop\n");
1062 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001063 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001064 return(NULL);
1065 }
1066
1067 /*
1068 * allocate a translation buffer.
1069 */
1070 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1071 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1072 if (buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00001073 xmlGenericError(xmlGenericErrorContext,
1074 "xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00001075 return(NULL);
1076 }
1077
1078 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001079 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001080 * we are operating on already parsed values.
1081 */
1082 c = CUR_SCHAR(str, l);
1083 while ((c != 0) && (c != end) && /* non input consuming loop */
1084 (c != end2) && (c != end3)) {
1085
1086 if (c == 0) break;
1087 if ((c == '&') && (str[1] == '#')) {
1088 int val = xmlParseStringCharRef(ctxt, &str);
1089 if (val != 0) {
1090 COPY_BUF(0,buffer,nbchars,val);
1091 }
1092 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1093 if (xmlParserDebugEntities)
1094 xmlGenericError(xmlGenericErrorContext,
1095 "String decoding Entity Reference: %.30s\n",
1096 str);
1097 ent = xmlParseStringEntityRef(ctxt, &str);
1098 if ((ent != NULL) &&
1099 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1100 if (ent->content != NULL) {
1101 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1102 } else {
1103 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1104 ctxt->sax->error(ctxt->userData,
1105 "internal error entity has no content\n");
1106 }
1107 } else if ((ent != NULL) && (ent->content != NULL)) {
1108 xmlChar *rep;
1109
1110 ctxt->depth++;
1111 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1112 0, 0, 0);
1113 ctxt->depth--;
1114 if (rep != NULL) {
1115 current = rep;
1116 while (*current != 0) { /* non input consuming loop */
1117 buffer[nbchars++] = *current++;
1118 if (nbchars >
1119 buffer_size - XML_PARSER_BUFFER_SIZE) {
1120 growBuffer(buffer);
1121 }
1122 }
1123 xmlFree(rep);
1124 }
1125 } else if (ent != NULL) {
1126 int i = xmlStrlen(ent->name);
1127 const xmlChar *cur = ent->name;
1128
1129 buffer[nbchars++] = '&';
1130 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1131 growBuffer(buffer);
1132 }
1133 for (;i > 0;i--)
1134 buffer[nbchars++] = *cur++;
1135 buffer[nbchars++] = ';';
1136 }
1137 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1138 if (xmlParserDebugEntities)
1139 xmlGenericError(xmlGenericErrorContext,
1140 "String decoding PE Reference: %.30s\n", str);
1141 ent = xmlParseStringPEReference(ctxt, &str);
1142 if (ent != NULL) {
1143 xmlChar *rep;
1144
1145 ctxt->depth++;
1146 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1147 0, 0, 0);
1148 ctxt->depth--;
1149 if (rep != NULL) {
1150 current = rep;
1151 while (*current != 0) { /* non input consuming loop */
1152 buffer[nbchars++] = *current++;
1153 if (nbchars >
1154 buffer_size - XML_PARSER_BUFFER_SIZE) {
1155 growBuffer(buffer);
1156 }
1157 }
1158 xmlFree(rep);
1159 }
1160 }
1161 } else {
1162 COPY_BUF(l,buffer,nbchars,c);
1163 str += l;
1164 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1165 growBuffer(buffer);
1166 }
1167 }
1168 c = CUR_SCHAR(str, l);
1169 }
1170 buffer[nbchars++] = 0;
1171 return(buffer);
1172}
1173
1174
1175/************************************************************************
1176 * *
1177 * Commodity functions to handle xmlChars *
1178 * *
1179 ************************************************************************/
1180
1181/**
1182 * xmlStrndup:
1183 * @cur: the input xmlChar *
1184 * @len: the len of @cur
1185 *
1186 * a strndup for array of xmlChar's
1187 *
1188 * Returns a new xmlChar * or NULL
1189 */
1190xmlChar *
1191xmlStrndup(const xmlChar *cur, int len) {
1192 xmlChar *ret;
1193
1194 if ((cur == NULL) || (len < 0)) return(NULL);
1195 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1196 if (ret == NULL) {
1197 xmlGenericError(xmlGenericErrorContext,
1198 "malloc of %ld byte failed\n",
1199 (len + 1) * (long)sizeof(xmlChar));
1200 return(NULL);
1201 }
1202 memcpy(ret, cur, len * sizeof(xmlChar));
1203 ret[len] = 0;
1204 return(ret);
1205}
1206
1207/**
1208 * xmlStrdup:
1209 * @cur: the input xmlChar *
1210 *
1211 * a strdup for array of xmlChar's. Since they are supposed to be
1212 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1213 * a termination mark of '0'.
1214 *
1215 * Returns a new xmlChar * or NULL
1216 */
1217xmlChar *
1218xmlStrdup(const xmlChar *cur) {
1219 const xmlChar *p = cur;
1220
1221 if (cur == NULL) return(NULL);
1222 while (*p != 0) p++; /* non input consuming */
1223 return(xmlStrndup(cur, p - cur));
1224}
1225
1226/**
1227 * xmlCharStrndup:
1228 * @cur: the input char *
1229 * @len: the len of @cur
1230 *
1231 * a strndup for char's to xmlChar's
1232 *
1233 * Returns a new xmlChar * or NULL
1234 */
1235
1236xmlChar *
1237xmlCharStrndup(const char *cur, int len) {
1238 int i;
1239 xmlChar *ret;
1240
1241 if ((cur == NULL) || (len < 0)) return(NULL);
1242 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1243 if (ret == NULL) {
1244 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1245 (len + 1) * (long)sizeof(xmlChar));
1246 return(NULL);
1247 }
1248 for (i = 0;i < len;i++)
1249 ret[i] = (xmlChar) cur[i];
1250 ret[len] = 0;
1251 return(ret);
1252}
1253
1254/**
1255 * xmlCharStrdup:
1256 * @cur: the input char *
Owen Taylor3473f882001-02-23 17:55:21 +00001257 *
1258 * a strdup for char's to xmlChar's
1259 *
1260 * Returns a new xmlChar * or NULL
1261 */
1262
1263xmlChar *
1264xmlCharStrdup(const char *cur) {
1265 const char *p = cur;
1266
1267 if (cur == NULL) return(NULL);
1268 while (*p != '\0') p++; /* non input consuming */
1269 return(xmlCharStrndup(cur, p - cur));
1270}
1271
1272/**
1273 * xmlStrcmp:
1274 * @str1: the first xmlChar *
1275 * @str2: the second xmlChar *
1276 *
1277 * a strcmp for xmlChar's
1278 *
1279 * Returns the integer result of the comparison
1280 */
1281
1282int
1283xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1284 register int tmp;
1285
1286 if (str1 == str2) return(0);
1287 if (str1 == NULL) return(-1);
1288 if (str2 == NULL) return(1);
1289 do {
1290 tmp = *str1++ - *str2;
1291 if (tmp != 0) return(tmp);
1292 } while (*str2++ != 0);
1293 return 0;
1294}
1295
1296/**
1297 * xmlStrEqual:
1298 * @str1: the first xmlChar *
1299 * @str2: the second xmlChar *
1300 *
1301 * Check if both string are equal of have same content
1302 * Should be a bit more readable and faster than xmlStrEqual()
1303 *
1304 * Returns 1 if they are equal, 0 if they are different
1305 */
1306
1307int
1308xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1309 if (str1 == str2) return(1);
1310 if (str1 == NULL) return(0);
1311 if (str2 == NULL) return(0);
1312 do {
1313 if (*str1++ != *str2) return(0);
1314 } while (*str2++);
1315 return(1);
1316}
1317
1318/**
1319 * xmlStrncmp:
1320 * @str1: the first xmlChar *
1321 * @str2: the second xmlChar *
1322 * @len: the max comparison length
1323 *
1324 * a strncmp for xmlChar's
1325 *
1326 * Returns the integer result of the comparison
1327 */
1328
1329int
1330xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1331 register int tmp;
1332
1333 if (len <= 0) return(0);
1334 if (str1 == str2) return(0);
1335 if (str1 == NULL) return(-1);
1336 if (str2 == NULL) return(1);
1337 do {
1338 tmp = *str1++ - *str2;
1339 if (tmp != 0 || --len == 0) return(tmp);
1340 } while (*str2++ != 0);
1341 return 0;
1342}
1343
Daniel Veillardb44025c2001-10-11 22:55:55 +00001344static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001345 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1346 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1347 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1348 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1349 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1350 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1351 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1352 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1353 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1354 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1355 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1356 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1357 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1358 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1359 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1360 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1361 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1362 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1363 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1364 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1365 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1366 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1367 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1368 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1369 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1370 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1371 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1372 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1373 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1374 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1375 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1376 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1377};
1378
1379/**
1380 * xmlStrcasecmp:
1381 * @str1: the first xmlChar *
1382 * @str2: the second xmlChar *
1383 *
1384 * a strcasecmp for xmlChar's
1385 *
1386 * Returns the integer result of the comparison
1387 */
1388
1389int
1390xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1391 register int tmp;
1392
1393 if (str1 == str2) return(0);
1394 if (str1 == NULL) return(-1);
1395 if (str2 == NULL) return(1);
1396 do {
1397 tmp = casemap[*str1++] - casemap[*str2];
1398 if (tmp != 0) return(tmp);
1399 } while (*str2++ != 0);
1400 return 0;
1401}
1402
1403/**
1404 * xmlStrncasecmp:
1405 * @str1: the first xmlChar *
1406 * @str2: the second xmlChar *
1407 * @len: the max comparison length
1408 *
1409 * a strncasecmp for xmlChar's
1410 *
1411 * Returns the integer result of the comparison
1412 */
1413
1414int
1415xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1416 register int tmp;
1417
1418 if (len <= 0) return(0);
1419 if (str1 == str2) return(0);
1420 if (str1 == NULL) return(-1);
1421 if (str2 == NULL) return(1);
1422 do {
1423 tmp = casemap[*str1++] - casemap[*str2];
1424 if (tmp != 0 || --len == 0) return(tmp);
1425 } while (*str2++ != 0);
1426 return 0;
1427}
1428
1429/**
1430 * xmlStrchr:
1431 * @str: the xmlChar * array
1432 * @val: the xmlChar to search
1433 *
1434 * a strchr for xmlChar's
1435 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001436 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001437 */
1438
1439const xmlChar *
1440xmlStrchr(const xmlChar *str, xmlChar val) {
1441 if (str == NULL) return(NULL);
1442 while (*str != 0) { /* non input consuming */
1443 if (*str == val) return((xmlChar *) str);
1444 str++;
1445 }
1446 return(NULL);
1447}
1448
1449/**
1450 * xmlStrstr:
1451 * @str: the xmlChar * array (haystack)
1452 * @val: the xmlChar to search (needle)
1453 *
1454 * a strstr for xmlChar's
1455 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001456 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001457 */
1458
1459const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001460xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001461 int n;
1462
1463 if (str == NULL) return(NULL);
1464 if (val == NULL) return(NULL);
1465 n = xmlStrlen(val);
1466
1467 if (n == 0) return(str);
1468 while (*str != 0) { /* non input consuming */
1469 if (*str == *val) {
1470 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1471 }
1472 str++;
1473 }
1474 return(NULL);
1475}
1476
1477/**
1478 * xmlStrcasestr:
1479 * @str: the xmlChar * array (haystack)
1480 * @val: the xmlChar to search (needle)
1481 *
1482 * a case-ignoring strstr for xmlChar's
1483 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001484 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001485 */
1486
1487const xmlChar *
1488xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1489 int n;
1490
1491 if (str == NULL) return(NULL);
1492 if (val == NULL) return(NULL);
1493 n = xmlStrlen(val);
1494
1495 if (n == 0) return(str);
1496 while (*str != 0) { /* non input consuming */
1497 if (casemap[*str] == casemap[*val])
1498 if (!xmlStrncasecmp(str, val, n)) return(str);
1499 str++;
1500 }
1501 return(NULL);
1502}
1503
1504/**
1505 * xmlStrsub:
1506 * @str: the xmlChar * array (haystack)
1507 * @start: the index of the first char (zero based)
1508 * @len: the length of the substring
1509 *
1510 * Extract a substring of a given string
1511 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001512 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001513 */
1514
1515xmlChar *
1516xmlStrsub(const xmlChar *str, int start, int len) {
1517 int i;
1518
1519 if (str == NULL) return(NULL);
1520 if (start < 0) return(NULL);
1521 if (len < 0) return(NULL);
1522
1523 for (i = 0;i < start;i++) {
1524 if (*str == 0) return(NULL);
1525 str++;
1526 }
1527 if (*str == 0) return(NULL);
1528 return(xmlStrndup(str, len));
1529}
1530
1531/**
1532 * xmlStrlen:
1533 * @str: the xmlChar * array
1534 *
1535 * length of a xmlChar's string
1536 *
1537 * Returns the number of xmlChar contained in the ARRAY.
1538 */
1539
1540int
1541xmlStrlen(const xmlChar *str) {
1542 int len = 0;
1543
1544 if (str == NULL) return(0);
1545 while (*str != 0) { /* non input consuming */
1546 str++;
1547 len++;
1548 }
1549 return(len);
1550}
1551
1552/**
1553 * xmlStrncat:
1554 * @cur: the original xmlChar * array
1555 * @add: the xmlChar * array added
1556 * @len: the length of @add
1557 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001558 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001559 * first bytes of @add.
1560 *
1561 * Returns a new xmlChar *, the original @cur is reallocated if needed
1562 * and should not be freed
1563 */
1564
1565xmlChar *
1566xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1567 int size;
1568 xmlChar *ret;
1569
1570 if ((add == NULL) || (len == 0))
1571 return(cur);
1572 if (cur == NULL)
1573 return(xmlStrndup(add, len));
1574
1575 size = xmlStrlen(cur);
1576 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1577 if (ret == NULL) {
1578 xmlGenericError(xmlGenericErrorContext,
1579 "xmlStrncat: realloc of %ld byte failed\n",
1580 (size + len + 1) * (long)sizeof(xmlChar));
1581 return(cur);
1582 }
1583 memcpy(&ret[size], add, len * sizeof(xmlChar));
1584 ret[size + len] = 0;
1585 return(ret);
1586}
1587
1588/**
1589 * xmlStrcat:
1590 * @cur: the original xmlChar * array
1591 * @add: the xmlChar * array added
1592 *
1593 * a strcat for array of xmlChar's. Since they are supposed to be
1594 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1595 * a termination mark of '0'.
1596 *
1597 * Returns a new xmlChar * containing the concatenated string.
1598 */
1599xmlChar *
1600xmlStrcat(xmlChar *cur, const xmlChar *add) {
1601 const xmlChar *p = add;
1602
1603 if (add == NULL) return(cur);
1604 if (cur == NULL)
1605 return(xmlStrdup(add));
1606
1607 while (*p != 0) p++; /* non input consuming */
1608 return(xmlStrncat(cur, add, p - add));
1609}
1610
1611/************************************************************************
1612 * *
1613 * Commodity functions, cleanup needed ? *
1614 * *
1615 ************************************************************************/
1616
1617/**
1618 * areBlanks:
1619 * @ctxt: an XML parser context
1620 * @str: a xmlChar *
1621 * @len: the size of @str
1622 *
1623 * Is this a sequence of blank chars that one can ignore ?
1624 *
1625 * Returns 1 if ignorable 0 otherwise.
1626 */
1627
1628static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1629 int i, ret;
1630 xmlNodePtr lastChild;
1631
Daniel Veillard05c13a22001-09-09 08:38:09 +00001632 /*
1633 * Don't spend time trying to differentiate them, the same callback is
1634 * used !
1635 */
1636 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001637 return(0);
1638
Owen Taylor3473f882001-02-23 17:55:21 +00001639 /*
1640 * Check for xml:space value.
1641 */
1642 if (*(ctxt->space) == 1)
1643 return(0);
1644
1645 /*
1646 * Check that the string is made of blanks
1647 */
1648 for (i = 0;i < len;i++)
1649 if (!(IS_BLANK(str[i]))) return(0);
1650
1651 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001652 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001653 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001654 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001655 if (ctxt->myDoc != NULL) {
1656 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1657 if (ret == 0) return(1);
1658 if (ret == 1) return(0);
1659 }
1660
1661 /*
1662 * Otherwise, heuristic :-\
1663 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001664 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001665 if ((ctxt->node->children == NULL) &&
1666 (RAW == '<') && (NXT(1) == '/')) return(0);
1667
1668 lastChild = xmlGetLastChild(ctxt->node);
1669 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001670 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1671 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001672 } else if (xmlNodeIsText(lastChild))
1673 return(0);
1674 else if ((ctxt->node->children != NULL) &&
1675 (xmlNodeIsText(ctxt->node->children)))
1676 return(0);
1677 return(1);
1678}
1679
Owen Taylor3473f882001-02-23 17:55:21 +00001680/************************************************************************
1681 * *
1682 * Extra stuff for namespace support *
1683 * Relates to http://www.w3.org/TR/WD-xml-names *
1684 * *
1685 ************************************************************************/
1686
1687/**
1688 * xmlSplitQName:
1689 * @ctxt: an XML parser context
1690 * @name: an XML parser context
1691 * @prefix: a xmlChar **
1692 *
1693 * parse an UTF8 encoded XML qualified name string
1694 *
1695 * [NS 5] QName ::= (Prefix ':')? LocalPart
1696 *
1697 * [NS 6] Prefix ::= NCName
1698 *
1699 * [NS 7] LocalPart ::= NCName
1700 *
1701 * Returns the local part, and prefix is updated
1702 * to get the Prefix if any.
1703 */
1704
1705xmlChar *
1706xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1707 xmlChar buf[XML_MAX_NAMELEN + 5];
1708 xmlChar *buffer = NULL;
1709 int len = 0;
1710 int max = XML_MAX_NAMELEN;
1711 xmlChar *ret = NULL;
1712 const xmlChar *cur = name;
1713 int c;
1714
1715 *prefix = NULL;
1716
1717#ifndef XML_XML_NAMESPACE
1718 /* xml: prefix is not really a namespace */
1719 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1720 (cur[2] == 'l') && (cur[3] == ':'))
1721 return(xmlStrdup(name));
1722#endif
1723
1724 /* nasty but valid */
1725 if (cur[0] == ':')
1726 return(xmlStrdup(name));
1727
1728 c = *cur++;
1729 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1730 buf[len++] = c;
1731 c = *cur++;
1732 }
1733 if (len >= max) {
1734 /*
1735 * Okay someone managed to make a huge name, so he's ready to pay
1736 * for the processing speed.
1737 */
1738 max = len * 2;
1739
1740 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1741 if (buffer == NULL) {
1742 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1743 ctxt->sax->error(ctxt->userData,
1744 "xmlSplitQName: out of memory\n");
1745 return(NULL);
1746 }
1747 memcpy(buffer, buf, len);
1748 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1749 if (len + 10 > max) {
1750 max *= 2;
1751 buffer = (xmlChar *) xmlRealloc(buffer,
1752 max * sizeof(xmlChar));
1753 if (buffer == NULL) {
1754 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1755 ctxt->sax->error(ctxt->userData,
1756 "xmlSplitQName: out of memory\n");
1757 return(NULL);
1758 }
1759 }
1760 buffer[len++] = c;
1761 c = *cur++;
1762 }
1763 buffer[len] = 0;
1764 }
1765
1766 if (buffer == NULL)
1767 ret = xmlStrndup(buf, len);
1768 else {
1769 ret = buffer;
1770 buffer = NULL;
1771 max = XML_MAX_NAMELEN;
1772 }
1773
1774
1775 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00001776 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001777 if (c == 0) return(ret);
1778 *prefix = ret;
1779 len = 0;
1780
Daniel Veillardbb284f42002-10-16 18:02:47 +00001781 /*
1782 * Check that the first character is proper to start
1783 * a new name
1784 */
1785 if (!(((c >= 0x61) && (c <= 0x7A)) ||
1786 ((c >= 0x41) && (c <= 0x5A)) ||
1787 (c == '_') || (c == ':'))) {
1788 int l;
1789 int first = CUR_SCHAR(cur, l);
1790
1791 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001792 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1793 (ctxt->sax->error != NULL))
Daniel Veillardbb284f42002-10-16 18:02:47 +00001794 ctxt->sax->error(ctxt->userData,
1795 "Name %s is not XML Namespace compliant\n",
1796 name);
1797 }
1798 }
1799 cur++;
1800
Owen Taylor3473f882001-02-23 17:55:21 +00001801 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1802 buf[len++] = c;
1803 c = *cur++;
1804 }
1805 if (len >= max) {
1806 /*
1807 * Okay someone managed to make a huge name, so he's ready to pay
1808 * for the processing speed.
1809 */
1810 max = len * 2;
1811
1812 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1813 if (buffer == NULL) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001814 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1815 (ctxt->sax->error != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00001816 ctxt->sax->error(ctxt->userData,
1817 "xmlSplitQName: out of memory\n");
1818 return(NULL);
1819 }
1820 memcpy(buffer, buf, len);
1821 while (c != 0) { /* tested bigname2.xml */
1822 if (len + 10 > max) {
1823 max *= 2;
1824 buffer = (xmlChar *) xmlRealloc(buffer,
1825 max * sizeof(xmlChar));
1826 if (buffer == NULL) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001827 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1828 (ctxt->sax->error != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00001829 ctxt->sax->error(ctxt->userData,
1830 "xmlSplitQName: out of memory\n");
1831 return(NULL);
1832 }
1833 }
1834 buffer[len++] = c;
1835 c = *cur++;
1836 }
1837 buffer[len] = 0;
1838 }
1839
1840 if (buffer == NULL)
1841 ret = xmlStrndup(buf, len);
1842 else {
1843 ret = buffer;
1844 }
1845 }
1846
1847 return(ret);
1848}
1849
1850/************************************************************************
1851 * *
1852 * The parser itself *
1853 * Relates to http://www.w3.org/TR/REC-xml *
1854 * *
1855 ************************************************************************/
1856
Daniel Veillard76d66f42001-05-16 21:05:17 +00001857static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001858/**
1859 * xmlParseName:
1860 * @ctxt: an XML parser context
1861 *
1862 * parse an XML name.
1863 *
1864 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1865 * CombiningChar | Extender
1866 *
1867 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1868 *
1869 * [6] Names ::= Name (S Name)*
1870 *
1871 * Returns the Name parsed or NULL
1872 */
1873
1874xmlChar *
1875xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001876 const xmlChar *in;
1877 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001878 int count = 0;
1879
1880 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001881
1882 /*
1883 * Accelerator for simple ASCII names
1884 */
1885 in = ctxt->input->cur;
1886 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1887 ((*in >= 0x41) && (*in <= 0x5A)) ||
1888 (*in == '_') || (*in == ':')) {
1889 in++;
1890 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1891 ((*in >= 0x41) && (*in <= 0x5A)) ||
1892 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001893 (*in == '_') || (*in == '-') ||
1894 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001895 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001896 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001897 count = in - ctxt->input->cur;
1898 ret = xmlStrndup(ctxt->input->cur, count);
1899 ctxt->input->cur = in;
1900 return(ret);
1901 }
1902 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001903 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001904}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001905
Daniel Veillard46de64e2002-05-29 08:21:33 +00001906/**
1907 * xmlParseNameAndCompare:
1908 * @ctxt: an XML parser context
1909 *
1910 * parse an XML name and compares for match
1911 * (specialized for endtag parsing)
1912 *
1913 *
1914 * Returns NULL for an illegal name, (xmlChar*) 1 for success
1915 * and the name for mismatch
1916 */
1917
Daniel Veillardf4862f02002-09-10 11:13:43 +00001918static xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00001919xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
1920 const xmlChar *cmp = other;
1921 const xmlChar *in;
1922 xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00001923
1924 GROW;
1925
1926 in = ctxt->input->cur;
1927 while (*in != 0 && *in == *cmp) {
1928 ++in;
1929 ++cmp;
1930 }
1931 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
1932 /* success */
1933 ctxt->input->cur = in;
1934 return (xmlChar*) 1;
1935 }
1936 /* failure (or end of input buffer), check with full function */
1937 ret = xmlParseName (ctxt);
1938 if (ret != 0 && xmlStrEqual (ret, other)) {
1939 xmlFree (ret);
1940 return (xmlChar*) 1;
1941 }
1942 return ret;
1943}
1944
Daniel Veillard76d66f42001-05-16 21:05:17 +00001945static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001946xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1947 xmlChar buf[XML_MAX_NAMELEN + 5];
1948 int len = 0, l;
1949 int c;
1950 int count = 0;
1951
1952 /*
1953 * Handler for more complex cases
1954 */
1955 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001956 c = CUR_CHAR(l);
1957 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1958 (!IS_LETTER(c) && (c != '_') &&
1959 (c != ':'))) {
1960 return(NULL);
1961 }
1962
1963 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1964 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1965 (c == '.') || (c == '-') ||
1966 (c == '_') || (c == ':') ||
1967 (IS_COMBINING(c)) ||
1968 (IS_EXTENDER(c)))) {
1969 if (count++ > 100) {
1970 count = 0;
1971 GROW;
1972 }
1973 COPY_BUF(l,buf,len,c);
1974 NEXTL(l);
1975 c = CUR_CHAR(l);
1976 if (len >= XML_MAX_NAMELEN) {
1977 /*
1978 * Okay someone managed to make a huge name, so he's ready to pay
1979 * for the processing speed.
1980 */
1981 xmlChar *buffer;
1982 int max = len * 2;
1983
1984 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1985 if (buffer == NULL) {
1986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1987 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001988 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001989 return(NULL);
1990 }
1991 memcpy(buffer, buf, len);
1992 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1993 (c == '.') || (c == '-') ||
1994 (c == '_') || (c == ':') ||
1995 (IS_COMBINING(c)) ||
1996 (IS_EXTENDER(c))) {
1997 if (count++ > 100) {
1998 count = 0;
1999 GROW;
2000 }
2001 if (len + 10 > max) {
2002 max *= 2;
2003 buffer = (xmlChar *) xmlRealloc(buffer,
2004 max * sizeof(xmlChar));
2005 if (buffer == NULL) {
2006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2007 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00002008 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002009 return(NULL);
2010 }
2011 }
2012 COPY_BUF(l,buffer,len,c);
2013 NEXTL(l);
2014 c = CUR_CHAR(l);
2015 }
2016 buffer[len] = 0;
2017 return(buffer);
2018 }
2019 }
2020 return(xmlStrndup(buf, len));
2021}
2022
2023/**
2024 * xmlParseStringName:
2025 * @ctxt: an XML parser context
2026 * @str: a pointer to the string pointer (IN/OUT)
2027 *
2028 * parse an XML name.
2029 *
2030 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2031 * CombiningChar | Extender
2032 *
2033 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2034 *
2035 * [6] Names ::= Name (S Name)*
2036 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002037 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002038 * is updated to the current location in the string.
2039 */
2040
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002041static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002042xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2043 xmlChar buf[XML_MAX_NAMELEN + 5];
2044 const xmlChar *cur = *str;
2045 int len = 0, l;
2046 int c;
2047
2048 c = CUR_SCHAR(cur, l);
2049 if (!IS_LETTER(c) && (c != '_') &&
2050 (c != ':')) {
2051 return(NULL);
2052 }
2053
2054 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2055 (c == '.') || (c == '-') ||
2056 (c == '_') || (c == ':') ||
2057 (IS_COMBINING(c)) ||
2058 (IS_EXTENDER(c))) {
2059 COPY_BUF(l,buf,len,c);
2060 cur += l;
2061 c = CUR_SCHAR(cur, l);
2062 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2063 /*
2064 * Okay someone managed to make a huge name, so he's ready to pay
2065 * for the processing speed.
2066 */
2067 xmlChar *buffer;
2068 int max = len * 2;
2069
2070 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
2071 if (buffer == NULL) {
2072 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2073 ctxt->sax->error(ctxt->userData,
2074 "xmlParseStringName: out of memory\n");
2075 return(NULL);
2076 }
2077 memcpy(buffer, buf, len);
2078 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2079 (c == '.') || (c == '-') ||
2080 (c == '_') || (c == ':') ||
2081 (IS_COMBINING(c)) ||
2082 (IS_EXTENDER(c))) {
2083 if (len + 10 > max) {
2084 max *= 2;
2085 buffer = (xmlChar *) xmlRealloc(buffer,
2086 max * sizeof(xmlChar));
2087 if (buffer == NULL) {
2088 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2089 ctxt->sax->error(ctxt->userData,
2090 "xmlParseStringName: out of memory\n");
2091 return(NULL);
2092 }
2093 }
2094 COPY_BUF(l,buffer,len,c);
2095 cur += l;
2096 c = CUR_SCHAR(cur, l);
2097 }
2098 buffer[len] = 0;
2099 *str = cur;
2100 return(buffer);
2101 }
2102 }
2103 *str = cur;
2104 return(xmlStrndup(buf, len));
2105}
2106
2107/**
2108 * xmlParseNmtoken:
2109 * @ctxt: an XML parser context
2110 *
2111 * parse an XML Nmtoken.
2112 *
2113 * [7] Nmtoken ::= (NameChar)+
2114 *
2115 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2116 *
2117 * Returns the Nmtoken parsed or NULL
2118 */
2119
2120xmlChar *
2121xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2122 xmlChar buf[XML_MAX_NAMELEN + 5];
2123 int len = 0, l;
2124 int c;
2125 int count = 0;
2126
2127 GROW;
2128 c = CUR_CHAR(l);
2129
2130 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2131 (c == '.') || (c == '-') ||
2132 (c == '_') || (c == ':') ||
2133 (IS_COMBINING(c)) ||
2134 (IS_EXTENDER(c))) {
2135 if (count++ > 100) {
2136 count = 0;
2137 GROW;
2138 }
2139 COPY_BUF(l,buf,len,c);
2140 NEXTL(l);
2141 c = CUR_CHAR(l);
2142 if (len >= XML_MAX_NAMELEN) {
2143 /*
2144 * Okay someone managed to make a huge token, so he's ready to pay
2145 * for the processing speed.
2146 */
2147 xmlChar *buffer;
2148 int max = len * 2;
2149
2150 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
2151 if (buffer == NULL) {
2152 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2153 ctxt->sax->error(ctxt->userData,
2154 "xmlParseNmtoken: out of memory\n");
2155 return(NULL);
2156 }
2157 memcpy(buffer, buf, len);
2158 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2159 (c == '.') || (c == '-') ||
2160 (c == '_') || (c == ':') ||
2161 (IS_COMBINING(c)) ||
2162 (IS_EXTENDER(c))) {
2163 if (count++ > 100) {
2164 count = 0;
2165 GROW;
2166 }
2167 if (len + 10 > max) {
2168 max *= 2;
2169 buffer = (xmlChar *) xmlRealloc(buffer,
2170 max * sizeof(xmlChar));
2171 if (buffer == NULL) {
2172 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2173 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002174 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002175 return(NULL);
2176 }
2177 }
2178 COPY_BUF(l,buffer,len,c);
2179 NEXTL(l);
2180 c = CUR_CHAR(l);
2181 }
2182 buffer[len] = 0;
2183 return(buffer);
2184 }
2185 }
2186 if (len == 0)
2187 return(NULL);
2188 return(xmlStrndup(buf, len));
2189}
2190
2191/**
2192 * xmlParseEntityValue:
2193 * @ctxt: an XML parser context
2194 * @orig: if non-NULL store a copy of the original entity value
2195 *
2196 * parse a value for ENTITY declarations
2197 *
2198 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2199 * "'" ([^%&'] | PEReference | Reference)* "'"
2200 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002201 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002202 */
2203
2204xmlChar *
2205xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2206 xmlChar *buf = NULL;
2207 int len = 0;
2208 int size = XML_PARSER_BUFFER_SIZE;
2209 int c, l;
2210 xmlChar stop;
2211 xmlChar *ret = NULL;
2212 const xmlChar *cur = NULL;
2213 xmlParserInputPtr input;
2214
2215 if (RAW == '"') stop = '"';
2216 else if (RAW == '\'') stop = '\'';
2217 else {
2218 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2219 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2220 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2221 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002222 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002223 return(NULL);
2224 }
2225 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2226 if (buf == NULL) {
2227 xmlGenericError(xmlGenericErrorContext,
2228 "malloc of %d byte failed\n", size);
2229 return(NULL);
2230 }
2231
2232 /*
2233 * The content of the entity definition is copied in a buffer.
2234 */
2235
2236 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2237 input = ctxt->input;
2238 GROW;
2239 NEXT;
2240 c = CUR_CHAR(l);
2241 /*
2242 * NOTE: 4.4.5 Included in Literal
2243 * When a parameter entity reference appears in a literal entity
2244 * value, ... a single or double quote character in the replacement
2245 * text is always treated as a normal data character and will not
2246 * terminate the literal.
2247 * In practice it means we stop the loop only when back at parsing
2248 * the initial entity and the quote is found
2249 */
2250 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2251 (ctxt->input != input))) {
2252 if (len + 5 >= size) {
2253 size *= 2;
2254 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2255 if (buf == NULL) {
2256 xmlGenericError(xmlGenericErrorContext,
2257 "realloc of %d byte failed\n", size);
2258 return(NULL);
2259 }
2260 }
2261 COPY_BUF(l,buf,len,c);
2262 NEXTL(l);
2263 /*
2264 * Pop-up of finished entities.
2265 */
2266 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2267 xmlPopInput(ctxt);
2268
2269 GROW;
2270 c = CUR_CHAR(l);
2271 if (c == 0) {
2272 GROW;
2273 c = CUR_CHAR(l);
2274 }
2275 }
2276 buf[len] = 0;
2277
2278 /*
2279 * Raise problem w.r.t. '&' and '%' being used in non-entities
2280 * reference constructs. Note Charref will be handled in
2281 * xmlStringDecodeEntities()
2282 */
2283 cur = buf;
2284 while (*cur != 0) { /* non input consuming */
2285 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2286 xmlChar *name;
2287 xmlChar tmp = *cur;
2288
2289 cur++;
2290 name = xmlParseStringName(ctxt, &cur);
2291 if ((name == NULL) || (*cur != ';')) {
2292 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2293 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2294 ctxt->sax->error(ctxt->userData,
2295 "EntityValue: '%c' forbidden except for entities references\n",
2296 tmp);
2297 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002298 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002299 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002300 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2301 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002302 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2303 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2304 ctxt->sax->error(ctxt->userData,
2305 "EntityValue: PEReferences forbidden in internal subset\n",
2306 tmp);
2307 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002308 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002309 }
2310 if (name != NULL)
2311 xmlFree(name);
2312 }
2313 cur++;
2314 }
2315
2316 /*
2317 * Then PEReference entities are substituted.
2318 */
2319 if (c != stop) {
2320 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2321 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2322 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2323 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002324 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002325 xmlFree(buf);
2326 } else {
2327 NEXT;
2328 /*
2329 * NOTE: 4.4.7 Bypassed
2330 * When a general entity reference appears in the EntityValue in
2331 * an entity declaration, it is bypassed and left as is.
2332 * so XML_SUBSTITUTE_REF is not set here.
2333 */
2334 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2335 0, 0, 0);
2336 if (orig != NULL)
2337 *orig = buf;
2338 else
2339 xmlFree(buf);
2340 }
2341
2342 return(ret);
2343}
2344
2345/**
2346 * xmlParseAttValue:
2347 * @ctxt: an XML parser context
2348 *
2349 * parse a value for an attribute
2350 * Note: the parser won't do substitution of entities here, this
2351 * will be handled later in xmlStringGetNodeList
2352 *
2353 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2354 * "'" ([^<&'] | Reference)* "'"
2355 *
2356 * 3.3.3 Attribute-Value Normalization:
2357 * Before the value of an attribute is passed to the application or
2358 * checked for validity, the XML processor must normalize it as follows:
2359 * - a character reference is processed by appending the referenced
2360 * character to the attribute value
2361 * - an entity reference is processed by recursively processing the
2362 * replacement text of the entity
2363 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2364 * appending #x20 to the normalized value, except that only a single
2365 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2366 * parsed entity or the literal entity value of an internal parsed entity
2367 * - other characters are processed by appending them to the normalized value
2368 * If the declared value is not CDATA, then the XML processor must further
2369 * process the normalized attribute value by discarding any leading and
2370 * trailing space (#x20) characters, and by replacing sequences of space
2371 * (#x20) characters by a single space (#x20) character.
2372 * All attributes for which no declaration has been read should be treated
2373 * by a non-validating parser as if declared CDATA.
2374 *
2375 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2376 */
2377
2378xmlChar *
Daniel Veillarde72c7562002-05-31 09:47:30 +00002379xmlParseAttValueComplex(xmlParserCtxtPtr ctxt);
2380
2381xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002382xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2383 xmlChar limit = 0;
Daniel Veillardf4862f02002-09-10 11:13:43 +00002384 const xmlChar *in = NULL;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002385 xmlChar *ret = NULL;
2386 SHRINK;
2387 GROW;
Daniel Veillarde645e8c2002-10-22 17:35:37 +00002388 in = (xmlChar *) CUR_PTR;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002389 if (*in != '"' && *in != '\'') {
2390 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2391 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2392 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2393 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002394 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002395 return(NULL);
2396 }
2397 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2398 limit = *in;
2399 ++in;
2400
2401 while (*in != limit && *in >= 0x20 && *in <= 0x7f &&
2402 *in != '&' && *in != '<'
2403 ) {
2404 ++in;
2405 }
2406 if (*in != limit) {
2407 return xmlParseAttValueComplex(ctxt);
2408 }
2409 ++in;
2410 ret = xmlStrndup (CUR_PTR + 1, in - CUR_PTR - 2);
2411 CUR_PTR = in;
2412 return ret;
2413}
2414
Daniel Veillard01c13b52002-12-10 15:19:08 +00002415/**
2416 * xmlParseAttValueComplex:
2417 * @ctxt: an XML parser context
2418 *
2419 * parse a value for an attribute, this is the fallback function
2420 * of xmlParseAttValue() when the attribute parsing requires handling
2421 * of non-ASCII characters.
2422 *
2423 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2424 */
Daniel Veillarde72c7562002-05-31 09:47:30 +00002425xmlChar *
2426xmlParseAttValueComplex(xmlParserCtxtPtr ctxt) {
2427 xmlChar limit = 0;
2428 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002429 int len = 0;
2430 int buf_size = 0;
2431 int c, l;
2432 xmlChar *current = NULL;
2433 xmlEntityPtr ent;
2434
2435
2436 SHRINK;
2437 if (NXT(0) == '"') {
2438 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2439 limit = '"';
2440 NEXT;
2441 } else if (NXT(0) == '\'') {
2442 limit = '\'';
2443 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2444 NEXT;
2445 } else {
2446 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2447 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2448 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2449 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002450 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002451 return(NULL);
2452 }
2453
2454 /*
2455 * allocate a translation buffer.
2456 */
2457 buf_size = XML_PARSER_BUFFER_SIZE;
2458 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2459 if (buf == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002460 xmlGenericError(xmlGenericErrorContext,
2461 "xmlParseAttValue: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002462 return(NULL);
2463 }
2464
2465 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002466 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002467 */
2468 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002469 while ((NXT(0) != limit) && /* checked */
2470 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002471 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002472 if (c == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00002473 if (NXT(1) == '#') {
2474 int val = xmlParseCharRef(ctxt);
2475 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002476 if (ctxt->replaceEntities) {
2477 if (len > buf_size - 10) {
2478 growBuffer(buf);
2479 }
2480 buf[len++] = '&';
2481 } else {
2482 /*
2483 * The reparsing will be done in xmlStringGetNodeList()
2484 * called by the attribute() function in SAX.c
2485 */
2486 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002487
Daniel Veillard319a7422001-09-11 09:27:09 +00002488 if (len > buf_size - 10) {
2489 growBuffer(buf);
2490 }
2491 current = &buffer[0];
2492 while (*current != 0) { /* non input consuming */
2493 buf[len++] = *current++;
2494 }
Owen Taylor3473f882001-02-23 17:55:21 +00002495 }
2496 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002497 if (len > buf_size - 10) {
2498 growBuffer(buf);
2499 }
Owen Taylor3473f882001-02-23 17:55:21 +00002500 len += xmlCopyChar(0, &buf[len], val);
2501 }
2502 } else {
2503 ent = xmlParseEntityRef(ctxt);
2504 if ((ent != NULL) &&
2505 (ctxt->replaceEntities != 0)) {
2506 xmlChar *rep;
2507
2508 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2509 rep = xmlStringDecodeEntities(ctxt, ent->content,
2510 XML_SUBSTITUTE_REF, 0, 0, 0);
2511 if (rep != NULL) {
2512 current = rep;
2513 while (*current != 0) { /* non input consuming */
2514 buf[len++] = *current++;
2515 if (len > buf_size - 10) {
2516 growBuffer(buf);
2517 }
2518 }
2519 xmlFree(rep);
2520 }
2521 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002522 if (len > buf_size - 10) {
2523 growBuffer(buf);
2524 }
Owen Taylor3473f882001-02-23 17:55:21 +00002525 if (ent->content != NULL)
2526 buf[len++] = ent->content[0];
2527 }
2528 } else if (ent != NULL) {
2529 int i = xmlStrlen(ent->name);
2530 const xmlChar *cur = ent->name;
2531
2532 /*
2533 * This may look absurd but is needed to detect
2534 * entities problems
2535 */
2536 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2537 (ent->content != NULL)) {
2538 xmlChar *rep;
2539 rep = xmlStringDecodeEntities(ctxt, ent->content,
2540 XML_SUBSTITUTE_REF, 0, 0, 0);
2541 if (rep != NULL)
2542 xmlFree(rep);
2543 }
2544
2545 /*
2546 * Just output the reference
2547 */
2548 buf[len++] = '&';
2549 if (len > buf_size - i - 10) {
2550 growBuffer(buf);
2551 }
2552 for (;i > 0;i--)
2553 buf[len++] = *cur++;
2554 buf[len++] = ';';
2555 }
2556 }
2557 } else {
2558 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2559 COPY_BUF(l,buf,len,0x20);
2560 if (len > buf_size - 10) {
2561 growBuffer(buf);
2562 }
2563 } else {
2564 COPY_BUF(l,buf,len,c);
2565 if (len > buf_size - 10) {
2566 growBuffer(buf);
2567 }
2568 }
2569 NEXTL(l);
2570 }
2571 GROW;
2572 c = CUR_CHAR(l);
2573 }
2574 buf[len++] = 0;
2575 if (RAW == '<') {
2576 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2577 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2578 ctxt->sax->error(ctxt->userData,
2579 "Unescaped '<' not allowed in attributes values\n");
2580 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002581 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002582 } else if (RAW != limit) {
2583 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2584 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2585 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2586 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002587 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002588 } else
2589 NEXT;
2590 return(buf);
2591}
2592
2593/**
2594 * xmlParseSystemLiteral:
2595 * @ctxt: an XML parser context
2596 *
2597 * parse an XML Literal
2598 *
2599 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2600 *
2601 * Returns the SystemLiteral parsed or NULL
2602 */
2603
2604xmlChar *
2605xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2606 xmlChar *buf = NULL;
2607 int len = 0;
2608 int size = XML_PARSER_BUFFER_SIZE;
2609 int cur, l;
2610 xmlChar stop;
2611 int state = ctxt->instate;
2612 int count = 0;
2613
2614 SHRINK;
2615 if (RAW == '"') {
2616 NEXT;
2617 stop = '"';
2618 } else if (RAW == '\'') {
2619 NEXT;
2620 stop = '\'';
2621 } else {
2622 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2623 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2624 ctxt->sax->error(ctxt->userData,
2625 "SystemLiteral \" or ' expected\n");
2626 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002627 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002628 return(NULL);
2629 }
2630
2631 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2632 if (buf == NULL) {
2633 xmlGenericError(xmlGenericErrorContext,
2634 "malloc of %d byte failed\n", size);
2635 return(NULL);
2636 }
2637 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2638 cur = CUR_CHAR(l);
2639 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2640 if (len + 5 >= size) {
2641 size *= 2;
2642 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2643 if (buf == NULL) {
2644 xmlGenericError(xmlGenericErrorContext,
2645 "realloc of %d byte failed\n", size);
2646 ctxt->instate = (xmlParserInputState) state;
2647 return(NULL);
2648 }
2649 }
2650 count++;
2651 if (count > 50) {
2652 GROW;
2653 count = 0;
2654 }
2655 COPY_BUF(l,buf,len,cur);
2656 NEXTL(l);
2657 cur = CUR_CHAR(l);
2658 if (cur == 0) {
2659 GROW;
2660 SHRINK;
2661 cur = CUR_CHAR(l);
2662 }
2663 }
2664 buf[len] = 0;
2665 ctxt->instate = (xmlParserInputState) state;
2666 if (!IS_CHAR(cur)) {
2667 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2668 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2669 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2670 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002671 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002672 } else {
2673 NEXT;
2674 }
2675 return(buf);
2676}
2677
2678/**
2679 * xmlParsePubidLiteral:
2680 * @ctxt: an XML parser context
2681 *
2682 * parse an XML public literal
2683 *
2684 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2685 *
2686 * Returns the PubidLiteral parsed or NULL.
2687 */
2688
2689xmlChar *
2690xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2691 xmlChar *buf = NULL;
2692 int len = 0;
2693 int size = XML_PARSER_BUFFER_SIZE;
2694 xmlChar cur;
2695 xmlChar stop;
2696 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002697 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00002698
2699 SHRINK;
2700 if (RAW == '"') {
2701 NEXT;
2702 stop = '"';
2703 } else if (RAW == '\'') {
2704 NEXT;
2705 stop = '\'';
2706 } else {
2707 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2708 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2709 ctxt->sax->error(ctxt->userData,
2710 "SystemLiteral \" or ' expected\n");
2711 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002712 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002713 return(NULL);
2714 }
2715 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2716 if (buf == NULL) {
2717 xmlGenericError(xmlGenericErrorContext,
2718 "malloc of %d byte failed\n", size);
2719 return(NULL);
2720 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002721 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00002722 cur = CUR;
2723 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2724 if (len + 1 >= size) {
2725 size *= 2;
2726 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2727 if (buf == NULL) {
2728 xmlGenericError(xmlGenericErrorContext,
2729 "realloc of %d byte failed\n", size);
2730 return(NULL);
2731 }
2732 }
2733 buf[len++] = cur;
2734 count++;
2735 if (count > 50) {
2736 GROW;
2737 count = 0;
2738 }
2739 NEXT;
2740 cur = CUR;
2741 if (cur == 0) {
2742 GROW;
2743 SHRINK;
2744 cur = CUR;
2745 }
2746 }
2747 buf[len] = 0;
2748 if (cur != stop) {
2749 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2750 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2751 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2752 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002753 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002754 } else {
2755 NEXT;
2756 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002757 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00002758 return(buf);
2759}
2760
Daniel Veillard48b2f892001-02-25 16:11:03 +00002761void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002762/**
2763 * xmlParseCharData:
2764 * @ctxt: an XML parser context
2765 * @cdata: int indicating whether we are within a CDATA section
2766 *
2767 * parse a CharData section.
2768 * if we are within a CDATA section ']]>' marks an end of section.
2769 *
2770 * The right angle bracket (>) may be represented using the string "&gt;",
2771 * and must, for compatibility, be escaped using "&gt;" or a character
2772 * reference when it appears in the string "]]>" in content, when that
2773 * string is not marking the end of a CDATA section.
2774 *
2775 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2776 */
2777
2778void
2779xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00002780 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002781 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002782 int line = ctxt->input->line;
2783 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002784
2785 SHRINK;
2786 GROW;
2787 /*
2788 * Accelerated common case where input don't need to be
2789 * modified before passing it to the handler.
2790 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00002791 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002792 in = ctxt->input->cur;
2793 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002794get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00002795 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
2796 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002797 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002798 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002799 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002800 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002801 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002802 ctxt->input->line++;
2803 in++;
2804 }
2805 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002806 }
2807 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002808 if ((in[1] == ']') && (in[2] == '>')) {
2809 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2810 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2811 ctxt->sax->error(ctxt->userData,
2812 "Sequence ']]>' not allowed in content\n");
2813 ctxt->input->cur = in;
2814 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002815 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002816 return;
2817 }
2818 in++;
2819 goto get_more;
2820 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002821 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002822 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002823 if (IS_BLANK(*ctxt->input->cur)) {
2824 const xmlChar *tmp = ctxt->input->cur;
2825 ctxt->input->cur = in;
2826 if (areBlanks(ctxt, tmp, nbchar)) {
2827 if (ctxt->sax->ignorableWhitespace != NULL)
2828 ctxt->sax->ignorableWhitespace(ctxt->userData,
2829 tmp, nbchar);
2830 } else {
2831 if (ctxt->sax->characters != NULL)
2832 ctxt->sax->characters(ctxt->userData,
2833 tmp, nbchar);
2834 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002835 line = ctxt->input->line;
2836 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002837 } else {
2838 if (ctxt->sax->characters != NULL)
2839 ctxt->sax->characters(ctxt->userData,
2840 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002841 line = ctxt->input->line;
2842 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002843 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002844 }
2845 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002846 if (*in == 0xD) {
2847 in++;
2848 if (*in == 0xA) {
2849 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002850 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002851 ctxt->input->line++;
2852 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002853 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002854 in--;
2855 }
2856 if (*in == '<') {
2857 return;
2858 }
2859 if (*in == '&') {
2860 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002861 }
2862 SHRINK;
2863 GROW;
2864 in = ctxt->input->cur;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002865 } while ((*in >= 0x20) && (*in <= 0x7F));
Daniel Veillard48b2f892001-02-25 16:11:03 +00002866 nbchar = 0;
2867 }
Daniel Veillard50582112001-03-26 22:52:16 +00002868 ctxt->input->line = line;
2869 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002870 xmlParseCharDataComplex(ctxt, cdata);
2871}
2872
Daniel Veillard01c13b52002-12-10 15:19:08 +00002873/**
2874 * xmlParseCharDataComplex:
2875 * @ctxt: an XML parser context
2876 * @cdata: int indicating whether we are within a CDATA section
2877 *
2878 * parse a CharData section.this is the fallback function
2879 * of xmlParseCharData() when the parsing requires handling
2880 * of non-ASCII characters.
2881 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002882void
2883xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002884 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2885 int nbchar = 0;
2886 int cur, l;
2887 int count = 0;
2888
2889 SHRINK;
2890 GROW;
2891 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002892 while ((cur != '<') && /* checked */
2893 (cur != '&') &&
2894 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00002895 if ((cur == ']') && (NXT(1) == ']') &&
2896 (NXT(2) == '>')) {
2897 if (cdata) break;
2898 else {
2899 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2900 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2901 ctxt->sax->error(ctxt->userData,
2902 "Sequence ']]>' not allowed in content\n");
2903 /* Should this be relaxed ??? I see a "must here */
2904 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002905 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002906 }
2907 }
2908 COPY_BUF(l,buf,nbchar,cur);
2909 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2910 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002911 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002912 */
2913 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2914 if (areBlanks(ctxt, buf, nbchar)) {
2915 if (ctxt->sax->ignorableWhitespace != NULL)
2916 ctxt->sax->ignorableWhitespace(ctxt->userData,
2917 buf, nbchar);
2918 } else {
2919 if (ctxt->sax->characters != NULL)
2920 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2921 }
2922 }
2923 nbchar = 0;
2924 }
2925 count++;
2926 if (count > 50) {
2927 GROW;
2928 count = 0;
2929 }
2930 NEXTL(l);
2931 cur = CUR_CHAR(l);
2932 }
2933 if (nbchar != 0) {
2934 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002935 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002936 */
2937 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2938 if (areBlanks(ctxt, buf, nbchar)) {
2939 if (ctxt->sax->ignorableWhitespace != NULL)
2940 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2941 } else {
2942 if (ctxt->sax->characters != NULL)
2943 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2944 }
2945 }
2946 }
2947}
2948
2949/**
2950 * xmlParseExternalID:
2951 * @ctxt: an XML parser context
2952 * @publicID: a xmlChar** receiving PubidLiteral
2953 * @strict: indicate whether we should restrict parsing to only
2954 * production [75], see NOTE below
2955 *
2956 * Parse an External ID or a Public ID
2957 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002958 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002959 * 'PUBLIC' S PubidLiteral S SystemLiteral
2960 *
2961 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2962 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2963 *
2964 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2965 *
2966 * Returns the function returns SystemLiteral and in the second
2967 * case publicID receives PubidLiteral, is strict is off
2968 * it is possible to return NULL and have publicID set.
2969 */
2970
2971xmlChar *
2972xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2973 xmlChar *URI = NULL;
2974
2975 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002976
2977 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002978 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2979 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2980 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2981 SKIP(6);
2982 if (!IS_BLANK(CUR)) {
2983 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2984 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2985 ctxt->sax->error(ctxt->userData,
2986 "Space required after 'SYSTEM'\n");
2987 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002988 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002989 }
2990 SKIP_BLANKS;
2991 URI = xmlParseSystemLiteral(ctxt);
2992 if (URI == NULL) {
2993 ctxt->errNo = XML_ERR_URI_REQUIRED;
2994 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2995 ctxt->sax->error(ctxt->userData,
2996 "xmlParseExternalID: SYSTEM, no URI\n");
2997 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002998 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002999 }
3000 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
3001 (NXT(2) == 'B') && (NXT(3) == 'L') &&
3002 (NXT(4) == 'I') && (NXT(5) == 'C')) {
3003 SKIP(6);
3004 if (!IS_BLANK(CUR)) {
3005 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3007 ctxt->sax->error(ctxt->userData,
3008 "Space required after 'PUBLIC'\n");
3009 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003010 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003011 }
3012 SKIP_BLANKS;
3013 *publicID = xmlParsePubidLiteral(ctxt);
3014 if (*publicID == NULL) {
3015 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
3016 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3017 ctxt->sax->error(ctxt->userData,
3018 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
3019 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003020 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003021 }
3022 if (strict) {
3023 /*
3024 * We don't handle [83] so "S SystemLiteral" is required.
3025 */
3026 if (!IS_BLANK(CUR)) {
3027 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3028 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3029 ctxt->sax->error(ctxt->userData,
3030 "Space required after the Public Identifier\n");
3031 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003032 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003033 }
3034 } else {
3035 /*
3036 * We handle [83] so we return immediately, if
3037 * "S SystemLiteral" is not detected. From a purely parsing
3038 * point of view that's a nice mess.
3039 */
3040 const xmlChar *ptr;
3041 GROW;
3042
3043 ptr = CUR_PTR;
3044 if (!IS_BLANK(*ptr)) return(NULL);
3045
3046 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3047 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3048 }
3049 SKIP_BLANKS;
3050 URI = xmlParseSystemLiteral(ctxt);
3051 if (URI == NULL) {
3052 ctxt->errNo = XML_ERR_URI_REQUIRED;
3053 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3054 ctxt->sax->error(ctxt->userData,
3055 "xmlParseExternalID: PUBLIC, no URI\n");
3056 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003057 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003058 }
3059 }
3060 return(URI);
3061}
3062
3063/**
3064 * xmlParseComment:
3065 * @ctxt: an XML parser context
3066 *
3067 * Skip an XML (SGML) comment <!-- .... -->
3068 * The spec says that "For compatibility, the string "--" (double-hyphen)
3069 * must not occur within comments. "
3070 *
3071 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3072 */
3073void
3074xmlParseComment(xmlParserCtxtPtr ctxt) {
3075 xmlChar *buf = NULL;
3076 int len;
3077 int size = XML_PARSER_BUFFER_SIZE;
3078 int q, ql;
3079 int r, rl;
3080 int cur, l;
3081 xmlParserInputState state;
3082 xmlParserInputPtr input = ctxt->input;
3083 int count = 0;
3084
3085 /*
3086 * Check that there is a comment right here.
3087 */
3088 if ((RAW != '<') || (NXT(1) != '!') ||
3089 (NXT(2) != '-') || (NXT(3) != '-')) return;
3090
3091 state = ctxt->instate;
3092 ctxt->instate = XML_PARSER_COMMENT;
3093 SHRINK;
3094 SKIP(4);
3095 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3096 if (buf == NULL) {
3097 xmlGenericError(xmlGenericErrorContext,
3098 "malloc of %d byte failed\n", size);
3099 ctxt->instate = state;
3100 return;
3101 }
3102 q = CUR_CHAR(ql);
3103 NEXTL(ql);
3104 r = CUR_CHAR(rl);
3105 NEXTL(rl);
3106 cur = CUR_CHAR(l);
3107 len = 0;
3108 while (IS_CHAR(cur) && /* checked */
3109 ((cur != '>') ||
3110 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003111 if ((r == '-') && (q == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003112 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
3113 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3114 ctxt->sax->error(ctxt->userData,
3115 "Comment must not contain '--' (double-hyphen)`\n");
3116 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003117 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003118 }
3119 if (len + 5 >= size) {
3120 size *= 2;
3121 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3122 if (buf == NULL) {
3123 xmlGenericError(xmlGenericErrorContext,
3124 "realloc of %d byte failed\n", size);
3125 ctxt->instate = state;
3126 return;
3127 }
3128 }
3129 COPY_BUF(ql,buf,len,q);
3130 q = r;
3131 ql = rl;
3132 r = cur;
3133 rl = l;
3134
3135 count++;
3136 if (count > 50) {
3137 GROW;
3138 count = 0;
3139 }
3140 NEXTL(l);
3141 cur = CUR_CHAR(l);
3142 if (cur == 0) {
3143 SHRINK;
3144 GROW;
3145 cur = CUR_CHAR(l);
3146 }
3147 }
3148 buf[len] = 0;
3149 if (!IS_CHAR(cur)) {
3150 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
3151 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3152 ctxt->sax->error(ctxt->userData,
3153 "Comment not terminated \n<!--%.50s\n", buf);
3154 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003155 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003156 xmlFree(buf);
3157 } else {
3158 if (input != ctxt->input) {
3159 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3160 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3161 ctxt->sax->error(ctxt->userData,
3162"Comment doesn't start and stop in the same entity\n");
3163 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003164 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003165 }
3166 NEXT;
3167 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3168 (!ctxt->disableSAX))
3169 ctxt->sax->comment(ctxt->userData, buf);
3170 xmlFree(buf);
3171 }
3172 ctxt->instate = state;
3173}
3174
3175/**
3176 * xmlParsePITarget:
3177 * @ctxt: an XML parser context
3178 *
3179 * parse the name of a PI
3180 *
3181 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3182 *
3183 * Returns the PITarget name or NULL
3184 */
3185
3186xmlChar *
3187xmlParsePITarget(xmlParserCtxtPtr ctxt) {
3188 xmlChar *name;
3189
3190 name = xmlParseName(ctxt);
3191 if ((name != NULL) &&
3192 ((name[0] == 'x') || (name[0] == 'X')) &&
3193 ((name[1] == 'm') || (name[1] == 'M')) &&
3194 ((name[2] == 'l') || (name[2] == 'L'))) {
3195 int i;
3196 if ((name[0] == 'x') && (name[1] == 'm') &&
3197 (name[2] == 'l') && (name[3] == 0)) {
3198 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3199 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3200 ctxt->sax->error(ctxt->userData,
3201 "XML declaration allowed only at the start of the document\n");
3202 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003203 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003204 return(name);
3205 } else if (name[3] == 0) {
3206 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3207 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3208 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
3209 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003210 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003211 return(name);
3212 }
3213 for (i = 0;;i++) {
3214 if (xmlW3CPIs[i] == NULL) break;
3215 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3216 return(name);
3217 }
3218 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3219 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3220 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003221 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003222 }
3223 }
3224 return(name);
3225}
3226
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The value must have the form: catalog = "URL" or catalog = 'URL',
 * with optional blanks around each token and nothing after the closing
 * quote. Other syntax problems raise a SAX warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *scan = catalog;
    const xmlChar *valueStart;
    xmlChar *location = NULL;
    xmlChar quote;

    for (; IS_BLANK(*scan); scan++)
        ;
    if (xmlStrncmp(scan, BAD_CAST"catalog", 7) != 0)
        goto malformed;
    scan += 7;

    for (; IS_BLANK(*scan); scan++)
        ;
    /*
     * NOTE(review): a missing '=' returns silently instead of warning
     * like the other syntax problems - confirm this is intended.
     */
    if (*scan != '=')
        return;
    scan++;

    for (; IS_BLANK(*scan); scan++)
        ;
    quote = *scan;
    if ((quote != '\'') && (quote != '"'))
        goto malformed;

    /* Locate the matching closing quote. */
    valueStart = ++scan;
    for (; (*scan != 0) && (*scan != quote); scan++)
        ;
    if (*scan == 0)
        goto malformed;
    location = xmlStrndup(valueStart, scan - valueStart);
    scan++;

    /* Only blanks may follow the quoted value. */
    for (; IS_BLANK(*scan); scan++)
        ;
    if (*scan != 0)
        goto malformed;

    if (location != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, location);
        xmlFree(location);
    }
    return;

malformed:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    }
    if (location != NULL) {
        xmlFree(location);
    }
}
#endif
3289
Owen Taylor3473f882001-02-23 17:55:21 +00003290/**
3291 * xmlParsePI:
3292 * @ctxt: an XML parser context
3293 *
3294 * parse an XML Processing Instruction.
3295 *
3296 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3297 *
3298 * The processing is transfered to SAX once parsed.
3299 */
3300
3301void
3302xmlParsePI(xmlParserCtxtPtr ctxt) {
3303 xmlChar *buf = NULL;
3304 int len = 0;
3305 int size = XML_PARSER_BUFFER_SIZE;
3306 int cur, l;
3307 xmlChar *target;
3308 xmlParserInputState state;
3309 int count = 0;
3310
3311 if ((RAW == '<') && (NXT(1) == '?')) {
3312 xmlParserInputPtr input = ctxt->input;
3313 state = ctxt->instate;
3314 ctxt->instate = XML_PARSER_PI;
3315 /*
3316 * this is a Processing Instruction.
3317 */
3318 SKIP(2);
3319 SHRINK;
3320
3321 /*
3322 * Parse the target name and check for special support like
3323 * namespace.
3324 */
3325 target = xmlParsePITarget(ctxt);
3326 if (target != NULL) {
3327 if ((RAW == '?') && (NXT(1) == '>')) {
3328 if (input != ctxt->input) {
3329 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3331 ctxt->sax->error(ctxt->userData,
3332 "PI declaration doesn't start and stop in the same entity\n");
3333 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003334 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003335 }
3336 SKIP(2);
3337
3338 /*
3339 * SAX: PI detected.
3340 */
3341 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3342 (ctxt->sax->processingInstruction != NULL))
3343 ctxt->sax->processingInstruction(ctxt->userData,
3344 target, NULL);
3345 ctxt->instate = state;
3346 xmlFree(target);
3347 return;
3348 }
3349 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3350 if (buf == NULL) {
3351 xmlGenericError(xmlGenericErrorContext,
3352 "malloc of %d byte failed\n", size);
3353 ctxt->instate = state;
3354 return;
3355 }
3356 cur = CUR;
3357 if (!IS_BLANK(cur)) {
3358 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3359 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3360 ctxt->sax->error(ctxt->userData,
3361 "xmlParsePI: PI %s space expected\n", target);
3362 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003363 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003364 }
3365 SKIP_BLANKS;
3366 cur = CUR_CHAR(l);
3367 while (IS_CHAR(cur) && /* checked */
3368 ((cur != '?') || (NXT(1) != '>'))) {
3369 if (len + 5 >= size) {
3370 size *= 2;
3371 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3372 if (buf == NULL) {
3373 xmlGenericError(xmlGenericErrorContext,
3374 "realloc of %d byte failed\n", size);
3375 ctxt->instate = state;
3376 return;
3377 }
3378 }
3379 count++;
3380 if (count > 50) {
3381 GROW;
3382 count = 0;
3383 }
3384 COPY_BUF(l,buf,len,cur);
3385 NEXTL(l);
3386 cur = CUR_CHAR(l);
3387 if (cur == 0) {
3388 SHRINK;
3389 GROW;
3390 cur = CUR_CHAR(l);
3391 }
3392 }
3393 buf[len] = 0;
3394 if (cur != '?') {
3395 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3396 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3397 ctxt->sax->error(ctxt->userData,
3398 "xmlParsePI: PI %s never end ...\n", target);
3399 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003400 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003401 } else {
3402 if (input != ctxt->input) {
3403 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3404 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3405 ctxt->sax->error(ctxt->userData,
3406 "PI declaration doesn't start and stop in the same entity\n");
3407 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003408 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003409 }
3410 SKIP(2);
3411
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003412#ifdef LIBXML_CATALOG_ENABLED
3413 if (((state == XML_PARSER_MISC) ||
3414 (state == XML_PARSER_START)) &&
3415 (xmlStrEqual(target, XML_CATALOG_PI))) {
3416 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3417 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3418 (allow == XML_CATA_ALLOW_ALL))
3419 xmlParseCatalogPI(ctxt, buf);
3420 }
3421#endif
3422
3423
Owen Taylor3473f882001-02-23 17:55:21 +00003424 /*
3425 * SAX: PI detected.
3426 */
3427 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3428 (ctxt->sax->processingInstruction != NULL))
3429 ctxt->sax->processingInstruction(ctxt->userData,
3430 target, buf);
3431 }
3432 xmlFree(buf);
3433 xmlFree(target);
3434 } else {
3435 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3436 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3437 ctxt->sax->error(ctxt->userData,
3438 "xmlParsePI : no target name\n");
3439 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003440 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003441 }
3442 ctxt->instate = state;
3443 }
3444}
3445
3446/**
3447 * xmlParseNotationDecl:
3448 * @ctxt: an XML parser context
3449 *
3450 * parse a notation declaration
3451 *
3452 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3453 *
3454 * Hence there is actually 3 choices:
3455 * 'PUBLIC' S PubidLiteral
3456 * 'PUBLIC' S PubidLiteral S SystemLiteral
3457 * and 'SYSTEM' S SystemLiteral
3458 *
3459 * See the NOTE on xmlParseExternalID().
3460 */
3461
3462void
3463xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3464 xmlChar *name;
3465 xmlChar *Pubid;
3466 xmlChar *Systemid;
3467
3468 if ((RAW == '<') && (NXT(1) == '!') &&
3469 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3470 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3471 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3472 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3473 xmlParserInputPtr input = ctxt->input;
3474 SHRINK;
3475 SKIP(10);
3476 if (!IS_BLANK(CUR)) {
3477 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3478 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3479 ctxt->sax->error(ctxt->userData,
3480 "Space required after '<!NOTATION'\n");
3481 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003482 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003483 return;
3484 }
3485 SKIP_BLANKS;
3486
Daniel Veillard76d66f42001-05-16 21:05:17 +00003487 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003488 if (name == NULL) {
3489 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3490 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3491 ctxt->sax->error(ctxt->userData,
3492 "NOTATION: Name expected here\n");
3493 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003494 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003495 return;
3496 }
3497 if (!IS_BLANK(CUR)) {
3498 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3499 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3500 ctxt->sax->error(ctxt->userData,
3501 "Space required after the NOTATION name'\n");
3502 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003503 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003504 return;
3505 }
3506 SKIP_BLANKS;
3507
3508 /*
3509 * Parse the IDs.
3510 */
3511 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3512 SKIP_BLANKS;
3513
3514 if (RAW == '>') {
3515 if (input != ctxt->input) {
3516 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3517 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3518 ctxt->sax->error(ctxt->userData,
3519"Notation declaration doesn't start and stop in the same entity\n");
3520 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003521 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003522 }
3523 NEXT;
3524 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3525 (ctxt->sax->notationDecl != NULL))
3526 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3527 } else {
3528 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3529 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3530 ctxt->sax->error(ctxt->userData,
3531 "'>' required to close NOTATION declaration\n");
3532 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003533 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003534 }
3535 xmlFree(name);
3536 if (Systemid != NULL) xmlFree(Systemid);
3537 if (Pubid != NULL) xmlFree(Pubid);
3538 }
3539}
3540
3541/**
3542 * xmlParseEntityDecl:
3543 * @ctxt: an XML parser context
3544 *
3545 * parse <!ENTITY declarations
3546 *
3547 * [70] EntityDecl ::= GEDecl | PEDecl
3548 *
3549 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3550 *
3551 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3552 *
3553 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3554 *
3555 * [74] PEDef ::= EntityValue | ExternalID
3556 *
3557 * [76] NDataDecl ::= S 'NDATA' S Name
3558 *
3559 * [ VC: Notation Declared ]
3560 * The Name must match the declared name of a notation.
3561 */
3562
3563void
3564xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3565 xmlChar *name = NULL;
3566 xmlChar *value = NULL;
3567 xmlChar *URI = NULL, *literal = NULL;
3568 xmlChar *ndata = NULL;
3569 int isParameter = 0;
3570 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003571 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003572
3573 GROW;
3574 if ((RAW == '<') && (NXT(1) == '!') &&
3575 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3576 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3577 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3578 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003579 SHRINK;
3580 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003581 skipped = SKIP_BLANKS;
3582 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003583 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3584 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3585 ctxt->sax->error(ctxt->userData,
3586 "Space required after '<!ENTITY'\n");
3587 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003588 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003589 }
Owen Taylor3473f882001-02-23 17:55:21 +00003590
3591 if (RAW == '%') {
3592 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003593 skipped = SKIP_BLANKS;
3594 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003595 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3596 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3597 ctxt->sax->error(ctxt->userData,
3598 "Space required after '%'\n");
3599 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003600 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003601 }
Owen Taylor3473f882001-02-23 17:55:21 +00003602 isParameter = 1;
3603 }
3604
Daniel Veillard76d66f42001-05-16 21:05:17 +00003605 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003606 if (name == NULL) {
3607 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3608 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3609 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3610 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003611 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003612 return;
3613 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003614 skipped = SKIP_BLANKS;
3615 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003616 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3617 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3618 ctxt->sax->error(ctxt->userData,
3619 "Space required after the entity name\n");
3620 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003621 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003622 }
Owen Taylor3473f882001-02-23 17:55:21 +00003623
Daniel Veillardf5582f12002-06-11 10:08:16 +00003624 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003625 /*
3626 * handle the various case of definitions...
3627 */
3628 if (isParameter) {
3629 if ((RAW == '"') || (RAW == '\'')) {
3630 value = xmlParseEntityValue(ctxt, &orig);
3631 if (value) {
3632 if ((ctxt->sax != NULL) &&
3633 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3634 ctxt->sax->entityDecl(ctxt->userData, name,
3635 XML_INTERNAL_PARAMETER_ENTITY,
3636 NULL, NULL, value);
3637 }
3638 } else {
3639 URI = xmlParseExternalID(ctxt, &literal, 1);
3640 if ((URI == NULL) && (literal == NULL)) {
3641 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3642 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3643 ctxt->sax->error(ctxt->userData,
3644 "Entity value required\n");
3645 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003646 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003647 }
3648 if (URI) {
3649 xmlURIPtr uri;
3650
3651 uri = xmlParseURI((const char *) URI);
3652 if (uri == NULL) {
3653 ctxt->errNo = XML_ERR_INVALID_URI;
3654 if ((ctxt->sax != NULL) &&
3655 (!ctxt->disableSAX) &&
3656 (ctxt->sax->error != NULL))
3657 ctxt->sax->error(ctxt->userData,
3658 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003659 /*
3660 * This really ought to be a well formedness error
3661 * but the XML Core WG decided otherwise c.f. issue
3662 * E26 of the XML erratas.
3663 */
Owen Taylor3473f882001-02-23 17:55:21 +00003664 } else {
3665 if (uri->fragment != NULL) {
3666 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3667 if ((ctxt->sax != NULL) &&
3668 (!ctxt->disableSAX) &&
3669 (ctxt->sax->error != NULL))
3670 ctxt->sax->error(ctxt->userData,
3671 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003672 /*
3673 * Okay this is foolish to block those but not
3674 * invalid URIs.
3675 */
Owen Taylor3473f882001-02-23 17:55:21 +00003676 ctxt->wellFormed = 0;
3677 } else {
3678 if ((ctxt->sax != NULL) &&
3679 (!ctxt->disableSAX) &&
3680 (ctxt->sax->entityDecl != NULL))
3681 ctxt->sax->entityDecl(ctxt->userData, name,
3682 XML_EXTERNAL_PARAMETER_ENTITY,
3683 literal, URI, NULL);
3684 }
3685 xmlFreeURI(uri);
3686 }
3687 }
3688 }
3689 } else {
3690 if ((RAW == '"') || (RAW == '\'')) {
3691 value = xmlParseEntityValue(ctxt, &orig);
3692 if ((ctxt->sax != NULL) &&
3693 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3694 ctxt->sax->entityDecl(ctxt->userData, name,
3695 XML_INTERNAL_GENERAL_ENTITY,
3696 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003697 /*
3698 * For expat compatibility in SAX mode.
3699 */
3700 if ((ctxt->myDoc == NULL) ||
3701 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3702 if (ctxt->myDoc == NULL) {
3703 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3704 }
3705 if (ctxt->myDoc->intSubset == NULL)
3706 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3707 BAD_CAST "fake", NULL, NULL);
3708
3709 entityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3710 NULL, NULL, value);
3711 }
Owen Taylor3473f882001-02-23 17:55:21 +00003712 } else {
3713 URI = xmlParseExternalID(ctxt, &literal, 1);
3714 if ((URI == NULL) && (literal == NULL)) {
3715 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3716 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3717 ctxt->sax->error(ctxt->userData,
3718 "Entity value required\n");
3719 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003720 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003721 }
3722 if (URI) {
3723 xmlURIPtr uri;
3724
3725 uri = xmlParseURI((const char *)URI);
3726 if (uri == NULL) {
3727 ctxt->errNo = XML_ERR_INVALID_URI;
3728 if ((ctxt->sax != NULL) &&
3729 (!ctxt->disableSAX) &&
3730 (ctxt->sax->error != NULL))
3731 ctxt->sax->error(ctxt->userData,
3732 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003733 /*
3734 * This really ought to be a well formedness error
3735 * but the XML Core WG decided otherwise c.f. issue
3736 * E26 of the XML erratas.
3737 */
Owen Taylor3473f882001-02-23 17:55:21 +00003738 } else {
3739 if (uri->fragment != NULL) {
3740 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3741 if ((ctxt->sax != NULL) &&
3742 (!ctxt->disableSAX) &&
3743 (ctxt->sax->error != NULL))
3744 ctxt->sax->error(ctxt->userData,
3745 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003746 /*
3747 * Okay this is foolish to block those but not
3748 * invalid URIs.
3749 */
Owen Taylor3473f882001-02-23 17:55:21 +00003750 ctxt->wellFormed = 0;
3751 }
3752 xmlFreeURI(uri);
3753 }
3754 }
3755 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3756 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3757 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3758 ctxt->sax->error(ctxt->userData,
3759 "Space required before 'NDATA'\n");
3760 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003761 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003762 }
3763 SKIP_BLANKS;
3764 if ((RAW == 'N') && (NXT(1) == 'D') &&
3765 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3766 (NXT(4) == 'A')) {
3767 SKIP(5);
3768 if (!IS_BLANK(CUR)) {
3769 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3770 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3771 ctxt->sax->error(ctxt->userData,
3772 "Space required after 'NDATA'\n");
3773 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003774 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003775 }
3776 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003777 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003778 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3779 (ctxt->sax->unparsedEntityDecl != NULL))
3780 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3781 literal, URI, ndata);
3782 } else {
3783 if ((ctxt->sax != NULL) &&
3784 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3785 ctxt->sax->entityDecl(ctxt->userData, name,
3786 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3787 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003788 /*
3789 * For expat compatibility in SAX mode.
3790 * assuming the entity replacement was asked for
3791 */
3792 if ((ctxt->replaceEntities != 0) &&
3793 ((ctxt->myDoc == NULL) ||
3794 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
3795 if (ctxt->myDoc == NULL) {
3796 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3797 }
3798
3799 if (ctxt->myDoc->intSubset == NULL)
3800 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3801 BAD_CAST "fake", NULL, NULL);
3802 entityDecl(ctxt, name,
3803 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3804 literal, URI, NULL);
3805 }
Owen Taylor3473f882001-02-23 17:55:21 +00003806 }
3807 }
3808 }
3809 SKIP_BLANKS;
3810 if (RAW != '>') {
3811 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3812 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3813 ctxt->sax->error(ctxt->userData,
3814 "xmlParseEntityDecl: entity %s not terminated\n", name);
3815 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003816 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003817 } else {
3818 if (input != ctxt->input) {
3819 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3820 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3821 ctxt->sax->error(ctxt->userData,
3822"Entity declaration doesn't start and stop in the same entity\n");
3823 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003824 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003825 }
3826 NEXT;
3827 }
3828 if (orig != NULL) {
3829 /*
3830 * Ugly mechanism to save the raw entity value.
3831 */
3832 xmlEntityPtr cur = NULL;
3833
3834 if (isParameter) {
3835 if ((ctxt->sax != NULL) &&
3836 (ctxt->sax->getParameterEntity != NULL))
3837 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3838 } else {
3839 if ((ctxt->sax != NULL) &&
3840 (ctxt->sax->getEntity != NULL))
3841 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003842 if ((cur == NULL) && (ctxt->userData==ctxt)) {
3843 cur = getEntity(ctxt, name);
3844 }
Owen Taylor3473f882001-02-23 17:55:21 +00003845 }
3846 if (cur != NULL) {
3847 if (cur->orig != NULL)
3848 xmlFree(orig);
3849 else
3850 cur->orig = orig;
3851 } else
3852 xmlFree(orig);
3853 }
3854 if (name != NULL) xmlFree(name);
3855 if (value != NULL) xmlFree(value);
3856 if (URI != NULL) xmlFree(URI);
3857 if (literal != NULL) xmlFree(literal);
3858 if (ndata != NULL) xmlFree(ndata);
3859 }
3860}
3861
3862/**
3863 * xmlParseDefaultDecl:
3864 * @ctxt: an XML parser context
3865 * @value: Receive a possible fixed default value for the attribute
3866 *
3867 * Parse an attribute default declaration
3868 *
3869 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3870 *
3871 * [ VC: Required Attribute ]
3872 * if the default declaration is the keyword #REQUIRED, then the
3873 * attribute must be specified for all elements of the type in the
3874 * attribute-list declaration.
3875 *
3876 * [ VC: Attribute Default Legal ]
3877 * The declared default value must meet the lexical constraints of
3878 * the declared attribute type c.f. xmlValidateAttributeDecl()
3879 *
3880 * [ VC: Fixed Attribute Default ]
3881 * if an attribute has a default value declared with the #FIXED
3882 * keyword, instances of that attribute must match the default value.
3883 *
3884 * [ WFC: No < in Attribute Values ]
3885 * handled in xmlParseAttValue()
3886 *
3887 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3888 * or XML_ATTRIBUTE_FIXED.
3889 */
3890
3891int
3892xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3893 int val;
3894 xmlChar *ret;
3895
3896 *value = NULL;
3897 if ((RAW == '#') && (NXT(1) == 'R') &&
3898 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3899 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3900 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3901 (NXT(8) == 'D')) {
3902 SKIP(9);
3903 return(XML_ATTRIBUTE_REQUIRED);
3904 }
3905 if ((RAW == '#') && (NXT(1) == 'I') &&
3906 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3907 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3908 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3909 SKIP(8);
3910 return(XML_ATTRIBUTE_IMPLIED);
3911 }
3912 val = XML_ATTRIBUTE_NONE;
3913 if ((RAW == '#') && (NXT(1) == 'F') &&
3914 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3915 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3916 SKIP(6);
3917 val = XML_ATTRIBUTE_FIXED;
3918 if (!IS_BLANK(CUR)) {
3919 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3920 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3921 ctxt->sax->error(ctxt->userData,
3922 "Space required after '#FIXED'\n");
3923 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003924 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003925 }
3926 SKIP_BLANKS;
3927 }
3928 ret = xmlParseAttValue(ctxt);
3929 ctxt->instate = XML_PARSER_DTD;
3930 if (ret == NULL) {
3931 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3932 ctxt->sax->error(ctxt->userData,
3933 "Attribute default value declaration error\n");
3934 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003935 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003936 } else
3937 *value = ret;
3938 return(val);
3939}
3940
3941/**
3942 * xmlParseNotationType:
3943 * @ctxt: an XML parser context
3944 *
3945 * parse an Notation attribute type.
3946 *
3947 * Note: the leading 'NOTATION' S part has already being parsed...
3948 *
3949 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3950 *
3951 * [ VC: Notation Attributes ]
3952 * Values of this type must match one of the notation names included
3953 * in the declaration; all notation names in the declaration must be declared.
3954 *
3955 * Returns: the notation attribute tree built while parsing
3956 */
3957
3958xmlEnumerationPtr
3959xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3960 xmlChar *name;
3961 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3962
3963 if (RAW != '(') {
3964 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3965 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3966 ctxt->sax->error(ctxt->userData,
3967 "'(' required to start 'NOTATION'\n");
3968 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003969 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003970 return(NULL);
3971 }
3972 SHRINK;
3973 do {
3974 NEXT;
3975 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003976 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003977 if (name == NULL) {
3978 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3979 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3980 ctxt->sax->error(ctxt->userData,
3981 "Name expected in NOTATION declaration\n");
3982 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003983 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003984 return(ret);
3985 }
3986 cur = xmlCreateEnumeration(name);
3987 xmlFree(name);
3988 if (cur == NULL) return(ret);
3989 if (last == NULL) ret = last = cur;
3990 else {
3991 last->next = cur;
3992 last = cur;
3993 }
3994 SKIP_BLANKS;
3995 } while (RAW == '|');
3996 if (RAW != ')') {
3997 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3998 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3999 ctxt->sax->error(ctxt->userData,
4000 "')' required to finish NOTATION declaration\n");
4001 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004002 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004003 if ((last != NULL) && (last != ret))
4004 xmlFreeEnumeration(last);
4005 return(ret);
4006 }
4007 NEXT;
4008 return(ret);
4009}
4010
4011/**
4012 * xmlParseEnumerationType:
4013 * @ctxt: an XML parser context
4014 *
4015 * parse an Enumeration attribute type.
4016 *
4017 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4018 *
4019 * [ VC: Enumeration ]
4020 * Values of this type must match one of the Nmtoken tokens in
4021 * the declaration
4022 *
4023 * Returns: the enumeration attribute tree built while parsing
4024 */
4025
4026xmlEnumerationPtr
4027xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4028 xmlChar *name;
4029 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4030
4031 if (RAW != '(') {
4032 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
4033 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4034 ctxt->sax->error(ctxt->userData,
4035 "'(' required to start ATTLIST enumeration\n");
4036 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004037 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004038 return(NULL);
4039 }
4040 SHRINK;
4041 do {
4042 NEXT;
4043 SKIP_BLANKS;
4044 name = xmlParseNmtoken(ctxt);
4045 if (name == NULL) {
4046 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
4047 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4048 ctxt->sax->error(ctxt->userData,
4049 "NmToken expected in ATTLIST enumeration\n");
4050 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004051 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004052 return(ret);
4053 }
4054 cur = xmlCreateEnumeration(name);
4055 xmlFree(name);
4056 if (cur == NULL) return(ret);
4057 if (last == NULL) ret = last = cur;
4058 else {
4059 last->next = cur;
4060 last = cur;
4061 }
4062 SKIP_BLANKS;
4063 } while (RAW == '|');
4064 if (RAW != ')') {
4065 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
4066 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4067 ctxt->sax->error(ctxt->userData,
4068 "')' required to finish ATTLIST enumeration\n");
4069 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004070 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004071 return(ret);
4072 }
4073 NEXT;
4074 return(ret);
4075}
4076
4077/**
4078 * xmlParseEnumeratedType:
4079 * @ctxt: an XML parser context
4080 * @tree: the enumeration tree built while parsing
4081 *
4082 * parse an Enumerated attribute type.
4083 *
4084 * [57] EnumeratedType ::= NotationType | Enumeration
4085 *
4086 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4087 *
4088 *
4089 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4090 */
4091
4092int
4093xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4094 if ((RAW == 'N') && (NXT(1) == 'O') &&
4095 (NXT(2) == 'T') && (NXT(3) == 'A') &&
4096 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4097 (NXT(6) == 'O') && (NXT(7) == 'N')) {
4098 SKIP(8);
4099 if (!IS_BLANK(CUR)) {
4100 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4101 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4102 ctxt->sax->error(ctxt->userData,
4103 "Space required after 'NOTATION'\n");
4104 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004105 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004106 return(0);
4107 }
4108 SKIP_BLANKS;
4109 *tree = xmlParseNotationType(ctxt);
4110 if (*tree == NULL) return(0);
4111 return(XML_ATTRIBUTE_NOTATION);
4112 }
4113 *tree = xmlParseEnumerationType(ctxt);
4114 if (*tree == NULL) return(0);
4115 return(XML_ATTRIBUTE_ENUMERATION);
4116}
4117
4118/**
4119 * xmlParseAttributeType:
4120 * @ctxt: an XML parser context
4121 * @tree: the enumeration tree built while parsing
4122 *
4123 * parse the Attribute list def for an element
4124 *
4125 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4126 *
4127 * [55] StringType ::= 'CDATA'
4128 *
4129 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4130 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4131 *
4132 * Validity constraints for attribute values syntax are checked in
4133 * xmlValidateAttributeValue()
4134 *
4135 * [ VC: ID ]
4136 * Values of type ID must match the Name production. A name must not
4137 * appear more than once in an XML document as a value of this type;
4138 * i.e., ID values must uniquely identify the elements which bear them.
4139 *
4140 * [ VC: One ID per Element Type ]
4141 * No element type may have more than one ID attribute specified.
4142 *
4143 * [ VC: ID Attribute Default ]
4144 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4145 *
4146 * [ VC: IDREF ]
4147 * Values of type IDREF must match the Name production, and values
4148 * of type IDREFS must match Names; each IDREF Name must match the value
4149 * of an ID attribute on some element in the XML document; i.e. IDREF
4150 * values must match the value of some ID attribute.
4151 *
4152 * [ VC: Entity Name ]
4153 * Values of type ENTITY must match the Name production, values
4154 * of type ENTITIES must match Names; each Entity Name must match the
4155 * name of an unparsed entity declared in the DTD.
4156 *
4157 * [ VC: Name Token ]
4158 * Values of type NMTOKEN must match the Nmtoken production; values
4159 * of type NMTOKENS must match Nmtokens.
4160 *
4161 * Returns the attribute type
4162 */
4163int
4164xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4165 SHRINK;
4166 if ((RAW == 'C') && (NXT(1) == 'D') &&
4167 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4168 (NXT(4) == 'A')) {
4169 SKIP(5);
4170 return(XML_ATTRIBUTE_CDATA);
4171 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4172 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4173 (NXT(4) == 'F') && (NXT(5) == 'S')) {
4174 SKIP(6);
4175 return(XML_ATTRIBUTE_IDREFS);
4176 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4177 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4178 (NXT(4) == 'F')) {
4179 SKIP(5);
4180 return(XML_ATTRIBUTE_IDREF);
4181 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4182 SKIP(2);
4183 return(XML_ATTRIBUTE_ID);
4184 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4185 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4186 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4187 SKIP(6);
4188 return(XML_ATTRIBUTE_ENTITY);
4189 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4190 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4191 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4192 (NXT(6) == 'E') && (NXT(7) == 'S')) {
4193 SKIP(8);
4194 return(XML_ATTRIBUTE_ENTITIES);
4195 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4196 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4197 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4198 (NXT(6) == 'N') && (NXT(7) == 'S')) {
4199 SKIP(8);
4200 return(XML_ATTRIBUTE_NMTOKENS);
4201 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4202 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4203 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4204 (NXT(6) == 'N')) {
4205 SKIP(7);
4206 return(XML_ATTRIBUTE_NMTOKEN);
4207 }
4208 return(xmlParseEnumeratedType(ctxt, tree));
4209}
4210
4211/**
4212 * xmlParseAttributeListDecl:
4213 * @ctxt: an XML parser context
4214 *
4215 * : parse the Attribute list def for an element
4216 *
4217 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4218 *
4219 * [53] AttDef ::= S Name S AttType S DefaultDecl
4220 *
4221 */
4222void
4223xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
4224 xmlChar *elemName;
4225 xmlChar *attrName;
4226 xmlEnumerationPtr tree;
4227
4228 if ((RAW == '<') && (NXT(1) == '!') &&
4229 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4230 (NXT(4) == 'T') && (NXT(5) == 'L') &&
4231 (NXT(6) == 'I') && (NXT(7) == 'S') &&
4232 (NXT(8) == 'T')) {
4233 xmlParserInputPtr input = ctxt->input;
4234
4235 SKIP(9);
4236 if (!IS_BLANK(CUR)) {
4237 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4238 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4239 ctxt->sax->error(ctxt->userData,
4240 "Space required after '<!ATTLIST'\n");
4241 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004242 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004243 }
4244 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004245 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004246 if (elemName == NULL) {
4247 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4248 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4249 ctxt->sax->error(ctxt->userData,
4250 "ATTLIST: no name for Element\n");
4251 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004252 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004253 return;
4254 }
4255 SKIP_BLANKS;
4256 GROW;
4257 while (RAW != '>') {
4258 const xmlChar *check = CUR_PTR;
4259 int type;
4260 int def;
4261 xmlChar *defaultValue = NULL;
4262
4263 GROW;
4264 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004265 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004266 if (attrName == NULL) {
4267 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4268 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4269 ctxt->sax->error(ctxt->userData,
4270 "ATTLIST: no name for Attribute\n");
4271 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004272 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004273 break;
4274 }
4275 GROW;
4276 if (!IS_BLANK(CUR)) {
4277 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4278 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4279 ctxt->sax->error(ctxt->userData,
4280 "Space required after the attribute name\n");
4281 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004282 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004283 if (attrName != NULL)
4284 xmlFree(attrName);
4285 if (defaultValue != NULL)
4286 xmlFree(defaultValue);
4287 break;
4288 }
4289 SKIP_BLANKS;
4290
4291 type = xmlParseAttributeType(ctxt, &tree);
4292 if (type <= 0) {
4293 if (attrName != NULL)
4294 xmlFree(attrName);
4295 if (defaultValue != NULL)
4296 xmlFree(defaultValue);
4297 break;
4298 }
4299
4300 GROW;
4301 if (!IS_BLANK(CUR)) {
4302 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4303 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4304 ctxt->sax->error(ctxt->userData,
4305 "Space required after the attribute type\n");
4306 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004307 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004308 if (attrName != NULL)
4309 xmlFree(attrName);
4310 if (defaultValue != NULL)
4311 xmlFree(defaultValue);
4312 if (tree != NULL)
4313 xmlFreeEnumeration(tree);
4314 break;
4315 }
4316 SKIP_BLANKS;
4317
4318 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4319 if (def <= 0) {
4320 if (attrName != NULL)
4321 xmlFree(attrName);
4322 if (defaultValue != NULL)
4323 xmlFree(defaultValue);
4324 if (tree != NULL)
4325 xmlFreeEnumeration(tree);
4326 break;
4327 }
4328
4329 GROW;
4330 if (RAW != '>') {
4331 if (!IS_BLANK(CUR)) {
4332 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4333 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4334 ctxt->sax->error(ctxt->userData,
4335 "Space required after the attribute default value\n");
4336 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004337 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004338 if (attrName != NULL)
4339 xmlFree(attrName);
4340 if (defaultValue != NULL)
4341 xmlFree(defaultValue);
4342 if (tree != NULL)
4343 xmlFreeEnumeration(tree);
4344 break;
4345 }
4346 SKIP_BLANKS;
4347 }
4348 if (check == CUR_PTR) {
4349 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4350 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4351 ctxt->sax->error(ctxt->userData,
4352 "xmlParseAttributeListDecl: detected internal error\n");
4353 if (attrName != NULL)
4354 xmlFree(attrName);
4355 if (defaultValue != NULL)
4356 xmlFree(defaultValue);
4357 if (tree != NULL)
4358 xmlFreeEnumeration(tree);
4359 break;
4360 }
4361 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4362 (ctxt->sax->attributeDecl != NULL))
4363 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4364 type, def, defaultValue, tree);
4365 if (attrName != NULL)
4366 xmlFree(attrName);
4367 if (defaultValue != NULL)
4368 xmlFree(defaultValue);
4369 GROW;
4370 }
4371 if (RAW == '>') {
4372 if (input != ctxt->input) {
4373 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4374 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4375 ctxt->sax->error(ctxt->userData,
4376"Attribute list declaration doesn't start and stop in the same entity\n");
4377 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004378 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004379 }
4380 NEXT;
4381 }
4382
4383 xmlFree(elemName);
4384 }
4385}
4386
4387/**
4388 * xmlParseElementMixedContentDecl:
4389 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004390 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004391 *
4392 * parse the declaration for a Mixed Element content
4393 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4394 *
4395 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4396 * '(' S? '#PCDATA' S? ')'
4397 *
4398 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4399 *
4400 * [ VC: No Duplicate Types ]
4401 * The same name must not appear more than once in a single
4402 * mixed-content declaration.
4403 *
4404 * returns: the list of the xmlElementContentPtr describing the element choices
4405 */
4406xmlElementContentPtr
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004407xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004408 xmlElementContentPtr ret = NULL, cur = NULL, n;
4409 xmlChar *elem = NULL;
4410
4411 GROW;
4412 if ((RAW == '#') && (NXT(1) == 'P') &&
4413 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4414 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4415 (NXT(6) == 'A')) {
4416 SKIP(7);
4417 SKIP_BLANKS;
4418 SHRINK;
4419 if (RAW == ')') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004420 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4421 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4422 if (ctxt->vctxt.error != NULL)
4423 ctxt->vctxt.error(ctxt->vctxt.userData,
4424"Element content declaration doesn't start and stop in the same entity\n");
4425 ctxt->valid = 0;
4426 }
Owen Taylor3473f882001-02-23 17:55:21 +00004427 NEXT;
4428 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4429 if (RAW == '*') {
4430 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4431 NEXT;
4432 }
4433 return(ret);
4434 }
4435 if ((RAW == '(') || (RAW == '|')) {
4436 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4437 if (ret == NULL) return(NULL);
4438 }
4439 while (RAW == '|') {
4440 NEXT;
4441 if (elem == NULL) {
4442 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4443 if (ret == NULL) return(NULL);
4444 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004445 if (cur != NULL)
4446 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004447 cur = ret;
4448 } else {
4449 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4450 if (n == NULL) return(NULL);
4451 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004452 if (n->c1 != NULL)
4453 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004454 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004455 if (n != NULL)
4456 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004457 cur = n;
4458 xmlFree(elem);
4459 }
4460 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004461 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004462 if (elem == NULL) {
4463 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4464 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4465 ctxt->sax->error(ctxt->userData,
4466 "xmlParseElementMixedContentDecl : Name expected\n");
4467 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004468 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004469 xmlFreeElementContent(cur);
4470 return(NULL);
4471 }
4472 SKIP_BLANKS;
4473 GROW;
4474 }
4475 if ((RAW == ')') && (NXT(1) == '*')) {
4476 if (elem != NULL) {
4477 cur->c2 = xmlNewElementContent(elem,
4478 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004479 if (cur->c2 != NULL)
4480 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004481 xmlFree(elem);
4482 }
4483 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004484 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4485 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4486 if (ctxt->vctxt.error != NULL)
4487 ctxt->vctxt.error(ctxt->vctxt.userData,
4488"Element content declaration doesn't start and stop in the same entity\n");
4489 ctxt->valid = 0;
4490 }
Owen Taylor3473f882001-02-23 17:55:21 +00004491 SKIP(2);
4492 } else {
4493 if (elem != NULL) xmlFree(elem);
4494 xmlFreeElementContent(ret);
4495 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4496 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4497 ctxt->sax->error(ctxt->userData,
4498 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4499 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004500 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004501 return(NULL);
4502 }
4503
4504 } else {
4505 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4506 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4507 ctxt->sax->error(ctxt->userData,
4508 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4509 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004510 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004511 }
4512 return(ret);
4513}
4514
4515/**
4516 * xmlParseElementChildrenContentDecl:
4517 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004518 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004519 *
4520 * parse the declaration for an Element children content (choice or seq)
4521 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4522 *
4523 *
4524 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4525 *
4526 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4527 *
4528 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4529 *
4530 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4531 *
4532 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4533 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004534 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004535 * opening or closing parentheses in a choice, seq, or Mixed
4536 * construct is contained in the replacement text for a parameter
4537 * entity, both must be contained in the same replacement text. For
4538 * interoperability, if a parameter-entity reference appears in a
4539 * choice, seq, or Mixed construct, its replacement text should not
4540 * be empty, and neither the first nor last non-blank character of
4541 * the replacement text should be a connector (| or ,).
4542 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004543 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004544 * hierarchy.
4545 */
4546xmlElementContentPtr
Owen Taylor3473f882001-02-23 17:55:21 +00004547xmlParseElementChildrenContentDecl
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004548(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004549 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4550 xmlChar *elem;
4551 xmlChar type = 0;
4552
4553 SKIP_BLANKS;
4554 GROW;
4555 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004556 xmlParserInputPtr input = ctxt->input;
4557
Owen Taylor3473f882001-02-23 17:55:21 +00004558 /* Recurse on first child */
4559 NEXT;
4560 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004561 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004562 SKIP_BLANKS;
4563 GROW;
4564 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004565 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004566 if (elem == NULL) {
4567 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4568 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4569 ctxt->sax->error(ctxt->userData,
4570 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4571 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004572 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004573 return(NULL);
4574 }
4575 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4576 GROW;
4577 if (RAW == '?') {
4578 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4579 NEXT;
4580 } else if (RAW == '*') {
4581 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4582 NEXT;
4583 } else if (RAW == '+') {
4584 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4585 NEXT;
4586 } else {
4587 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4588 }
4589 xmlFree(elem);
4590 GROW;
4591 }
4592 SKIP_BLANKS;
4593 SHRINK;
4594 while (RAW != ')') {
4595 /*
4596 * Each loop we parse one separator and one element.
4597 */
4598 if (RAW == ',') {
4599 if (type == 0) type = CUR;
4600
4601 /*
4602 * Detect "Name | Name , Name" error
4603 */
4604 else if (type != CUR) {
4605 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4606 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4607 ctxt->sax->error(ctxt->userData,
4608 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4609 type);
4610 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004611 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004612 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004613 xmlFreeElementContent(last);
4614 if (ret != NULL)
4615 xmlFreeElementContent(ret);
4616 return(NULL);
4617 }
4618 NEXT;
4619
4620 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4621 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004622 if ((last != NULL) && (last != ret))
4623 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004624 xmlFreeElementContent(ret);
4625 return(NULL);
4626 }
4627 if (last == NULL) {
4628 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004629 if (ret != NULL)
4630 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004631 ret = cur = op;
4632 } else {
4633 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004634 if (op != NULL)
4635 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004636 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004637 if (last != NULL)
4638 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004639 cur =op;
4640 last = NULL;
4641 }
4642 } else if (RAW == '|') {
4643 if (type == 0) type = CUR;
4644
4645 /*
4646 * Detect "Name , Name | Name" error
4647 */
4648 else if (type != CUR) {
4649 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4650 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4651 ctxt->sax->error(ctxt->userData,
4652 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4653 type);
4654 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004655 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004656 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004657 xmlFreeElementContent(last);
4658 if (ret != NULL)
4659 xmlFreeElementContent(ret);
4660 return(NULL);
4661 }
4662 NEXT;
4663
4664 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4665 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004666 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004667 xmlFreeElementContent(last);
4668 if (ret != NULL)
4669 xmlFreeElementContent(ret);
4670 return(NULL);
4671 }
4672 if (last == NULL) {
4673 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004674 if (ret != NULL)
4675 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004676 ret = cur = op;
4677 } else {
4678 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004679 if (op != NULL)
4680 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004681 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004682 if (last != NULL)
4683 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004684 cur =op;
4685 last = NULL;
4686 }
4687 } else {
4688 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4689 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4690 ctxt->sax->error(ctxt->userData,
4691 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4692 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004693 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004694 if (ret != NULL)
4695 xmlFreeElementContent(ret);
4696 return(NULL);
4697 }
4698 GROW;
4699 SKIP_BLANKS;
4700 GROW;
4701 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004702 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004703 /* Recurse on second child */
4704 NEXT;
4705 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004706 last = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004707 SKIP_BLANKS;
4708 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004709 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004710 if (elem == NULL) {
4711 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4712 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4713 ctxt->sax->error(ctxt->userData,
4714 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4715 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004716 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004717 if (ret != NULL)
4718 xmlFreeElementContent(ret);
4719 return(NULL);
4720 }
4721 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4722 xmlFree(elem);
4723 if (RAW == '?') {
4724 last->ocur = XML_ELEMENT_CONTENT_OPT;
4725 NEXT;
4726 } else if (RAW == '*') {
4727 last->ocur = XML_ELEMENT_CONTENT_MULT;
4728 NEXT;
4729 } else if (RAW == '+') {
4730 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4731 NEXT;
4732 } else {
4733 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4734 }
4735 }
4736 SKIP_BLANKS;
4737 GROW;
4738 }
4739 if ((cur != NULL) && (last != NULL)) {
4740 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004741 if (last != NULL)
4742 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004743 }
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004744 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4745 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4746 if (ctxt->vctxt.error != NULL)
4747 ctxt->vctxt.error(ctxt->vctxt.userData,
4748"Element content declaration doesn't start and stop in the same entity\n");
4749 ctxt->valid = 0;
4750 }
Owen Taylor3473f882001-02-23 17:55:21 +00004751 NEXT;
4752 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004753 if (ret != NULL)
4754 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004755 NEXT;
4756 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004757 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004758 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004759 cur = ret;
4760 /*
4761 * Some normalization:
4762 * (a | b* | c?)* == (a | b | c)*
4763 */
4764 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4765 if ((cur->c1 != NULL) &&
4766 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4767 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4768 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4769 if ((cur->c2 != NULL) &&
4770 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4771 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4772 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4773 cur = cur->c2;
4774 }
4775 }
Owen Taylor3473f882001-02-23 17:55:21 +00004776 NEXT;
4777 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004778 if (ret != NULL) {
4779 int found = 0;
4780
Daniel Veillarde470df72001-04-18 21:41:07 +00004781 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004782 /*
4783 * Some normalization:
4784 * (a | b*)+ == (a | b)*
4785 * (a | b?)+ == (a | b)*
4786 */
4787 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4788 if ((cur->c1 != NULL) &&
4789 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4790 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4791 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4792 found = 1;
4793 }
4794 if ((cur->c2 != NULL) &&
4795 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4796 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4797 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4798 found = 1;
4799 }
4800 cur = cur->c2;
4801 }
4802 if (found)
4803 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4804 }
Owen Taylor3473f882001-02-23 17:55:21 +00004805 NEXT;
4806 }
4807 return(ret);
4808}
4809
4810/**
4811 * xmlParseElementContentDecl:
4812 * @ctxt: an XML parser context
4813 * @name: the name of the element being defined.
4814 * @result: the Element Content pointer will be stored here if any
4815 *
4816 * parse the declaration for an Element content either Mixed or Children,
4817 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4818 *
4819 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4820 *
4821 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4822 */
4823
4824int
4825xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4826 xmlElementContentPtr *result) {
4827
4828 xmlElementContentPtr tree = NULL;
4829 xmlParserInputPtr input = ctxt->input;
4830 int res;
4831
4832 *result = NULL;
4833
4834 if (RAW != '(') {
4835 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4836 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4837 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004838 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004839 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004840 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004841 return(-1);
4842 }
4843 NEXT;
4844 GROW;
4845 SKIP_BLANKS;
4846 if ((RAW == '#') && (NXT(1) == 'P') &&
4847 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4848 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4849 (NXT(6) == 'A')) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004850 tree = xmlParseElementMixedContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004851 res = XML_ELEMENT_TYPE_MIXED;
4852 } else {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004853 tree = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004854 res = XML_ELEMENT_TYPE_ELEMENT;
4855 }
Owen Taylor3473f882001-02-23 17:55:21 +00004856 SKIP_BLANKS;
4857 *result = tree;
4858 return(res);
4859}
4860
4861/**
4862 * xmlParseElementDecl:
4863 * @ctxt: an XML parser context
4864 *
4865 * parse an Element declaration.
4866 *
4867 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4868 *
4869 * [ VC: Unique Element Type Declaration ]
4870 * No element type may be declared more than once
4871 *
4872 * Returns the type of the element, or -1 in case of error
4873 */
4874int
4875xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4876 xmlChar *name;
4877 int ret = -1;
4878 xmlElementContentPtr content = NULL;
4879
4880 GROW;
4881 if ((RAW == '<') && (NXT(1) == '!') &&
4882 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4883 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4884 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4885 (NXT(8) == 'T')) {
4886 xmlParserInputPtr input = ctxt->input;
4887
4888 SKIP(9);
4889 if (!IS_BLANK(CUR)) {
4890 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4891 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4892 ctxt->sax->error(ctxt->userData,
4893 "Space required after 'ELEMENT'\n");
4894 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004895 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004896 }
4897 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004898 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004899 if (name == NULL) {
4900 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4901 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4902 ctxt->sax->error(ctxt->userData,
4903 "xmlParseElementDecl: no name for Element\n");
4904 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004905 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004906 return(-1);
4907 }
4908 while ((RAW == 0) && (ctxt->inputNr > 1))
4909 xmlPopInput(ctxt);
4910 if (!IS_BLANK(CUR)) {
4911 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4912 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4913 ctxt->sax->error(ctxt->userData,
4914 "Space required after the element name\n");
4915 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004916 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004917 }
4918 SKIP_BLANKS;
4919 if ((RAW == 'E') && (NXT(1) == 'M') &&
4920 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4921 (NXT(4) == 'Y')) {
4922 SKIP(5);
4923 /*
4924 * Element must always be empty.
4925 */
4926 ret = XML_ELEMENT_TYPE_EMPTY;
4927 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4928 (NXT(2) == 'Y')) {
4929 SKIP(3);
4930 /*
4931 * Element is a generic container.
4932 */
4933 ret = XML_ELEMENT_TYPE_ANY;
4934 } else if (RAW == '(') {
4935 ret = xmlParseElementContentDecl(ctxt, name, &content);
4936 } else {
4937 /*
4938 * [ WFC: PEs in Internal Subset ] error handling.
4939 */
4940 if ((RAW == '%') && (ctxt->external == 0) &&
4941 (ctxt->inputNr == 1)) {
4942 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4943 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4944 ctxt->sax->error(ctxt->userData,
4945 "PEReference: forbidden within markup decl in internal subset\n");
4946 } else {
4947 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4948 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4949 ctxt->sax->error(ctxt->userData,
4950 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4951 }
4952 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004953 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004954 if (name != NULL) xmlFree(name);
4955 return(-1);
4956 }
4957
4958 SKIP_BLANKS;
4959 /*
4960 * Pop-up of finished entities.
4961 */
4962 while ((RAW == 0) && (ctxt->inputNr > 1))
4963 xmlPopInput(ctxt);
4964 SKIP_BLANKS;
4965
4966 if (RAW != '>') {
4967 ctxt->errNo = XML_ERR_GT_REQUIRED;
4968 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4969 ctxt->sax->error(ctxt->userData,
4970 "xmlParseElementDecl: expected '>' at the end\n");
4971 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004972 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004973 } else {
4974 if (input != ctxt->input) {
4975 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4976 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4977 ctxt->sax->error(ctxt->userData,
4978"Element declaration doesn't start and stop in the same entity\n");
4979 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004980 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004981 }
4982
4983 NEXT;
4984 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4985 (ctxt->sax->elementDecl != NULL))
4986 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4987 content);
4988 }
4989 if (content != NULL) {
4990 xmlFreeElementContent(content);
4991 }
4992 if (name != NULL) {
4993 xmlFree(name);
4994 }
4995 }
4996 return(ret);
4997}
4998
4999/**
Owen Taylor3473f882001-02-23 17:55:21 +00005000 * xmlParseConditionalSections
5001 * @ctxt: an XML parser context
5002 *
5003 * [61] conditionalSect ::= includeSect | ignoreSect
5004 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5005 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5006 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5007 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5008 */
5009
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005010static void
Owen Taylor3473f882001-02-23 17:55:21 +00005011xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5012 SKIP(3);
5013 SKIP_BLANKS;
5014 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
5015 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
5016 (NXT(6) == 'E')) {
5017 SKIP(7);
5018 SKIP_BLANKS;
5019 if (RAW != '[') {
5020 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5021 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5022 ctxt->sax->error(ctxt->userData,
5023 "XML conditional section '[' expected\n");
5024 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005025 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005026 } else {
5027 NEXT;
5028 }
5029 if (xmlParserDebugEntities) {
5030 if ((ctxt->input != NULL) && (ctxt->input->filename))
5031 xmlGenericError(xmlGenericErrorContext,
5032 "%s(%d): ", ctxt->input->filename,
5033 ctxt->input->line);
5034 xmlGenericError(xmlGenericErrorContext,
5035 "Entering INCLUDE Conditional Section\n");
5036 }
5037
5038 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5039 (NXT(2) != '>'))) {
5040 const xmlChar *check = CUR_PTR;
5041 int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005042
5043 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5044 xmlParseConditionalSections(ctxt);
5045 } else if (IS_BLANK(CUR)) {
5046 NEXT;
5047 } else if (RAW == '%') {
5048 xmlParsePEReference(ctxt);
5049 } else
5050 xmlParseMarkupDecl(ctxt);
5051
5052 /*
5053 * Pop-up of finished entities.
5054 */
5055 while ((RAW == 0) && (ctxt->inputNr > 1))
5056 xmlPopInput(ctxt);
5057
Daniel Veillardfdc91562002-07-01 21:52:03 +00005058 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005059 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5060 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5061 ctxt->sax->error(ctxt->userData,
5062 "Content error in the external subset\n");
5063 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005064 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005065 break;
5066 }
5067 }
5068 if (xmlParserDebugEntities) {
5069 if ((ctxt->input != NULL) && (ctxt->input->filename))
5070 xmlGenericError(xmlGenericErrorContext,
5071 "%s(%d): ", ctxt->input->filename,
5072 ctxt->input->line);
5073 xmlGenericError(xmlGenericErrorContext,
5074 "Leaving INCLUDE Conditional Section\n");
5075 }
5076
5077 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
5078 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
5079 int state;
5080 int instate;
5081 int depth = 0;
5082
5083 SKIP(6);
5084 SKIP_BLANKS;
5085 if (RAW != '[') {
5086 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5087 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5088 ctxt->sax->error(ctxt->userData,
5089 "XML conditional section '[' expected\n");
5090 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005091 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005092 } else {
5093 NEXT;
5094 }
5095 if (xmlParserDebugEntities) {
5096 if ((ctxt->input != NULL) && (ctxt->input->filename))
5097 xmlGenericError(xmlGenericErrorContext,
5098 "%s(%d): ", ctxt->input->filename,
5099 ctxt->input->line);
5100 xmlGenericError(xmlGenericErrorContext,
5101 "Entering IGNORE Conditional Section\n");
5102 }
5103
5104 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005105 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005106 * But disable SAX event generating DTD building in the meantime
5107 */
5108 state = ctxt->disableSAX;
5109 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005110 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005111 ctxt->instate = XML_PARSER_IGNORE;
5112
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005113 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005114 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5115 depth++;
5116 SKIP(3);
5117 continue;
5118 }
5119 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5120 if (--depth >= 0) SKIP(3);
5121 continue;
5122 }
5123 NEXT;
5124 continue;
5125 }
5126
5127 ctxt->disableSAX = state;
5128 ctxt->instate = instate;
5129
5130 if (xmlParserDebugEntities) {
5131 if ((ctxt->input != NULL) && (ctxt->input->filename))
5132 xmlGenericError(xmlGenericErrorContext,
5133 "%s(%d): ", ctxt->input->filename,
5134 ctxt->input->line);
5135 xmlGenericError(xmlGenericErrorContext,
5136 "Leaving IGNORE Conditional Section\n");
5137 }
5138
5139 } else {
5140 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5141 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5142 ctxt->sax->error(ctxt->userData,
5143 "XML conditional section INCLUDE or IGNORE keyword expected\n");
5144 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005145 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005146 }
5147
5148 if (RAW == 0)
5149 SHRINK;
5150
5151 if (RAW == 0) {
5152 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
5153 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5154 ctxt->sax->error(ctxt->userData,
5155 "XML conditional section not closed\n");
5156 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005157 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005158 } else {
5159 SKIP(3);
5160 }
5161}
5162
5163/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005164 * xmlParseMarkupDecl:
5165 * @ctxt: an XML parser context
5166 *
5167 * parse Markup declarations
5168 *
5169 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5170 * NotationDecl | PI | Comment
5171 *
5172 * [ VC: Proper Declaration/PE Nesting ]
5173 * Parameter-entity replacement text must be properly nested with
5174 * markup declarations. That is to say, if either the first character
5175 * or the last character of a markup declaration (markupdecl above) is
5176 * contained in the replacement text for a parameter-entity reference,
5177 * both must be contained in the same replacement text.
5178 *
5179 * [ WFC: PEs in Internal Subset ]
5180 * In the internal DTD subset, parameter-entity references can occur
5181 * only where markup declarations can occur, not within markup declarations.
5182 * (This does not apply to references that occur in external parameter
5183 * entities or to the external subset.)
5184 */
5185void
5186xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5187 GROW;
5188 xmlParseElementDecl(ctxt);
5189 xmlParseAttributeListDecl(ctxt);
5190 xmlParseEntityDecl(ctxt);
5191 xmlParseNotationDecl(ctxt);
5192 xmlParsePI(ctxt);
5193 xmlParseComment(ctxt);
5194 /*
5195 * This is only for internal subset. On external entities,
5196 * the replacement is done before parsing stage
5197 */
5198 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5199 xmlParsePEReference(ctxt);
5200
5201 /*
5202 * Conditional sections are allowed from entities included
5203 * by PE References in the internal subset.
5204 */
5205 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5206 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5207 xmlParseConditionalSections(ctxt);
5208 }
5209 }
5210
5211 ctxt->instate = XML_PARSER_DTD;
5212}
5213
5214/**
5215 * xmlParseTextDecl:
5216 * @ctxt: an XML parser context
5217 *
5218 * parse an XML declaration header for external entities
5219 *
5220 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5221 *
5222 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5223 */
5224
5225void
5226xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5227 xmlChar *version;
5228
5229 /*
5230 * We know that '<?xml' is here.
5231 */
5232 if ((RAW == '<') && (NXT(1) == '?') &&
5233 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5234 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5235 SKIP(5);
5236 } else {
5237 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
5238 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5239 ctxt->sax->error(ctxt->userData,
5240 "Text declaration '<?xml' required\n");
5241 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005242 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005243
5244 return;
5245 }
5246
5247 if (!IS_BLANK(CUR)) {
5248 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5249 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5250 ctxt->sax->error(ctxt->userData,
5251 "Space needed after '<?xml'\n");
5252 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005253 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005254 }
5255 SKIP_BLANKS;
5256
5257 /*
5258 * We may have the VersionInfo here.
5259 */
5260 version = xmlParseVersionInfo(ctxt);
5261 if (version == NULL)
5262 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005263 else {
5264 if (!IS_BLANK(CUR)) {
5265 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5266 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5267 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5268 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005269 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard401c2112002-01-07 16:54:10 +00005270 }
5271 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005272 ctxt->input->version = version;
5273
5274 /*
5275 * We must have the encoding declaration
5276 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005277 xmlParseEncodingDecl(ctxt);
5278 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5279 /*
5280 * The XML REC instructs us to stop parsing right here
5281 */
5282 return;
5283 }
5284
5285 SKIP_BLANKS;
5286 if ((RAW == '?') && (NXT(1) == '>')) {
5287 SKIP(2);
5288 } else if (RAW == '>') {
5289 /* Deprecated old WD ... */
5290 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5291 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5292 ctxt->sax->error(ctxt->userData,
5293 "XML declaration must end-up with '?>'\n");
5294 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005295 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005296 NEXT;
5297 } else {
5298 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5299 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5300 ctxt->sax->error(ctxt->userData,
5301 "parsing XML declaration: '?>' expected\n");
5302 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005303 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005304 MOVETO_ENDTAG(CUR_PTR);
5305 NEXT;
5306 }
5307}
5308
5309/**
Owen Taylor3473f882001-02-23 17:55:21 +00005310 * xmlParseExternalSubset:
5311 * @ctxt: an XML parser context
5312 * @ExternalID: the external identifier
5313 * @SystemID: the system identifier (or URL)
5314 *
5315 * parse Markup declarations from an external subset
5316 *
5317 * [30] extSubset ::= textDecl? extSubsetDecl
5318 *
5319 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5320 */
5321void
5322xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5323 const xmlChar *SystemID) {
5324 GROW;
5325 if ((RAW == '<') && (NXT(1) == '?') &&
5326 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5327 (NXT(4) == 'l')) {
5328 xmlParseTextDecl(ctxt);
5329 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5330 /*
5331 * The XML REC instructs us to stop parsing right here
5332 */
5333 ctxt->instate = XML_PARSER_EOF;
5334 return;
5335 }
5336 }
5337 if (ctxt->myDoc == NULL) {
5338 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5339 }
5340 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5341 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5342
5343 ctxt->instate = XML_PARSER_DTD;
5344 ctxt->external = 1;
5345 while (((RAW == '<') && (NXT(1) == '?')) ||
5346 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005347 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005348 const xmlChar *check = CUR_PTR;
5349 int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005350
5351 GROW;
5352 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5353 xmlParseConditionalSections(ctxt);
5354 } else if (IS_BLANK(CUR)) {
5355 NEXT;
5356 } else if (RAW == '%') {
5357 xmlParsePEReference(ctxt);
5358 } else
5359 xmlParseMarkupDecl(ctxt);
5360
5361 /*
5362 * Pop-up of finished entities.
5363 */
5364 while ((RAW == 0) && (ctxt->inputNr > 1))
5365 xmlPopInput(ctxt);
5366
Daniel Veillardfdc91562002-07-01 21:52:03 +00005367 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005368 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5369 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5370 ctxt->sax->error(ctxt->userData,
5371 "Content error in the external subset\n");
5372 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005373 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005374 break;
5375 }
5376 }
5377
5378 if (RAW != 0) {
5379 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5380 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5381 ctxt->sax->error(ctxt->userData,
5382 "Extra content at the end of the document\n");
5383 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005384 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005385 }
5386
5387}
5388
5389/**
5390 * xmlParseReference:
5391 * @ctxt: an XML parser context
5392 *
5393 * parse and handle entity references in content, depending on the SAX
5394 * interface, this may end-up in a call to character() if this is a
5395 * CharRef, a predefined entity, if there is no reference() callback.
5396 * or if the parser was asked to switch to that mode.
5397 *
5398 * [67] Reference ::= EntityRef | CharRef
5399 */
5400void
5401xmlParseReference(xmlParserCtxtPtr ctxt) {
5402 xmlEntityPtr ent;
5403 xmlChar *val;
5404 if (RAW != '&') return;
5405
5406 if (NXT(1) == '#') {
5407 int i = 0;
5408 xmlChar out[10];
5409 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005410 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005411
5412 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5413 /*
5414 * So we are using non-UTF-8 buffers
5415 * Check that the char fit on 8bits, if not
5416 * generate a CharRef.
5417 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005418 if (value <= 0xFF) {
5419 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005420 out[1] = 0;
5421 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5422 (!ctxt->disableSAX))
5423 ctxt->sax->characters(ctxt->userData, out, 1);
5424 } else {
5425 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005426 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005427 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005428 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005429 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5430 (!ctxt->disableSAX))
5431 ctxt->sax->reference(ctxt->userData, out);
5432 }
5433 } else {
5434 /*
5435 * Just encode the value in UTF-8
5436 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005437 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005438 out[i] = 0;
5439 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5440 (!ctxt->disableSAX))
5441 ctxt->sax->characters(ctxt->userData, out, i);
5442 }
5443 } else {
5444 ent = xmlParseEntityRef(ctxt);
5445 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005446 if (!ctxt->wellFormed)
5447 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005448 if ((ent->name != NULL) &&
5449 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5450 xmlNodePtr list = NULL;
5451 int ret;
5452
5453
5454 /*
5455 * The first reference to the entity trigger a parsing phase
5456 * where the ent->children is filled with the result from
5457 * the parsing.
5458 */
5459 if (ent->children == NULL) {
5460 xmlChar *value;
5461 value = ent->content;
5462
5463 /*
5464 * Check that this entity is well formed
5465 */
5466 if ((value != NULL) &&
5467 (value[1] == 0) && (value[0] == '<') &&
5468 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5469 /*
5470 * DONE: get definite answer on this !!!
5471 * Lots of entity decls are used to declare a single
5472 * char
5473 * <!ENTITY lt "<">
5474 * Which seems to be valid since
5475 * 2.4: The ampersand character (&) and the left angle
5476 * bracket (<) may appear in their literal form only
5477 * when used ... They are also legal within the literal
5478 * entity value of an internal entity declaration;i
5479 * see "4.3.2 Well-Formed Parsed Entities".
5480 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5481 * Looking at the OASIS test suite and James Clark
5482 * tests, this is broken. However the XML REC uses
5483 * it. Is the XML REC not well-formed ????
5484 * This is a hack to avoid this problem
5485 *
5486 * ANSWER: since lt gt amp .. are already defined,
5487 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005488 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005489 * is lousy but acceptable.
5490 */
5491 list = xmlNewDocText(ctxt->myDoc, value);
5492 if (list != NULL) {
5493 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5494 (ent->children == NULL)) {
5495 ent->children = list;
5496 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005497 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005498 list->parent = (xmlNodePtr) ent;
5499 } else {
5500 xmlFreeNodeList(list);
5501 }
5502 } else if (list != NULL) {
5503 xmlFreeNodeList(list);
5504 }
5505 } else {
5506 /*
5507 * 4.3.2: An internal general parsed entity is well-formed
5508 * if its replacement text matches the production labeled
5509 * content.
5510 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005511
5512 void *user_data;
5513 /*
5514 * This is a bit hackish but this seems the best
5515 * way to make sure both SAX and DOM entity support
5516 * behaves okay.
5517 */
5518 if (ctxt->userData == ctxt)
5519 user_data = NULL;
5520 else
5521 user_data = ctxt->userData;
5522
Owen Taylor3473f882001-02-23 17:55:21 +00005523 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5524 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005525 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5526 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005527 ctxt->depth--;
5528 } else if (ent->etype ==
5529 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5530 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005531 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005532 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005533 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005534 ctxt->depth--;
5535 } else {
5536 ret = -1;
5537 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5538 ctxt->sax->error(ctxt->userData,
5539 "Internal: invalid entity type\n");
5540 }
5541 if (ret == XML_ERR_ENTITY_LOOP) {
5542 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5543 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5544 ctxt->sax->error(ctxt->userData,
5545 "Detected entity reference loop\n");
5546 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005547 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005548 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005549 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005550 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5551 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005552 (ent->children == NULL)) {
5553 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005554 if (ctxt->replaceEntities) {
5555 /*
5556 * Prune it directly in the generated document
5557 * except for single text nodes.
5558 */
5559 if ((list->type == XML_TEXT_NODE) &&
5560 (list->next == NULL)) {
5561 list->parent = (xmlNodePtr) ent;
5562 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005563 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005564 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005565 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005566 while (list != NULL) {
5567 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005568 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005569 if (list->next == NULL)
5570 ent->last = list;
5571 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005572 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005573 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005574 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5575 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005576 }
5577 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005578 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005579 while (list != NULL) {
5580 list->parent = (xmlNodePtr) ent;
5581 if (list->next == NULL)
5582 ent->last = list;
5583 list = list->next;
5584 }
Owen Taylor3473f882001-02-23 17:55:21 +00005585 }
5586 } else {
5587 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005588 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005589 }
5590 } else if (ret > 0) {
5591 ctxt->errNo = ret;
5592 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5593 ctxt->sax->error(ctxt->userData,
5594 "Entity value required\n");
5595 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005596 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005597 } else if (list != NULL) {
5598 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005599 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005600 }
5601 }
5602 }
5603 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5604 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5605 /*
5606 * Create a node.
5607 */
5608 ctxt->sax->reference(ctxt->userData, ent->name);
5609 return;
5610 } else if (ctxt->replaceEntities) {
5611 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5612 /*
5613 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005614 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005615 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005616 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005617 if (list == NULL) {
Daniel Veillard8107a222002-01-13 14:10:10 +00005618 xmlNodePtr new = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005619 cur = ent->children;
5620 while (cur != NULL) {
5621 new = xmlCopyNode(cur, 1);
Daniel Veillard8f872442003-01-09 23:19:02 +00005622 if (new != NULL) {
5623 new->_private = cur->_private;
5624 if (firstChild == NULL){
5625 firstChild = new;
5626 }
5627 xmlAddChild(ctxt->node, new);
Daniel Veillard8107a222002-01-13 14:10:10 +00005628 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005629 if (cur == ent->last)
5630 break;
5631 cur = cur->next;
5632 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005633 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5634 xmlAddEntityReference(ent, firstChild, new);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005635 } else {
5636 /*
5637 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005638 * node with a possible previous text one which
5639 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005640 */
5641 if (ent->children->type == XML_TEXT_NODE)
5642 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5643 if ((ent->last != ent->children) &&
5644 (ent->last->type == XML_TEXT_NODE))
5645 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5646 xmlAddChildList(ctxt->node, ent->children);
5647 }
5648
Owen Taylor3473f882001-02-23 17:55:21 +00005649 /*
5650 * This is to avoid a nasty side effect, see
5651 * characters() in SAX.c
5652 */
5653 ctxt->nodemem = 0;
5654 ctxt->nodelen = 0;
5655 return;
5656 } else {
5657 /*
5658 * Probably running in SAX mode
5659 */
5660 xmlParserInputPtr input;
5661
5662 input = xmlNewEntityInputStream(ctxt, ent);
5663 xmlPushInput(ctxt, input);
5664 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5665 (RAW == '<') && (NXT(1) == '?') &&
5666 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5667 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5668 xmlParseTextDecl(ctxt);
5669 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5670 /*
5671 * The XML REC instructs us to stop parsing right here
5672 */
5673 ctxt->instate = XML_PARSER_EOF;
5674 return;
5675 }
5676 if (input->standalone == 1) {
5677 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5678 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5679 ctxt->sax->error(ctxt->userData,
5680 "external parsed entities cannot be standalone\n");
5681 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005682 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005683 }
5684 }
5685 return;
5686 }
5687 }
5688 } else {
5689 val = ent->content;
5690 if (val == NULL) return;
5691 /*
5692 * inline the entity.
5693 */
5694 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5695 (!ctxt->disableSAX))
5696 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5697 }
5698 }
5699}
5700
5701/**
5702 * xmlParseEntityRef:
5703 * @ctxt: an XML parser context
5704 *
5705 * parse ENTITY references declarations
5706 *
5707 * [68] EntityRef ::= '&' Name ';'
5708 *
5709 * [ WFC: Entity Declared ]
5710 * In a document without any DTD, a document with only an internal DTD
5711 * subset which contains no parameter entity references, or a document
5712 * with "standalone='yes'", the Name given in the entity reference
5713 * must match that in an entity declaration, except that well-formed
5714 * documents need not declare any of the following entities: amp, lt,
5715 * gt, apos, quot. The declaration of a parameter entity must precede
5716 * any reference to it. Similarly, the declaration of a general entity
5717 * must precede any reference to it which appears in a default value in an
5718 * attribute-list declaration. Note that if entities are declared in the
5719 * external subset or in external parameter entities, a non-validating
5720 * processor is not obligated to read and process their declarations;
5721 * for such documents, the rule that an entity must be declared is a
5722 * well-formedness constraint only if standalone='yes'.
5723 *
5724 * [ WFC: Parsed Entity ]
5725 * An entity reference must not contain the name of an unparsed entity
5726 *
5727 * Returns the xmlEntityPtr if found, or NULL otherwise.
5728 */
5729xmlEntityPtr
5730xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5731 xmlChar *name;
5732 xmlEntityPtr ent = NULL;
5733
5734 GROW;
5735
5736 if (RAW == '&') {
5737 NEXT;
5738 name = xmlParseName(ctxt);
5739 if (name == NULL) {
5740 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5741 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5742 ctxt->sax->error(ctxt->userData,
5743 "xmlParseEntityRef: no name\n");
5744 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005745 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005746 } else {
5747 if (RAW == ';') {
5748 NEXT;
5749 /*
5750 * Ask first SAX for entity resolution, otherwise try the
5751 * predefined set.
5752 */
5753 if (ctxt->sax != NULL) {
5754 if (ctxt->sax->getEntity != NULL)
5755 ent = ctxt->sax->getEntity(ctxt->userData, name);
5756 if (ent == NULL)
5757 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005758 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5759 ent = getEntity(ctxt, name);
5760 }
Owen Taylor3473f882001-02-23 17:55:21 +00005761 }
5762 /*
5763 * [ WFC: Entity Declared ]
5764 * In a document without any DTD, a document with only an
5765 * internal DTD subset which contains no parameter entity
5766 * references, or a document with "standalone='yes'", the
5767 * Name given in the entity reference must match that in an
5768 * entity declaration, except that well-formed documents
5769 * need not declare any of the following entities: amp, lt,
5770 * gt, apos, quot.
5771 * The declaration of a parameter entity must precede any
5772 * reference to it.
5773 * Similarly, the declaration of a general entity must
5774 * precede any reference to it which appears in a default
5775 * value in an attribute-list declaration. Note that if
5776 * entities are declared in the external subset or in
5777 * external parameter entities, a non-validating processor
5778 * is not obligated to read and process their declarations;
5779 * for such documents, the rule that an entity must be
5780 * declared is a well-formedness constraint only if
5781 * standalone='yes'.
5782 */
5783 if (ent == NULL) {
5784 if ((ctxt->standalone == 1) ||
5785 ((ctxt->hasExternalSubset == 0) &&
5786 (ctxt->hasPErefs == 0))) {
5787 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5788 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5789 ctxt->sax->error(ctxt->userData,
5790 "Entity '%s' not defined\n", name);
5791 ctxt->wellFormed = 0;
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005792 ctxt->valid = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005793 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005794 } else {
5795 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005796 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005797 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005798 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005799 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005800 }
5801 }
5802
5803 /*
5804 * [ WFC: Parsed Entity ]
5805 * An entity reference must not contain the name of an
5806 * unparsed entity
5807 */
5808 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5809 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5810 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5811 ctxt->sax->error(ctxt->userData,
5812 "Entity reference to unparsed entity %s\n", name);
5813 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005814 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005815 }
5816
5817 /*
5818 * [ WFC: No External Entity References ]
5819 * Attribute values cannot contain direct or indirect
5820 * entity references to external entities.
5821 */
5822 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5823 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5824 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5825 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5826 ctxt->sax->error(ctxt->userData,
5827 "Attribute references external entity '%s'\n", name);
5828 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005829 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005830 }
5831 /*
5832 * [ WFC: No < in Attribute Values ]
5833 * The replacement text of any entity referred to directly or
5834 * indirectly in an attribute value (other than "&lt;") must
5835 * not contain a <.
5836 */
5837 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5838 (ent != NULL) &&
5839 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5840 (ent->content != NULL) &&
5841 (xmlStrchr(ent->content, '<'))) {
5842 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5843 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5844 ctxt->sax->error(ctxt->userData,
5845 "'<' in entity '%s' is not allowed in attributes values\n", name);
5846 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005847 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005848 }
5849
5850 /*
5851 * Internal check, no parameter entities here ...
5852 */
5853 else {
5854 switch (ent->etype) {
5855 case XML_INTERNAL_PARAMETER_ENTITY:
5856 case XML_EXTERNAL_PARAMETER_ENTITY:
5857 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5858 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5859 ctxt->sax->error(ctxt->userData,
5860 "Attempt to reference the parameter entity '%s'\n", name);
5861 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005862 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005863 break;
5864 default:
5865 break;
5866 }
5867 }
5868
5869 /*
5870 * [ WFC: No Recursion ]
5871 * A parsed entity must not contain a recursive reference
5872 * to itself, either directly or indirectly.
5873 * Done somewhere else
5874 */
5875
5876 } else {
5877 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5878 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5879 ctxt->sax->error(ctxt->userData,
5880 "xmlParseEntityRef: expecting ';'\n");
5881 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005882 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005883 }
5884 xmlFree(name);
5885 }
5886 }
5887 return(ent);
5888}
5889
5890/**
5891 * xmlParseStringEntityRef:
5892 * @ctxt: an XML parser context
5893 * @str: a pointer to an index in the string
5894 *
5895 * parse ENTITY references declarations, but this version parses it from
5896 * a string value.
5897 *
5898 * [68] EntityRef ::= '&' Name ';'
5899 *
5900 * [ WFC: Entity Declared ]
5901 * In a document without any DTD, a document with only an internal DTD
5902 * subset which contains no parameter entity references, or a document
5903 * with "standalone='yes'", the Name given in the entity reference
5904 * must match that in an entity declaration, except that well-formed
5905 * documents need not declare any of the following entities: amp, lt,
5906 * gt, apos, quot. The declaration of a parameter entity must precede
5907 * any reference to it. Similarly, the declaration of a general entity
5908 * must precede any reference to it which appears in a default value in an
5909 * attribute-list declaration. Note that if entities are declared in the
5910 * external subset or in external parameter entities, a non-validating
5911 * processor is not obligated to read and process their declarations;
5912 * for such documents, the rule that an entity must be declared is a
5913 * well-formedness constraint only if standalone='yes'.
5914 *
5915 * [ WFC: Parsed Entity ]
5916 * An entity reference must not contain the name of an unparsed entity
5917 *
5918 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5919 * is updated to the current location in the string.
5920 */
5921xmlEntityPtr
5922xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5923 xmlChar *name;
5924 const xmlChar *ptr;
5925 xmlChar cur;
5926 xmlEntityPtr ent = NULL;
5927
5928 if ((str == NULL) || (*str == NULL))
5929 return(NULL);
5930 ptr = *str;
5931 cur = *ptr;
5932 if (cur == '&') {
5933 ptr++;
5934 cur = *ptr;
5935 name = xmlParseStringName(ctxt, &ptr);
5936 if (name == NULL) {
5937 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5938 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5939 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005940 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005941 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005942 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005943 } else {
5944 if (*ptr == ';') {
5945 ptr++;
5946 /*
5947 * Ask first SAX for entity resolution, otherwise try the
5948 * predefined set.
5949 */
5950 if (ctxt->sax != NULL) {
5951 if (ctxt->sax->getEntity != NULL)
5952 ent = ctxt->sax->getEntity(ctxt->userData, name);
5953 if (ent == NULL)
5954 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005955 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5956 ent = getEntity(ctxt, name);
5957 }
Owen Taylor3473f882001-02-23 17:55:21 +00005958 }
5959 /*
5960 * [ WFC: Entity Declared ]
5961 * In a document without any DTD, a document with only an
5962 * internal DTD subset which contains no parameter entity
5963 * references, or a document with "standalone='yes'", the
5964 * Name given in the entity reference must match that in an
5965 * entity declaration, except that well-formed documents
5966 * need not declare any of the following entities: amp, lt,
5967 * gt, apos, quot.
5968 * The declaration of a parameter entity must precede any
5969 * reference to it.
5970 * Similarly, the declaration of a general entity must
5971 * precede any reference to it which appears in a default
5972 * value in an attribute-list declaration. Note that if
5973 * entities are declared in the external subset or in
5974 * external parameter entities, a non-validating processor
5975 * is not obligated to read and process their declarations;
5976 * for such documents, the rule that an entity must be
5977 * declared is a well-formedness constraint only if
5978 * standalone='yes'.
5979 */
5980 if (ent == NULL) {
5981 if ((ctxt->standalone == 1) ||
5982 ((ctxt->hasExternalSubset == 0) &&
5983 (ctxt->hasPErefs == 0))) {
5984 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5985 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5986 ctxt->sax->error(ctxt->userData,
5987 "Entity '%s' not defined\n", name);
5988 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005989 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005990 } else {
5991 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5992 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5993 ctxt->sax->warning(ctxt->userData,
5994 "Entity '%s' not defined\n", name);
5995 }
5996 }
5997
5998 /*
5999 * [ WFC: Parsed Entity ]
6000 * An entity reference must not contain the name of an
6001 * unparsed entity
6002 */
6003 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
6004 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
6005 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6006 ctxt->sax->error(ctxt->userData,
6007 "Entity reference to unparsed entity %s\n", name);
6008 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006009 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006010 }
6011
6012 /*
6013 * [ WFC: No External Entity References ]
6014 * Attribute values cannot contain direct or indirect
6015 * entity references to external entities.
6016 */
6017 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6018 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
6019 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
6020 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6021 ctxt->sax->error(ctxt->userData,
6022 "Attribute references external entity '%s'\n", name);
6023 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006024 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006025 }
6026 /*
6027 * [ WFC: No < in Attribute Values ]
6028 * The replacement text of any entity referred to directly or
6029 * indirectly in an attribute value (other than "&lt;") must
6030 * not contain a <.
6031 */
6032 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6033 (ent != NULL) &&
6034 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6035 (ent->content != NULL) &&
6036 (xmlStrchr(ent->content, '<'))) {
6037 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6038 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6039 ctxt->sax->error(ctxt->userData,
6040 "'<' in entity '%s' is not allowed in attributes values\n", name);
6041 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006042 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006043 }
6044
6045 /*
6046 * Internal check, no parameter entities here ...
6047 */
6048 else {
6049 switch (ent->etype) {
6050 case XML_INTERNAL_PARAMETER_ENTITY:
6051 case XML_EXTERNAL_PARAMETER_ENTITY:
6052 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6053 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6054 ctxt->sax->error(ctxt->userData,
6055 "Attempt to reference the parameter entity '%s'\n", name);
6056 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006057 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006058 break;
6059 default:
6060 break;
6061 }
6062 }
6063
6064 /*
6065 * [ WFC: No Recursion ]
6066 * A parsed entity must not contain a recursive reference
6067 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006068 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006069 */
6070
6071 } else {
6072 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6073 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6074 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00006075 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006076 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006077 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006078 }
6079 xmlFree(name);
6080 }
6081 }
6082 *str = ptr;
6083 return(ent);
6084}
6085
6086/**
6087 * xmlParsePEReference:
6088 * @ctxt: an XML parser context
6089 *
6090 * parse PEReference declarations
6091 * The entity content is handled directly by pushing it's content as
6092 * a new input stream.
6093 *
6094 * [69] PEReference ::= '%' Name ';'
6095 *
6096 * [ WFC: No Recursion ]
6097 * A parsed entity must not contain a recursive
6098 * reference to itself, either directly or indirectly.
6099 *
6100 * [ WFC: Entity Declared ]
6101 * In a document without any DTD, a document with only an internal DTD
6102 * subset which contains no parameter entity references, or a document
6103 * with "standalone='yes'", ... ... The declaration of a parameter
6104 * entity must precede any reference to it...
6105 *
6106 * [ VC: Entity Declared ]
6107 * In a document with an external subset or external parameter entities
6108 * with "standalone='no'", ... ... The declaration of a parameter entity
6109 * must precede any reference to it...
6110 *
6111 * [ WFC: In DTD ]
6112 * Parameter-entity references may only appear in the DTD.
6113 * NOTE: misleading but this is handled.
6114 */
6115void
6116xmlParsePEReference(xmlParserCtxtPtr ctxt) {
6117 xmlChar *name;
6118 xmlEntityPtr entity = NULL;
6119 xmlParserInputPtr input;
6120
6121 if (RAW == '%') {
6122 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006123 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006124 if (name == NULL) {
6125 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6126 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6127 ctxt->sax->error(ctxt->userData,
6128 "xmlParsePEReference: no name\n");
6129 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006130 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006131 } else {
6132 if (RAW == ';') {
6133 NEXT;
6134 if ((ctxt->sax != NULL) &&
6135 (ctxt->sax->getParameterEntity != NULL))
6136 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6137 name);
6138 if (entity == NULL) {
6139 /*
6140 * [ WFC: Entity Declared ]
6141 * In a document without any DTD, a document with only an
6142 * internal DTD subset which contains no parameter entity
6143 * references, or a document with "standalone='yes'", ...
6144 * ... The declaration of a parameter entity must precede
6145 * any reference to it...
6146 */
6147 if ((ctxt->standalone == 1) ||
6148 ((ctxt->hasExternalSubset == 0) &&
6149 (ctxt->hasPErefs == 0))) {
6150 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6151 if ((!ctxt->disableSAX) &&
6152 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6153 ctxt->sax->error(ctxt->userData,
6154 "PEReference: %%%s; not found\n", name);
6155 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006156 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006157 } else {
6158 /*
6159 * [ VC: Entity Declared ]
6160 * In a document with an external subset or external
6161 * parameter entities with "standalone='no'", ...
6162 * ... The declaration of a parameter entity must precede
6163 * any reference to it...
6164 */
6165 if ((!ctxt->disableSAX) &&
6166 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6167 ctxt->sax->warning(ctxt->userData,
6168 "PEReference: %%%s; not found\n", name);
6169 ctxt->valid = 0;
6170 }
6171 } else {
6172 /*
6173 * Internal checking in case the entity quest barfed
6174 */
6175 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6176 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6177 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6178 ctxt->sax->warning(ctxt->userData,
6179 "Internal: %%%s; is not a parameter entity\n", name);
Daniel Veillardf5582f12002-06-11 10:08:16 +00006180 } else if (ctxt->input->free != deallocblankswrapper) {
6181 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
6182 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00006183 } else {
6184 /*
6185 * TODO !!!
6186 * handle the extra spaces added before and after
6187 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6188 */
6189 input = xmlNewEntityInputStream(ctxt, entity);
6190 xmlPushInput(ctxt, input);
6191 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6192 (RAW == '<') && (NXT(1) == '?') &&
6193 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6194 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6195 xmlParseTextDecl(ctxt);
6196 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6197 /*
6198 * The XML REC instructs us to stop parsing
6199 * right here
6200 */
6201 ctxt->instate = XML_PARSER_EOF;
6202 xmlFree(name);
6203 return;
6204 }
6205 }
Owen Taylor3473f882001-02-23 17:55:21 +00006206 }
6207 }
6208 ctxt->hasPErefs = 1;
6209 } else {
6210 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6211 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6212 ctxt->sax->error(ctxt->userData,
6213 "xmlParsePEReference: expecting ';'\n");
6214 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006215 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006216 }
6217 xmlFree(name);
6218 }
6219 }
6220}
6221
6222/**
6223 * xmlParseStringPEReference:
6224 * @ctxt: an XML parser context
6225 * @str: a pointer to an index in the string
6226 *
6227 * parse PEReference declarations
6228 *
6229 * [69] PEReference ::= '%' Name ';'
6230 *
6231 * [ WFC: No Recursion ]
6232 * A parsed entity must not contain a recursive
6233 * reference to itself, either directly or indirectly.
6234 *
6235 * [ WFC: Entity Declared ]
6236 * In a document without any DTD, a document with only an internal DTD
6237 * subset which contains no parameter entity references, or a document
6238 * with "standalone='yes'", ... ... The declaration of a parameter
6239 * entity must precede any reference to it...
6240 *
6241 * [ VC: Entity Declared ]
6242 * In a document with an external subset or external parameter entities
6243 * with "standalone='no'", ... ... The declaration of a parameter entity
6244 * must precede any reference to it...
6245 *
6246 * [ WFC: In DTD ]
6247 * Parameter-entity references may only appear in the DTD.
6248 * NOTE: misleading but this is handled.
6249 *
6250 * Returns the string of the entity content.
6251 * str is updated to the current value of the index
6252 */
6253xmlEntityPtr
6254xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6255 const xmlChar *ptr;
6256 xmlChar cur;
6257 xmlChar *name;
6258 xmlEntityPtr entity = NULL;
6259
6260 if ((str == NULL) || (*str == NULL)) return(NULL);
6261 ptr = *str;
6262 cur = *ptr;
6263 if (cur == '%') {
6264 ptr++;
6265 cur = *ptr;
6266 name = xmlParseStringName(ctxt, &ptr);
6267 if (name == NULL) {
6268 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6269 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6270 ctxt->sax->error(ctxt->userData,
6271 "xmlParseStringPEReference: no name\n");
6272 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006273 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006274 } else {
6275 cur = *ptr;
6276 if (cur == ';') {
6277 ptr++;
6278 cur = *ptr;
6279 if ((ctxt->sax != NULL) &&
6280 (ctxt->sax->getParameterEntity != NULL))
6281 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6282 name);
6283 if (entity == NULL) {
6284 /*
6285 * [ WFC: Entity Declared ]
6286 * In a document without any DTD, a document with only an
6287 * internal DTD subset which contains no parameter entity
6288 * references, or a document with "standalone='yes'", ...
6289 * ... The declaration of a parameter entity must precede
6290 * any reference to it...
6291 */
6292 if ((ctxt->standalone == 1) ||
6293 ((ctxt->hasExternalSubset == 0) &&
6294 (ctxt->hasPErefs == 0))) {
6295 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6296 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6297 ctxt->sax->error(ctxt->userData,
6298 "PEReference: %%%s; not found\n", name);
6299 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006300 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006301 } else {
6302 /*
6303 * [ VC: Entity Declared ]
6304 * In a document with an external subset or external
6305 * parameter entities with "standalone='no'", ...
6306 * ... The declaration of a parameter entity must
6307 * precede any reference to it...
6308 */
6309 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6310 ctxt->sax->warning(ctxt->userData,
6311 "PEReference: %%%s; not found\n", name);
6312 ctxt->valid = 0;
6313 }
6314 } else {
6315 /*
6316 * Internal checking in case the entity quest barfed
6317 */
6318 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6319 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6320 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6321 ctxt->sax->warning(ctxt->userData,
6322 "Internal: %%%s; is not a parameter entity\n", name);
6323 }
6324 }
6325 ctxt->hasPErefs = 1;
6326 } else {
6327 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6328 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6329 ctxt->sax->error(ctxt->userData,
6330 "xmlParseStringPEReference: expecting ';'\n");
6331 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006332 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006333 }
6334 xmlFree(name);
6335 }
6336 }
6337 *str = ptr;
6338 return(entity);
6339}
6340
6341/**
6342 * xmlParseDocTypeDecl:
6343 * @ctxt: an XML parser context
6344 *
6345 * parse a DOCTYPE declaration
6346 *
6347 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6348 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6349 *
6350 * [ VC: Root Element Type ]
6351 * The Name in the document type declaration must match the element
6352 * type of the root element.
6353 */
6354
6355void
6356xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6357 xmlChar *name = NULL;
6358 xmlChar *ExternalID = NULL;
6359 xmlChar *URI = NULL;
6360
6361 /*
6362 * We know that '<!DOCTYPE' has been detected.
6363 */
6364 SKIP(9);
6365
6366 SKIP_BLANKS;
6367
6368 /*
6369 * Parse the DOCTYPE name.
6370 */
6371 name = xmlParseName(ctxt);
6372 if (name == NULL) {
6373 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6374 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6375 ctxt->sax->error(ctxt->userData,
6376 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6377 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006378 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006379 }
6380 ctxt->intSubName = name;
6381
6382 SKIP_BLANKS;
6383
6384 /*
6385 * Check for SystemID and ExternalID
6386 */
6387 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6388
6389 if ((URI != NULL) || (ExternalID != NULL)) {
6390 ctxt->hasExternalSubset = 1;
6391 }
6392 ctxt->extSubURI = URI;
6393 ctxt->extSubSystem = ExternalID;
6394
6395 SKIP_BLANKS;
6396
6397 /*
6398 * Create and update the internal subset.
6399 */
6400 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6401 (!ctxt->disableSAX))
6402 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6403
6404 /*
6405 * Is there any internal subset declarations ?
6406 * they are handled separately in xmlParseInternalSubset()
6407 */
6408 if (RAW == '[')
6409 return;
6410
6411 /*
6412 * We should be at the end of the DOCTYPE declaration.
6413 */
6414 if (RAW != '>') {
6415 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6416 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006417 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006418 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006419 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006420 }
6421 NEXT;
6422}
6423
6424/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006425 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006426 * @ctxt: an XML parser context
6427 *
6428 * parse the internal subset declaration
6429 *
6430 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6431 */
6432
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006433static void
Owen Taylor3473f882001-02-23 17:55:21 +00006434xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6435 /*
6436 * Is there any DTD definition ?
6437 */
6438 if (RAW == '[') {
6439 ctxt->instate = XML_PARSER_DTD;
6440 NEXT;
6441 /*
6442 * Parse the succession of Markup declarations and
6443 * PEReferences.
6444 * Subsequence (markupdecl | PEReference | S)*
6445 */
6446 while (RAW != ']') {
6447 const xmlChar *check = CUR_PTR;
6448 int cons = ctxt->input->consumed;
6449
6450 SKIP_BLANKS;
6451 xmlParseMarkupDecl(ctxt);
6452 xmlParsePEReference(ctxt);
6453
6454 /*
6455 * Pop-up of finished entities.
6456 */
6457 while ((RAW == 0) && (ctxt->inputNr > 1))
6458 xmlPopInput(ctxt);
6459
6460 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6461 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6462 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6463 ctxt->sax->error(ctxt->userData,
6464 "xmlParseInternalSubset: error detected in Markup declaration\n");
6465 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006466 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006467 break;
6468 }
6469 }
6470 if (RAW == ']') {
6471 NEXT;
6472 SKIP_BLANKS;
6473 }
6474 }
6475
6476 /*
6477 * We should be at the end of the DOCTYPE declaration.
6478 */
6479 if (RAW != '>') {
6480 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6481 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006482 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006483 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006484 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006485 }
6486 NEXT;
6487}
6488
6489/**
6490 * xmlParseAttribute:
6491 * @ctxt: an XML parser context
6492 * @value: a xmlChar ** used to store the value of the attribute
6493 *
6494 * parse an attribute
6495 *
6496 * [41] Attribute ::= Name Eq AttValue
6497 *
6498 * [ WFC: No External Entity References ]
6499 * Attribute values cannot contain direct or indirect entity references
6500 * to external entities.
6501 *
6502 * [ WFC: No < in Attribute Values ]
6503 * The replacement text of any entity referred to directly or indirectly in
6504 * an attribute value (other than "&lt;") must not contain a <.
6505 *
6506 * [ VC: Attribute Value Type ]
6507 * The attribute must have been declared; the value must be of the type
6508 * declared for it.
6509 *
6510 * [25] Eq ::= S? '=' S?
6511 *
6512 * With namespace:
6513 *
6514 * [NS 11] Attribute ::= QName Eq AttValue
6515 *
6516 * Also the case QName == xmlns:??? is handled independently as a namespace
6517 * definition.
6518 *
6519 * Returns the attribute name, and the value in *value.
6520 */
6521
6522xmlChar *
6523xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6524 xmlChar *name, *val;
6525
6526 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006527 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006528 name = xmlParseName(ctxt);
6529 if (name == NULL) {
6530 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6531 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6532 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6533 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006534 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006535 return(NULL);
6536 }
6537
6538 /*
6539 * read the value
6540 */
6541 SKIP_BLANKS;
6542 if (RAW == '=') {
6543 NEXT;
6544 SKIP_BLANKS;
6545 val = xmlParseAttValue(ctxt);
6546 ctxt->instate = XML_PARSER_CONTENT;
6547 } else {
6548 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6549 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6550 ctxt->sax->error(ctxt->userData,
6551 "Specification mandate value for attribute %s\n", name);
6552 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006553 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006554 xmlFree(name);
6555 return(NULL);
6556 }
6557
6558 /*
6559 * Check that xml:lang conforms to the specification
6560 * No more registered as an error, just generate a warning now
6561 * since this was deprecated in XML second edition
6562 */
6563 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6564 if (!xmlCheckLanguageID(val)) {
6565 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6566 ctxt->sax->warning(ctxt->userData,
6567 "Malformed value for xml:lang : %s\n", val);
6568 }
6569 }
6570
6571 /*
6572 * Check that xml:space conforms to the specification
6573 */
6574 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6575 if (xmlStrEqual(val, BAD_CAST "default"))
6576 *(ctxt->space) = 0;
6577 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6578 *(ctxt->space) = 1;
6579 else {
6580 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6581 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6582 ctxt->sax->error(ctxt->userData,
6583"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6584 val);
6585 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006586 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006587 }
6588 }
6589
6590 *value = val;
6591 return(name);
6592}
6593
6594/**
6595 * xmlParseStartTag:
6596 * @ctxt: an XML parser context
6597 *
6598 * parse a start of tag either for rule element or
6599 * EmptyElement. In both case we don't parse the tag closing chars.
6600 *
6601 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6602 *
6603 * [ WFC: Unique Att Spec ]
6604 * No attribute name may appear more than once in the same start-tag or
6605 * empty-element tag.
6606 *
6607 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6608 *
6609 * [ WFC: Unique Att Spec ]
6610 * No attribute name may appear more than once in the same start-tag or
6611 * empty-element tag.
6612 *
6613 * With namespace:
6614 *
6615 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6616 *
6617 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6618 *
6619 * Returns the element name parsed
6620 */
6621
6622xmlChar *
6623xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6624 xmlChar *name;
6625 xmlChar *attname;
6626 xmlChar *attvalue;
6627 const xmlChar **atts = NULL;
6628 int nbatts = 0;
6629 int maxatts = 0;
6630 int i;
6631
6632 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006633 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006634
6635 name = xmlParseName(ctxt);
6636 if (name == NULL) {
6637 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6638 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6639 ctxt->sax->error(ctxt->userData,
6640 "xmlParseStartTag: invalid element name\n");
6641 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006642 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006643 return(NULL);
6644 }
6645
6646 /*
6647 * Now parse the attributes, it ends up with the ending
6648 *
6649 * (S Attribute)* S?
6650 */
6651 SKIP_BLANKS;
6652 GROW;
6653
Daniel Veillard21a0f912001-02-25 19:54:14 +00006654 while ((RAW != '>') &&
6655 ((RAW != '/') || (NXT(1) != '>')) &&
6656 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006657 const xmlChar *q = CUR_PTR;
6658 int cons = ctxt->input->consumed;
6659
6660 attname = xmlParseAttribute(ctxt, &attvalue);
6661 if ((attname != NULL) && (attvalue != NULL)) {
6662 /*
6663 * [ WFC: Unique Att Spec ]
6664 * No attribute name may appear more than once in the same
6665 * start-tag or empty-element tag.
6666 */
6667 for (i = 0; i < nbatts;i += 2) {
6668 if (xmlStrEqual(atts[i], attname)) {
6669 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6670 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6671 ctxt->sax->error(ctxt->userData,
6672 "Attribute %s redefined\n",
6673 attname);
6674 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006675 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006676 xmlFree(attname);
6677 xmlFree(attvalue);
6678 goto failed;
6679 }
6680 }
6681
6682 /*
6683 * Add the pair to atts
6684 */
6685 if (atts == NULL) {
6686 maxatts = 10;
6687 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6688 if (atts == NULL) {
6689 xmlGenericError(xmlGenericErrorContext,
6690 "malloc of %ld byte failed\n",
6691 maxatts * (long)sizeof(xmlChar *));
6692 return(NULL);
6693 }
6694 } else if (nbatts + 4 > maxatts) {
6695 maxatts *= 2;
6696 atts = (const xmlChar **) xmlRealloc((void *) atts,
6697 maxatts * sizeof(xmlChar *));
6698 if (atts == NULL) {
6699 xmlGenericError(xmlGenericErrorContext,
6700 "realloc of %ld byte failed\n",
6701 maxatts * (long)sizeof(xmlChar *));
6702 return(NULL);
6703 }
6704 }
6705 atts[nbatts++] = attname;
6706 atts[nbatts++] = attvalue;
6707 atts[nbatts] = NULL;
6708 atts[nbatts + 1] = NULL;
6709 } else {
6710 if (attname != NULL)
6711 xmlFree(attname);
6712 if (attvalue != NULL)
6713 xmlFree(attvalue);
6714 }
6715
6716failed:
6717
Daniel Veillard3772de32002-12-17 10:31:45 +00006718 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006719 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6720 break;
6721 if (!IS_BLANK(RAW)) {
6722 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6723 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6724 ctxt->sax->error(ctxt->userData,
6725 "attributes construct error\n");
6726 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006727 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006728 }
6729 SKIP_BLANKS;
6730 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6731 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6732 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6733 ctxt->sax->error(ctxt->userData,
6734 "xmlParseStartTag: problem parsing attributes\n");
6735 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006736 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006737 break;
6738 }
6739 GROW;
6740 }
6741
6742 /*
6743 * SAX: Start of Element !
6744 */
6745 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6746 (!ctxt->disableSAX))
6747 ctxt->sax->startElement(ctxt->userData, name, atts);
6748
6749 if (atts != NULL) {
6750 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6751 xmlFree((void *) atts);
6752 }
6753 return(name);
6754}
6755
6756/**
6757 * xmlParseEndTag:
6758 * @ctxt: an XML parser context
6759 *
6760 * parse an end of tag
6761 *
6762 * [42] ETag ::= '</' Name S? '>'
6763 *
6764 * With namespace
6765 *
6766 * [NS 9] ETag ::= '</' QName S? '>'
6767 */
6768
6769void
6770xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6771 xmlChar *name;
6772 xmlChar *oldname;
6773
6774 GROW;
6775 if ((RAW != '<') || (NXT(1) != '/')) {
6776 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6777 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6778 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6779 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006780 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006781 return;
6782 }
6783 SKIP(2);
6784
Daniel Veillard46de64e2002-05-29 08:21:33 +00006785 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006786
6787 /*
6788 * We should definitely be at the ending "S? '>'" part
6789 */
6790 GROW;
6791 SKIP_BLANKS;
6792 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6793 ctxt->errNo = XML_ERR_GT_REQUIRED;
6794 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6795 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6796 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006797 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006798 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006799 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006800
6801 /*
6802 * [ WFC: Element Type Match ]
6803 * The Name in an element's end-tag must match the element type in the
6804 * start-tag.
6805 *
6806 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006807 if (name != (xmlChar*)1) {
Owen Taylor3473f882001-02-23 17:55:21 +00006808 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6809 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00006810 if (name != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00006811 ctxt->sax->error(ctxt->userData,
6812 "Opening and ending tag mismatch: %s and %s\n",
6813 ctxt->name, name);
Daniel Veillard46de64e2002-05-29 08:21:33 +00006814 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006815 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006816 "Ending tag error for: %s\n", ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006817 }
6818
6819 }
6820 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006821 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6822#if 0
6823 else {
6824 /*
6825 * Recover in case of one missing close
6826 */
6827 if ((ctxt->nameNr > 2) &&
6828 (xmlStrEqual(ctxt->nameTab[ctxt->nameNr -2], name))) {
6829 namePop(ctxt);
6830 spacePop(ctxt);
6831 }
6832 }
6833#endif
6834 if (name != NULL)
6835 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00006836 }
6837
6838 /*
6839 * SAX: End of Tag
6840 */
6841 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6842 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006843 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006844
Owen Taylor3473f882001-02-23 17:55:21 +00006845 oldname = namePop(ctxt);
6846 spacePop(ctxt);
6847 if (oldname != NULL) {
6848#ifdef DEBUG_STACK
6849 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6850#endif
6851 xmlFree(oldname);
6852 }
6853 return;
6854}
6855
6856/**
6857 * xmlParseCDSect:
6858 * @ctxt: an XML parser context
6859 *
6860 * Parse escaped pure raw content.
6861 *
6862 * [18] CDSect ::= CDStart CData CDEnd
6863 *
6864 * [19] CDStart ::= '<![CDATA['
6865 *
6866 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6867 *
6868 * [21] CDEnd ::= ']]>'
6869 */
6870void
6871xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6872 xmlChar *buf = NULL;
6873 int len = 0;
6874 int size = XML_PARSER_BUFFER_SIZE;
6875 int r, rl;
6876 int s, sl;
6877 int cur, l;
6878 int count = 0;
6879
6880 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6881 (NXT(2) == '[') && (NXT(3) == 'C') &&
6882 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6883 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6884 (NXT(8) == '[')) {
6885 SKIP(9);
6886 } else
6887 return;
6888
6889 ctxt->instate = XML_PARSER_CDATA_SECTION;
6890 r = CUR_CHAR(rl);
6891 if (!IS_CHAR(r)) {
6892 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6893 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6894 ctxt->sax->error(ctxt->userData,
6895 "CData section not finished\n");
6896 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006897 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006898 ctxt->instate = XML_PARSER_CONTENT;
6899 return;
6900 }
6901 NEXTL(rl);
6902 s = CUR_CHAR(sl);
6903 if (!IS_CHAR(s)) {
6904 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6905 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6906 ctxt->sax->error(ctxt->userData,
6907 "CData section not finished\n");
6908 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006909 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006910 ctxt->instate = XML_PARSER_CONTENT;
6911 return;
6912 }
6913 NEXTL(sl);
6914 cur = CUR_CHAR(l);
6915 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6916 if (buf == NULL) {
6917 xmlGenericError(xmlGenericErrorContext,
6918 "malloc of %d byte failed\n", size);
6919 return;
6920 }
6921 while (IS_CHAR(cur) &&
6922 ((r != ']') || (s != ']') || (cur != '>'))) {
6923 if (len + 5 >= size) {
6924 size *= 2;
6925 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6926 if (buf == NULL) {
6927 xmlGenericError(xmlGenericErrorContext,
6928 "realloc of %d byte failed\n", size);
6929 return;
6930 }
6931 }
6932 COPY_BUF(rl,buf,len,r);
6933 r = s;
6934 rl = sl;
6935 s = cur;
6936 sl = l;
6937 count++;
6938 if (count > 50) {
6939 GROW;
6940 count = 0;
6941 }
6942 NEXTL(l);
6943 cur = CUR_CHAR(l);
6944 }
6945 buf[len] = 0;
6946 ctxt->instate = XML_PARSER_CONTENT;
6947 if (cur != '>') {
6948 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6949 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6950 ctxt->sax->error(ctxt->userData,
6951 "CData section not finished\n%.50s\n", buf);
6952 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006953 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006954 xmlFree(buf);
6955 return;
6956 }
6957 NEXTL(l);
6958
6959 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006960 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00006961 */
6962 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6963 if (ctxt->sax->cdataBlock != NULL)
6964 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006965 else if (ctxt->sax->characters != NULL)
6966 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006967 }
6968 xmlFree(buf);
6969}
6970
6971/**
6972 * xmlParseContent:
6973 * @ctxt: an XML parser context
6974 *
6975 * Parse a content:
6976 *
6977 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6978 */
6979
6980void
6981xmlParseContent(xmlParserCtxtPtr ctxt) {
6982 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00006983 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00006984 ((RAW != '<') || (NXT(1) != '/'))) {
6985 const xmlChar *test = CUR_PTR;
6986 int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006987 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006988
6989 /*
Owen Taylor3473f882001-02-23 17:55:21 +00006990 * First case : a Processing Instruction.
6991 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00006992 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006993 xmlParsePI(ctxt);
6994 }
6995
6996 /*
6997 * Second case : a CDSection
6998 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006999 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00007000 (NXT(2) == '[') && (NXT(3) == 'C') &&
7001 (NXT(4) == 'D') && (NXT(5) == 'A') &&
7002 (NXT(6) == 'T') && (NXT(7) == 'A') &&
7003 (NXT(8) == '[')) {
7004 xmlParseCDSect(ctxt);
7005 }
7006
7007 /*
7008 * Third case : a comment
7009 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007010 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00007011 (NXT(2) == '-') && (NXT(3) == '-')) {
7012 xmlParseComment(ctxt);
7013 ctxt->instate = XML_PARSER_CONTENT;
7014 }
7015
7016 /*
7017 * Fourth case : a sub-element.
7018 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007019 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00007020 xmlParseElement(ctxt);
7021 }
7022
7023 /*
7024 * Fifth case : a reference. If if has not been resolved,
7025 * parsing returns it's Name, create the node
7026 */
7027
Daniel Veillard21a0f912001-02-25 19:54:14 +00007028 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00007029 xmlParseReference(ctxt);
7030 }
7031
7032 /*
7033 * Last case, text. Note that References are handled directly.
7034 */
7035 else {
7036 xmlParseCharData(ctxt, 0);
7037 }
7038
7039 GROW;
7040 /*
7041 * Pop-up of finished entities.
7042 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00007043 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00007044 xmlPopInput(ctxt);
7045 SHRINK;
7046
Daniel Veillardfdc91562002-07-01 21:52:03 +00007047 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007048 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
7049 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7050 ctxt->sax->error(ctxt->userData,
7051 "detected an error in element content\n");
7052 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007053 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007054 ctxt->instate = XML_PARSER_EOF;
7055 break;
7056 }
7057 }
7058}
7059
7060/**
7061 * xmlParseElement:
7062 * @ctxt: an XML parser context
7063 *
7064 * parse an XML element, this is highly recursive
7065 *
7066 * [39] element ::= EmptyElemTag | STag content ETag
7067 *
7068 * [ WFC: Element Type Match ]
7069 * The Name in an element's end-tag must match the element type in the
7070 * start-tag.
7071 *
7072 * [ VC: Element Valid ]
7073 * An element is valid if there is a declaration matching elementdecl
7074 * where the Name matches the element type and one of the following holds:
7075 * - The declaration matches EMPTY and the element has no content.
7076 * - The declaration matches children and the sequence of child elements
7077 * belongs to the language generated by the regular expression in the
7078 * content model, with optional white space (characters matching the
7079 * nonterminal S) between each pair of child elements.
7080 * - The declaration matches Mixed and the content consists of character
7081 * data and child elements whose types match names in the content model.
7082 * - The declaration matches ANY, and the types of any child elements have
7083 * been declared.
7084 */
7085
7086void
7087xmlParseElement(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00007088 xmlChar *name;
7089 xmlChar *oldname;
7090 xmlParserNodeInfo node_info;
7091 xmlNodePtr ret;
7092
7093 /* Capture start position */
7094 if (ctxt->record_info) {
7095 node_info.begin_pos = ctxt->input->consumed +
7096 (CUR_PTR - ctxt->input->base);
7097 node_info.begin_line = ctxt->input->line;
7098 }
7099
7100 if (ctxt->spaceNr == 0)
7101 spacePush(ctxt, -1);
7102 else
7103 spacePush(ctxt, *ctxt->space);
7104
7105 name = xmlParseStartTag(ctxt);
7106 if (name == NULL) {
7107 spacePop(ctxt);
7108 return;
7109 }
7110 namePush(ctxt, name);
7111 ret = ctxt->node;
7112
7113 /*
7114 * [ VC: Root Element Type ]
7115 * The Name in the document type declaration must match the element
7116 * type of the root element.
7117 */
7118 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7119 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7120 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7121
7122 /*
7123 * Check for an Empty Element.
7124 */
7125 if ((RAW == '/') && (NXT(1) == '>')) {
7126 SKIP(2);
7127 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7128 (!ctxt->disableSAX))
7129 ctxt->sax->endElement(ctxt->userData, name);
7130 oldname = namePop(ctxt);
7131 spacePop(ctxt);
7132 if (oldname != NULL) {
7133#ifdef DEBUG_STACK
7134 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7135#endif
7136 xmlFree(oldname);
7137 }
7138 if ( ret != NULL && ctxt->record_info ) {
7139 node_info.end_pos = ctxt->input->consumed +
7140 (CUR_PTR - ctxt->input->base);
7141 node_info.end_line = ctxt->input->line;
7142 node_info.node = ret;
7143 xmlParserAddNodeInfo(ctxt, &node_info);
7144 }
7145 return;
7146 }
7147 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00007148 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007149 } else {
7150 ctxt->errNo = XML_ERR_GT_REQUIRED;
7151 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7152 ctxt->sax->error(ctxt->userData,
Daniel Veillard38d80e22002-06-11 07:24:56 +00007153 "Couldn't find end of Start Tag %s\n",
7154 name);
Owen Taylor3473f882001-02-23 17:55:21 +00007155 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007156 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007157
7158 /*
7159 * end of parsing of this node.
7160 */
7161 nodePop(ctxt);
7162 oldname = namePop(ctxt);
7163 spacePop(ctxt);
7164 if (oldname != NULL) {
7165#ifdef DEBUG_STACK
7166 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7167#endif
7168 xmlFree(oldname);
7169 }
7170
7171 /*
7172 * Capture end position and add node
7173 */
7174 if ( ret != NULL && ctxt->record_info ) {
7175 node_info.end_pos = ctxt->input->consumed +
7176 (CUR_PTR - ctxt->input->base);
7177 node_info.end_line = ctxt->input->line;
7178 node_info.node = ret;
7179 xmlParserAddNodeInfo(ctxt, &node_info);
7180 }
7181 return;
7182 }
7183
7184 /*
7185 * Parse the content of the element:
7186 */
7187 xmlParseContent(ctxt);
7188 if (!IS_CHAR(RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00007189 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00007190 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7191 ctxt->sax->error(ctxt->userData,
Daniel Veillard38d80e22002-06-11 07:24:56 +00007192 "Premature end of data in tag %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007193 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007194 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007195
7196 /*
7197 * end of parsing of this node.
7198 */
7199 nodePop(ctxt);
7200 oldname = namePop(ctxt);
7201 spacePop(ctxt);
7202 if (oldname != NULL) {
7203#ifdef DEBUG_STACK
7204 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7205#endif
7206 xmlFree(oldname);
7207 }
7208 return;
7209 }
7210
7211 /*
7212 * parse the end of tag: '</' should be here.
7213 */
7214 xmlParseEndTag(ctxt);
7215
7216 /*
7217 * Capture end position and add node
7218 */
7219 if ( ret != NULL && ctxt->record_info ) {
7220 node_info.end_pos = ctxt->input->consumed +
7221 (CUR_PTR - ctxt->input->base);
7222 node_info.end_line = ctxt->input->line;
7223 node_info.node = ret;
7224 xmlParserAddNodeInfo(ctxt, &node_info);
7225 }
7226}
7227
7228/**
7229 * xmlParseVersionNum:
7230 * @ctxt: an XML parser context
7231 *
7232 * parse the XML version value.
7233 *
7234 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7235 *
7236 * Returns the string giving the XML version number, or NULL
7237 */
7238xmlChar *
7239xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7240 xmlChar *buf = NULL;
7241 int len = 0;
7242 int size = 10;
7243 xmlChar cur;
7244
7245 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7246 if (buf == NULL) {
7247 xmlGenericError(xmlGenericErrorContext,
7248 "malloc of %d byte failed\n", size);
7249 return(NULL);
7250 }
7251 cur = CUR;
7252 while (((cur >= 'a') && (cur <= 'z')) ||
7253 ((cur >= 'A') && (cur <= 'Z')) ||
7254 ((cur >= '0') && (cur <= '9')) ||
7255 (cur == '_') || (cur == '.') ||
7256 (cur == ':') || (cur == '-')) {
7257 if (len + 1 >= size) {
7258 size *= 2;
7259 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7260 if (buf == NULL) {
7261 xmlGenericError(xmlGenericErrorContext,
7262 "realloc of %d byte failed\n", size);
7263 return(NULL);
7264 }
7265 }
7266 buf[len++] = cur;
7267 NEXT;
7268 cur=CUR;
7269 }
7270 buf[len] = 0;
7271 return(buf);
7272}
7273
7274/**
7275 * xmlParseVersionInfo:
7276 * @ctxt: an XML parser context
7277 *
7278 * parse the XML version.
7279 *
7280 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7281 *
7282 * [25] Eq ::= S? '=' S?
7283 *
7284 * Returns the version string, e.g. "1.0"
7285 */
7286
7287xmlChar *
7288xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7289 xmlChar *version = NULL;
7290 const xmlChar *q;
7291
7292 if ((RAW == 'v') && (NXT(1) == 'e') &&
7293 (NXT(2) == 'r') && (NXT(3) == 's') &&
7294 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7295 (NXT(6) == 'n')) {
7296 SKIP(7);
7297 SKIP_BLANKS;
7298 if (RAW != '=') {
7299 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7300 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7301 ctxt->sax->error(ctxt->userData,
7302 "xmlParseVersionInfo : expected '='\n");
7303 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007304 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007305 return(NULL);
7306 }
7307 NEXT;
7308 SKIP_BLANKS;
7309 if (RAW == '"') {
7310 NEXT;
7311 q = CUR_PTR;
7312 version = xmlParseVersionNum(ctxt);
7313 if (RAW != '"') {
7314 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7315 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7316 ctxt->sax->error(ctxt->userData,
7317 "String not closed\n%.50s\n", q);
7318 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007319 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007320 } else
7321 NEXT;
7322 } else if (RAW == '\''){
7323 NEXT;
7324 q = CUR_PTR;
7325 version = xmlParseVersionNum(ctxt);
7326 if (RAW != '\'') {
7327 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7328 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7329 ctxt->sax->error(ctxt->userData,
7330 "String not closed\n%.50s\n", q);
7331 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007332 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007333 } else
7334 NEXT;
7335 } else {
7336 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7337 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7338 ctxt->sax->error(ctxt->userData,
7339 "xmlParseVersionInfo : expected ' or \"\n");
7340 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007341 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007342 }
7343 }
7344 return(version);
7345}
7346
7347/**
7348 * xmlParseEncName:
7349 * @ctxt: an XML parser context
7350 *
7351 * parse the XML encoding name
7352 *
7353 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7354 *
7355 * Returns the encoding name value or NULL
7356 */
7357xmlChar *
7358xmlParseEncName(xmlParserCtxtPtr ctxt) {
7359 xmlChar *buf = NULL;
7360 int len = 0;
7361 int size = 10;
7362 xmlChar cur;
7363
7364 cur = CUR;
7365 if (((cur >= 'a') && (cur <= 'z')) ||
7366 ((cur >= 'A') && (cur <= 'Z'))) {
7367 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7368 if (buf == NULL) {
7369 xmlGenericError(xmlGenericErrorContext,
7370 "malloc of %d byte failed\n", size);
7371 return(NULL);
7372 }
7373
7374 buf[len++] = cur;
7375 NEXT;
7376 cur = CUR;
7377 while (((cur >= 'a') && (cur <= 'z')) ||
7378 ((cur >= 'A') && (cur <= 'Z')) ||
7379 ((cur >= '0') && (cur <= '9')) ||
7380 (cur == '.') || (cur == '_') ||
7381 (cur == '-')) {
7382 if (len + 1 >= size) {
7383 size *= 2;
7384 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7385 if (buf == NULL) {
7386 xmlGenericError(xmlGenericErrorContext,
7387 "realloc of %d byte failed\n", size);
7388 return(NULL);
7389 }
7390 }
7391 buf[len++] = cur;
7392 NEXT;
7393 cur = CUR;
7394 if (cur == 0) {
7395 SHRINK;
7396 GROW;
7397 cur = CUR;
7398 }
7399 }
7400 buf[len] = 0;
7401 } else {
7402 ctxt->errNo = XML_ERR_ENCODING_NAME;
7403 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7404 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7405 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007406 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007407 }
7408 return(buf);
7409}
7410
7411/**
7412 * xmlParseEncodingDecl:
7413 * @ctxt: an XML parser context
7414 *
7415 * parse the XML encoding declaration
7416 *
7417 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7418 *
7419 * this setups the conversion filters.
7420 *
7421 * Returns the encoding value or NULL
7422 */
7423
7424xmlChar *
7425xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7426 xmlChar *encoding = NULL;
7427 const xmlChar *q;
7428
7429 SKIP_BLANKS;
7430 if ((RAW == 'e') && (NXT(1) == 'n') &&
7431 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7432 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7433 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7434 SKIP(8);
7435 SKIP_BLANKS;
7436 if (RAW != '=') {
7437 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7438 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7439 ctxt->sax->error(ctxt->userData,
7440 "xmlParseEncodingDecl : expected '='\n");
7441 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007442 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007443 return(NULL);
7444 }
7445 NEXT;
7446 SKIP_BLANKS;
7447 if (RAW == '"') {
7448 NEXT;
7449 q = CUR_PTR;
7450 encoding = xmlParseEncName(ctxt);
7451 if (RAW != '"') {
7452 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7453 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7454 ctxt->sax->error(ctxt->userData,
7455 "String not closed\n%.50s\n", q);
7456 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007457 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007458 } else
7459 NEXT;
7460 } else if (RAW == '\''){
7461 NEXT;
7462 q = CUR_PTR;
7463 encoding = xmlParseEncName(ctxt);
7464 if (RAW != '\'') {
7465 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7467 ctxt->sax->error(ctxt->userData,
7468 "String not closed\n%.50s\n", q);
7469 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007470 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007471 } else
7472 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00007473 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007474 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7475 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7476 ctxt->sax->error(ctxt->userData,
7477 "xmlParseEncodingDecl : expected ' or \"\n");
7478 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007479 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007480 }
7481 if (encoding != NULL) {
7482 xmlCharEncoding enc;
7483 xmlCharEncodingHandlerPtr handler;
7484
7485 if (ctxt->input->encoding != NULL)
7486 xmlFree((xmlChar *) ctxt->input->encoding);
7487 ctxt->input->encoding = encoding;
7488
7489 enc = xmlParseCharEncoding((const char *) encoding);
7490 /*
7491 * registered set of known encodings
7492 */
7493 if (enc != XML_CHAR_ENCODING_ERROR) {
7494 xmlSwitchEncoding(ctxt, enc);
7495 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
Daniel Veillard46d6c442002-04-09 16:10:39 +00007496 ctxt->input->encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007497 xmlFree(encoding);
7498 return(NULL);
7499 }
7500 } else {
7501 /*
7502 * fallback for unknown encodings
7503 */
7504 handler = xmlFindCharEncodingHandler((const char *) encoding);
7505 if (handler != NULL) {
7506 xmlSwitchToEncoding(ctxt, handler);
7507 } else {
7508 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7509 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7510 ctxt->sax->error(ctxt->userData,
7511 "Unsupported encoding %s\n", encoding);
7512 return(NULL);
7513 }
7514 }
7515 }
7516 }
7517 return(encoding);
7518}
7519
7520/**
7521 * xmlParseSDDecl:
7522 * @ctxt: an XML parser context
7523 *
7524 * parse the XML standalone declaration
7525 *
7526 * [32] SDDecl ::= S 'standalone' Eq
7527 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7528 *
7529 * [ VC: Standalone Document Declaration ]
7530 * TODO The standalone document declaration must have the value "no"
7531 * if any external markup declarations contain declarations of:
7532 * - attributes with default values, if elements to which these
7533 * attributes apply appear in the document without specifications
7534 * of values for these attributes, or
7535 * - entities (other than amp, lt, gt, apos, quot), if references
7536 * to those entities appear in the document, or
7537 * - attributes with values subject to normalization, where the
7538 * attribute appears in the document with a value which will change
7539 * as a result of normalization, or
7540 * - element types with element content, if white space occurs directly
7541 * within any instance of those types.
7542 *
7543 * Returns 1 if standalone, 0 otherwise
7544 */
7545
7546int
7547xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7548 int standalone = -1;
7549
7550 SKIP_BLANKS;
7551 if ((RAW == 's') && (NXT(1) == 't') &&
7552 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7553 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7554 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7555 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7556 SKIP(10);
7557 SKIP_BLANKS;
7558 if (RAW != '=') {
7559 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7560 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7561 ctxt->sax->error(ctxt->userData,
7562 "XML standalone declaration : expected '='\n");
7563 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007564 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007565 return(standalone);
7566 }
7567 NEXT;
7568 SKIP_BLANKS;
7569 if (RAW == '\''){
7570 NEXT;
7571 if ((RAW == 'n') && (NXT(1) == 'o')) {
7572 standalone = 0;
7573 SKIP(2);
7574 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7575 (NXT(2) == 's')) {
7576 standalone = 1;
7577 SKIP(3);
7578 } else {
7579 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7580 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7581 ctxt->sax->error(ctxt->userData,
7582 "standalone accepts only 'yes' or 'no'\n");
7583 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007584 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007585 }
7586 if (RAW != '\'') {
7587 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7588 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7589 ctxt->sax->error(ctxt->userData, "String not closed\n");
7590 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007591 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007592 } else
7593 NEXT;
7594 } else if (RAW == '"'){
7595 NEXT;
7596 if ((RAW == 'n') && (NXT(1) == 'o')) {
7597 standalone = 0;
7598 SKIP(2);
7599 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7600 (NXT(2) == 's')) {
7601 standalone = 1;
7602 SKIP(3);
7603 } else {
7604 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7605 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7606 ctxt->sax->error(ctxt->userData,
7607 "standalone accepts only 'yes' or 'no'\n");
7608 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007609 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007610 }
7611 if (RAW != '"') {
7612 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7613 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7614 ctxt->sax->error(ctxt->userData, "String not closed\n");
7615 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007616 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007617 } else
7618 NEXT;
7619 } else {
7620 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7621 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7622 ctxt->sax->error(ctxt->userData,
7623 "Standalone value not found\n");
7624 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007625 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007626 }
7627 }
7628 return(standalone);
7629}
7630
7631/**
7632 * xmlParseXMLDecl:
7633 * @ctxt: an XML parser context
7634 *
7635 * parse an XML declaration header
7636 *
7637 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7638 */
7639
7640void
7641xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7642 xmlChar *version;
7643
7644 /*
7645 * We know that '<?xml' is here.
7646 */
7647 SKIP(5);
7648
7649 if (!IS_BLANK(RAW)) {
7650 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7651 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7652 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7653 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007654 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007655 }
7656 SKIP_BLANKS;
7657
7658 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007659 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007660 */
7661 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007662 if (version == NULL) {
7663 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7664 ctxt->sax->error(ctxt->userData,
7665 "Malformed declaration expecting version\n");
7666 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007667 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard19840942001-11-29 16:11:38 +00007668 } else {
7669 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7670 /*
7671 * TODO: Blueberry should be detected here
7672 */
7673 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7674 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7675 version);
7676 }
7677 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007678 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007679 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007680 }
Owen Taylor3473f882001-02-23 17:55:21 +00007681
7682 /*
7683 * We may have the encoding declaration
7684 */
7685 if (!IS_BLANK(RAW)) {
7686 if ((RAW == '?') && (NXT(1) == '>')) {
7687 SKIP(2);
7688 return;
7689 }
7690 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7691 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7692 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7693 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007694 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007695 }
7696 xmlParseEncodingDecl(ctxt);
7697 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7698 /*
7699 * The XML REC instructs us to stop parsing right here
7700 */
7701 return;
7702 }
7703
7704 /*
7705 * We may have the standalone status.
7706 */
7707 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7708 if ((RAW == '?') && (NXT(1) == '>')) {
7709 SKIP(2);
7710 return;
7711 }
7712 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7713 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7714 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7715 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007716 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007717 }
7718 SKIP_BLANKS;
7719 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7720
7721 SKIP_BLANKS;
7722 if ((RAW == '?') && (NXT(1) == '>')) {
7723 SKIP(2);
7724 } else if (RAW == '>') {
7725 /* Deprecated old WD ... */
7726 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7727 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7728 ctxt->sax->error(ctxt->userData,
7729 "XML declaration must end-up with '?>'\n");
7730 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007731 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007732 NEXT;
7733 } else {
7734 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7735 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7736 ctxt->sax->error(ctxt->userData,
7737 "parsing XML declaration: '?>' expected\n");
7738 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007739 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007740 MOVETO_ENDTAG(CUR_PTR);
7741 NEXT;
7742 }
7743}
7744
7745/**
7746 * xmlParseMisc:
7747 * @ctxt: an XML parser context
7748 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007749 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007750 *
7751 * [27] Misc ::= Comment | PI | S
7752 */
7753
7754void
7755xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00007756 while (((RAW == '<') && (NXT(1) == '?')) ||
7757 ((RAW == '<') && (NXT(1) == '!') &&
7758 (NXT(2) == '-') && (NXT(3) == '-')) ||
7759 IS_BLANK(CUR)) {
7760 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007761 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00007762 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007763 NEXT;
7764 } else
7765 xmlParseComment(ctxt);
7766 }
7767}
7768
7769/**
7770 * xmlParseDocument:
7771 * @ctxt: an XML parser context
7772 *
7773 * parse an XML document (and build a tree if using the standard SAX
7774 * interface).
7775 *
7776 * [1] document ::= prolog element Misc*
7777 *
7778 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7779 *
7780 * Returns 0, -1 in case of error. the parser context is augmented
7781 * as a result of the parsing.
7782 */
7783
7784int
7785xmlParseDocument(xmlParserCtxtPtr ctxt) {
7786 xmlChar start[4];
7787 xmlCharEncoding enc;
7788
7789 xmlInitParser();
7790
7791 GROW;
7792
7793 /*
7794 * SAX: beginning of the document processing.
7795 */
7796 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7797 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7798
Daniel Veillard50f34372001-08-03 12:06:36 +00007799 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007800 /*
7801 * Get the 4 first bytes and decode the charset
7802 * if enc != XML_CHAR_ENCODING_NONE
7803 * plug some encoding conversion routines.
7804 */
7805 start[0] = RAW;
7806 start[1] = NXT(1);
7807 start[2] = NXT(2);
7808 start[3] = NXT(3);
7809 enc = xmlDetectCharEncoding(start, 4);
7810 if (enc != XML_CHAR_ENCODING_NONE) {
7811 xmlSwitchEncoding(ctxt, enc);
7812 }
Owen Taylor3473f882001-02-23 17:55:21 +00007813 }
7814
7815
7816 if (CUR == 0) {
7817 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7818 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7819 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7820 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007821 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007822 }
7823
7824 /*
7825 * Check for the XMLDecl in the Prolog.
7826 */
7827 GROW;
7828 if ((RAW == '<') && (NXT(1) == '?') &&
7829 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7830 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7831
7832 /*
7833 * Note that we will switch encoding on the fly.
7834 */
7835 xmlParseXMLDecl(ctxt);
7836 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7837 /*
7838 * The XML REC instructs us to stop parsing right here
7839 */
7840 return(-1);
7841 }
7842 ctxt->standalone = ctxt->input->standalone;
7843 SKIP_BLANKS;
7844 } else {
7845 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7846 }
7847 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7848 ctxt->sax->startDocument(ctxt->userData);
7849
7850 /*
7851 * The Misc part of the Prolog
7852 */
7853 GROW;
7854 xmlParseMisc(ctxt);
7855
7856 /*
7857 * Then possibly doc type declaration(s) and more Misc
7858 * (doctypedecl Misc*)?
7859 */
7860 GROW;
7861 if ((RAW == '<') && (NXT(1) == '!') &&
7862 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7863 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7864 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7865 (NXT(8) == 'E')) {
7866
7867 ctxt->inSubset = 1;
7868 xmlParseDocTypeDecl(ctxt);
7869 if (RAW == '[') {
7870 ctxt->instate = XML_PARSER_DTD;
7871 xmlParseInternalSubset(ctxt);
7872 }
7873
7874 /*
7875 * Create and update the external subset.
7876 */
7877 ctxt->inSubset = 2;
7878 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7879 (!ctxt->disableSAX))
7880 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7881 ctxt->extSubSystem, ctxt->extSubURI);
7882 ctxt->inSubset = 0;
7883
7884
7885 ctxt->instate = XML_PARSER_PROLOG;
7886 xmlParseMisc(ctxt);
7887 }
7888
7889 /*
7890 * Time to start parsing the tree itself
7891 */
7892 GROW;
7893 if (RAW != '<') {
7894 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7895 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7896 ctxt->sax->error(ctxt->userData,
7897 "Start tag expected, '<' not found\n");
7898 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007899 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007900 ctxt->instate = XML_PARSER_EOF;
7901 } else {
7902 ctxt->instate = XML_PARSER_CONTENT;
7903 xmlParseElement(ctxt);
7904 ctxt->instate = XML_PARSER_EPILOG;
7905
7906
7907 /*
7908 * The Misc part at the end
7909 */
7910 xmlParseMisc(ctxt);
7911
Daniel Veillard561b7f82002-03-20 21:55:57 +00007912 if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00007913 ctxt->errNo = XML_ERR_DOCUMENT_END;
7914 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7915 ctxt->sax->error(ctxt->userData,
7916 "Extra content at the end of the document\n");
7917 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007918 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007919 }
7920 ctxt->instate = XML_PARSER_EOF;
7921 }
7922
7923 /*
7924 * SAX: end of the document processing.
7925 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007926 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007927 ctxt->sax->endDocument(ctxt->userData);
7928
Daniel Veillard5997aca2002-03-18 18:36:20 +00007929 /*
7930 * Remove locally kept entity definitions if the tree was not built
7931 */
7932 if ((ctxt->myDoc != NULL) &&
7933 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
7934 xmlFreeDoc(ctxt->myDoc);
7935 ctxt->myDoc = NULL;
7936 }
7937
Daniel Veillardc7612992002-02-17 22:47:37 +00007938 if (! ctxt->wellFormed) {
7939 ctxt->valid = 0;
7940 return(-1);
7941 }
Owen Taylor3473f882001-02-23 17:55:21 +00007942 return(0);
7943}
7944
7945/**
7946 * xmlParseExtParsedEnt:
7947 * @ctxt: an XML parser context
7948 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007949 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00007950 * An external general parsed entity is well-formed if it matches the
7951 * production labeled extParsedEnt.
7952 *
7953 * [78] extParsedEnt ::= TextDecl? content
7954 *
7955 * Returns 0, -1 in case of error. the parser context is augmented
7956 * as a result of the parsing.
7957 */
7958
7959int
7960xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7961 xmlChar start[4];
7962 xmlCharEncoding enc;
7963
7964 xmlDefaultSAXHandlerInit();
7965
7966 GROW;
7967
7968 /*
7969 * SAX: beginning of the document processing.
7970 */
7971 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7972 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7973
7974 /*
7975 * Get the 4 first bytes and decode the charset
7976 * if enc != XML_CHAR_ENCODING_NONE
7977 * plug some encoding conversion routines.
7978 */
7979 start[0] = RAW;
7980 start[1] = NXT(1);
7981 start[2] = NXT(2);
7982 start[3] = NXT(3);
7983 enc = xmlDetectCharEncoding(start, 4);
7984 if (enc != XML_CHAR_ENCODING_NONE) {
7985 xmlSwitchEncoding(ctxt, enc);
7986 }
7987
7988
7989 if (CUR == 0) {
7990 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7991 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7992 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7993 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007994 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007995 }
7996
7997 /*
7998 * Check for the XMLDecl in the Prolog.
7999 */
8000 GROW;
8001 if ((RAW == '<') && (NXT(1) == '?') &&
8002 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8003 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8004
8005 /*
8006 * Note that we will switch encoding on the fly.
8007 */
8008 xmlParseXMLDecl(ctxt);
8009 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8010 /*
8011 * The XML REC instructs us to stop parsing right here
8012 */
8013 return(-1);
8014 }
8015 SKIP_BLANKS;
8016 } else {
8017 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8018 }
8019 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8020 ctxt->sax->startDocument(ctxt->userData);
8021
8022 /*
8023 * Doing validity checking on chunk doesn't make sense
8024 */
8025 ctxt->instate = XML_PARSER_CONTENT;
8026 ctxt->validate = 0;
8027 ctxt->loadsubset = 0;
8028 ctxt->depth = 0;
8029
8030 xmlParseContent(ctxt);
8031
8032 if ((RAW == '<') && (NXT(1) == '/')) {
8033 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8034 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8035 ctxt->sax->error(ctxt->userData,
8036 "chunk is not well balanced\n");
8037 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008038 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008039 } else if (RAW != 0) {
8040 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8041 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8042 ctxt->sax->error(ctxt->userData,
8043 "extra content at the end of well balanced chunk\n");
8044 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008045 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008046 }
8047
8048 /*
8049 * SAX: end of the document processing.
8050 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008051 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008052 ctxt->sax->endDocument(ctxt->userData);
8053
8054 if (! ctxt->wellFormed) return(-1);
8055 return(0);
8056}
8057
8058/************************************************************************
8059 * *
8060 * Progressive parsing interfaces *
8061 * *
8062 ************************************************************************/
8063
8064/**
8065 * xmlParseLookupSequence:
8066 * @ctxt: an XML parser context
8067 * @first: the first char to lookup
8068 * @next: the next char to lookup or zero
8069 * @third: the next char to lookup or zero
8070 *
8071 * Try to find if a sequence (first, next, third) or just (first next) or
8072 * (first) is available in the input stream.
8073 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8074 * to avoid rescanning sequences of bytes, it DOES change the state of the
8075 * parser, do not use liberally.
8076 *
8077 * Returns the index to the current parsing point if the full sequence
8078 * is available, -1 otherwise.
8079 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008080static int
Owen Taylor3473f882001-02-23 17:55:21 +00008081xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8082 xmlChar next, xmlChar third) {
8083 int base, len;
8084 xmlParserInputPtr in;
8085 const xmlChar *buf;
8086
8087 in = ctxt->input;
8088 if (in == NULL) return(-1);
8089 base = in->cur - in->base;
8090 if (base < 0) return(-1);
8091 if (ctxt->checkIndex > base)
8092 base = ctxt->checkIndex;
8093 if (in->buf == NULL) {
8094 buf = in->base;
8095 len = in->length;
8096 } else {
8097 buf = in->buf->buffer->content;
8098 len = in->buf->buffer->use;
8099 }
8100 /* take into account the sequence length */
8101 if (third) len -= 2;
8102 else if (next) len --;
8103 for (;base < len;base++) {
8104 if (buf[base] == first) {
8105 if (third != 0) {
8106 if ((buf[base + 1] != next) ||
8107 (buf[base + 2] != third)) continue;
8108 } else if (next != 0) {
8109 if (buf[base + 1] != next) continue;
8110 }
8111 ctxt->checkIndex = 0;
8112#ifdef DEBUG_PUSH
8113 if (next == 0)
8114 xmlGenericError(xmlGenericErrorContext,
8115 "PP: lookup '%c' found at %d\n",
8116 first, base);
8117 else if (third == 0)
8118 xmlGenericError(xmlGenericErrorContext,
8119 "PP: lookup '%c%c' found at %d\n",
8120 first, next, base);
8121 else
8122 xmlGenericError(xmlGenericErrorContext,
8123 "PP: lookup '%c%c%c' found at %d\n",
8124 first, next, third, base);
8125#endif
8126 return(base - (in->cur - in->base));
8127 }
8128 }
8129 ctxt->checkIndex = base;
8130#ifdef DEBUG_PUSH
8131 if (next == 0)
8132 xmlGenericError(xmlGenericErrorContext,
8133 "PP: lookup '%c' failed\n", first);
8134 else if (third == 0)
8135 xmlGenericError(xmlGenericErrorContext,
8136 "PP: lookup '%c%c' failed\n", first, next);
8137 else
8138 xmlGenericError(xmlGenericErrorContext,
8139 "PP: lookup '%c%c%c' failed\n", first, next, third);
8140#endif
8141 return(-1);
8142}
8143
8144/**
8145 * xmlParseTryOrFinish:
8146 * @ctxt: an XML parser context
8147 * @terminate: last chunk indicator
8148 *
8149 * Try to progress on parsing
8150 *
8151 * Returns zero if no parsing was possible
8152 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008153static int
Owen Taylor3473f882001-02-23 17:55:21 +00008154xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8155 int ret = 0;
8156 int avail;
8157 xmlChar cur, next;
8158
8159#ifdef DEBUG_PUSH
8160 switch (ctxt->instate) {
8161 case XML_PARSER_EOF:
8162 xmlGenericError(xmlGenericErrorContext,
8163 "PP: try EOF\n"); break;
8164 case XML_PARSER_START:
8165 xmlGenericError(xmlGenericErrorContext,
8166 "PP: try START\n"); break;
8167 case XML_PARSER_MISC:
8168 xmlGenericError(xmlGenericErrorContext,
8169 "PP: try MISC\n");break;
8170 case XML_PARSER_COMMENT:
8171 xmlGenericError(xmlGenericErrorContext,
8172 "PP: try COMMENT\n");break;
8173 case XML_PARSER_PROLOG:
8174 xmlGenericError(xmlGenericErrorContext,
8175 "PP: try PROLOG\n");break;
8176 case XML_PARSER_START_TAG:
8177 xmlGenericError(xmlGenericErrorContext,
8178 "PP: try START_TAG\n");break;
8179 case XML_PARSER_CONTENT:
8180 xmlGenericError(xmlGenericErrorContext,
8181 "PP: try CONTENT\n");break;
8182 case XML_PARSER_CDATA_SECTION:
8183 xmlGenericError(xmlGenericErrorContext,
8184 "PP: try CDATA_SECTION\n");break;
8185 case XML_PARSER_END_TAG:
8186 xmlGenericError(xmlGenericErrorContext,
8187 "PP: try END_TAG\n");break;
8188 case XML_PARSER_ENTITY_DECL:
8189 xmlGenericError(xmlGenericErrorContext,
8190 "PP: try ENTITY_DECL\n");break;
8191 case XML_PARSER_ENTITY_VALUE:
8192 xmlGenericError(xmlGenericErrorContext,
8193 "PP: try ENTITY_VALUE\n");break;
8194 case XML_PARSER_ATTRIBUTE_VALUE:
8195 xmlGenericError(xmlGenericErrorContext,
8196 "PP: try ATTRIBUTE_VALUE\n");break;
8197 case XML_PARSER_DTD:
8198 xmlGenericError(xmlGenericErrorContext,
8199 "PP: try DTD\n");break;
8200 case XML_PARSER_EPILOG:
8201 xmlGenericError(xmlGenericErrorContext,
8202 "PP: try EPILOG\n");break;
8203 case XML_PARSER_PI:
8204 xmlGenericError(xmlGenericErrorContext,
8205 "PP: try PI\n");break;
8206 case XML_PARSER_IGNORE:
8207 xmlGenericError(xmlGenericErrorContext,
8208 "PP: try IGNORE\n");break;
8209 }
8210#endif
8211
8212 while (1) {
Aleksey Sanine48a3182002-05-09 18:20:01 +00008213 SHRINK;
8214
Owen Taylor3473f882001-02-23 17:55:21 +00008215 /*
8216 * Pop-up of finished entities.
8217 */
8218 while ((RAW == 0) && (ctxt->inputNr > 1))
8219 xmlPopInput(ctxt);
8220
8221 if (ctxt->input ==NULL) break;
8222 if (ctxt->input->buf == NULL)
8223 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00008224 else {
8225 /*
8226 * If we are operating on converted input, try to flush
8227 * remainng chars to avoid them stalling in the non-converted
8228 * buffer.
8229 */
8230 if ((ctxt->input->buf->raw != NULL) &&
8231 (ctxt->input->buf->raw->use > 0)) {
8232 int base = ctxt->input->base -
8233 ctxt->input->buf->buffer->content;
8234 int current = ctxt->input->cur - ctxt->input->base;
8235
8236 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8237 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8238 ctxt->input->cur = ctxt->input->base + current;
8239 ctxt->input->end =
8240 &ctxt->input->buf->buffer->content[
8241 ctxt->input->buf->buffer->use];
8242 }
8243 avail = ctxt->input->buf->buffer->use -
8244 (ctxt->input->cur - ctxt->input->base);
8245 }
Owen Taylor3473f882001-02-23 17:55:21 +00008246 if (avail < 1)
8247 goto done;
8248 switch (ctxt->instate) {
8249 case XML_PARSER_EOF:
8250 /*
8251 * Document parsing is done !
8252 */
8253 goto done;
8254 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008255 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8256 xmlChar start[4];
8257 xmlCharEncoding enc;
8258
8259 /*
8260 * Very first chars read from the document flow.
8261 */
8262 if (avail < 4)
8263 goto done;
8264
8265 /*
8266 * Get the 4 first bytes and decode the charset
8267 * if enc != XML_CHAR_ENCODING_NONE
8268 * plug some encoding conversion routines.
8269 */
8270 start[0] = RAW;
8271 start[1] = NXT(1);
8272 start[2] = NXT(2);
8273 start[3] = NXT(3);
8274 enc = xmlDetectCharEncoding(start, 4);
8275 if (enc != XML_CHAR_ENCODING_NONE) {
8276 xmlSwitchEncoding(ctxt, enc);
8277 }
8278 break;
8279 }
Owen Taylor3473f882001-02-23 17:55:21 +00008280
8281 cur = ctxt->input->cur[0];
8282 next = ctxt->input->cur[1];
8283 if (cur == 0) {
8284 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8285 ctxt->sax->setDocumentLocator(ctxt->userData,
8286 &xmlDefaultSAXLocator);
8287 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8288 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8289 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8290 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008291 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008292 ctxt->instate = XML_PARSER_EOF;
8293#ifdef DEBUG_PUSH
8294 xmlGenericError(xmlGenericErrorContext,
8295 "PP: entering EOF\n");
8296#endif
8297 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8298 ctxt->sax->endDocument(ctxt->userData);
8299 goto done;
8300 }
8301 if ((cur == '<') && (next == '?')) {
8302 /* PI or XML decl */
8303 if (avail < 5) return(ret);
8304 if ((!terminate) &&
8305 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8306 return(ret);
8307 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8308 ctxt->sax->setDocumentLocator(ctxt->userData,
8309 &xmlDefaultSAXLocator);
8310 if ((ctxt->input->cur[2] == 'x') &&
8311 (ctxt->input->cur[3] == 'm') &&
8312 (ctxt->input->cur[4] == 'l') &&
8313 (IS_BLANK(ctxt->input->cur[5]))) {
8314 ret += 5;
8315#ifdef DEBUG_PUSH
8316 xmlGenericError(xmlGenericErrorContext,
8317 "PP: Parsing XML Decl\n");
8318#endif
8319 xmlParseXMLDecl(ctxt);
8320 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8321 /*
8322 * The XML REC instructs us to stop parsing right
8323 * here
8324 */
8325 ctxt->instate = XML_PARSER_EOF;
8326 return(0);
8327 }
8328 ctxt->standalone = ctxt->input->standalone;
8329 if ((ctxt->encoding == NULL) &&
8330 (ctxt->input->encoding != NULL))
8331 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8332 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8333 (!ctxt->disableSAX))
8334 ctxt->sax->startDocument(ctxt->userData);
8335 ctxt->instate = XML_PARSER_MISC;
8336#ifdef DEBUG_PUSH
8337 xmlGenericError(xmlGenericErrorContext,
8338 "PP: entering MISC\n");
8339#endif
8340 } else {
8341 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8342 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8343 (!ctxt->disableSAX))
8344 ctxt->sax->startDocument(ctxt->userData);
8345 ctxt->instate = XML_PARSER_MISC;
8346#ifdef DEBUG_PUSH
8347 xmlGenericError(xmlGenericErrorContext,
8348 "PP: entering MISC\n");
8349#endif
8350 }
8351 } else {
8352 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8353 ctxt->sax->setDocumentLocator(ctxt->userData,
8354 &xmlDefaultSAXLocator);
8355 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8356 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8357 (!ctxt->disableSAX))
8358 ctxt->sax->startDocument(ctxt->userData);
8359 ctxt->instate = XML_PARSER_MISC;
8360#ifdef DEBUG_PUSH
8361 xmlGenericError(xmlGenericErrorContext,
8362 "PP: entering MISC\n");
8363#endif
8364 }
8365 break;
8366 case XML_PARSER_MISC:
8367 SKIP_BLANKS;
8368 if (ctxt->input->buf == NULL)
8369 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8370 else
8371 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8372 if (avail < 2)
8373 goto done;
8374 cur = ctxt->input->cur[0];
8375 next = ctxt->input->cur[1];
8376 if ((cur == '<') && (next == '?')) {
8377 if ((!terminate) &&
8378 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8379 goto done;
8380#ifdef DEBUG_PUSH
8381 xmlGenericError(xmlGenericErrorContext,
8382 "PP: Parsing PI\n");
8383#endif
8384 xmlParsePI(ctxt);
8385 } else if ((cur == '<') && (next == '!') &&
8386 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8387 if ((!terminate) &&
8388 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8389 goto done;
8390#ifdef DEBUG_PUSH
8391 xmlGenericError(xmlGenericErrorContext,
8392 "PP: Parsing Comment\n");
8393#endif
8394 xmlParseComment(ctxt);
8395 ctxt->instate = XML_PARSER_MISC;
8396 } else if ((cur == '<') && (next == '!') &&
8397 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8398 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8399 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8400 (ctxt->input->cur[8] == 'E')) {
8401 if ((!terminate) &&
8402 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8403 goto done;
8404#ifdef DEBUG_PUSH
8405 xmlGenericError(xmlGenericErrorContext,
8406 "PP: Parsing internal subset\n");
8407#endif
8408 ctxt->inSubset = 1;
8409 xmlParseDocTypeDecl(ctxt);
8410 if (RAW == '[') {
8411 ctxt->instate = XML_PARSER_DTD;
8412#ifdef DEBUG_PUSH
8413 xmlGenericError(xmlGenericErrorContext,
8414 "PP: entering DTD\n");
8415#endif
8416 } else {
8417 /*
8418 * Create and update the external subset.
8419 */
8420 ctxt->inSubset = 2;
8421 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8422 (ctxt->sax->externalSubset != NULL))
8423 ctxt->sax->externalSubset(ctxt->userData,
8424 ctxt->intSubName, ctxt->extSubSystem,
8425 ctxt->extSubURI);
8426 ctxt->inSubset = 0;
8427 ctxt->instate = XML_PARSER_PROLOG;
8428#ifdef DEBUG_PUSH
8429 xmlGenericError(xmlGenericErrorContext,
8430 "PP: entering PROLOG\n");
8431#endif
8432 }
8433 } else if ((cur == '<') && (next == '!') &&
8434 (avail < 9)) {
8435 goto done;
8436 } else {
8437 ctxt->instate = XML_PARSER_START_TAG;
8438#ifdef DEBUG_PUSH
8439 xmlGenericError(xmlGenericErrorContext,
8440 "PP: entering START_TAG\n");
8441#endif
8442 }
8443 break;
8444 case XML_PARSER_IGNORE:
8445 xmlGenericError(xmlGenericErrorContext,
8446 "PP: internal error, state == IGNORE");
8447 ctxt->instate = XML_PARSER_DTD;
8448#ifdef DEBUG_PUSH
8449 xmlGenericError(xmlGenericErrorContext,
8450 "PP: entering DTD\n");
8451#endif
8452 break;
8453 case XML_PARSER_PROLOG:
8454 SKIP_BLANKS;
8455 if (ctxt->input->buf == NULL)
8456 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8457 else
8458 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8459 if (avail < 2)
8460 goto done;
8461 cur = ctxt->input->cur[0];
8462 next = ctxt->input->cur[1];
8463 if ((cur == '<') && (next == '?')) {
8464 if ((!terminate) &&
8465 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8466 goto done;
8467#ifdef DEBUG_PUSH
8468 xmlGenericError(xmlGenericErrorContext,
8469 "PP: Parsing PI\n");
8470#endif
8471 xmlParsePI(ctxt);
8472 } else if ((cur == '<') && (next == '!') &&
8473 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8474 if ((!terminate) &&
8475 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8476 goto done;
8477#ifdef DEBUG_PUSH
8478 xmlGenericError(xmlGenericErrorContext,
8479 "PP: Parsing Comment\n");
8480#endif
8481 xmlParseComment(ctxt);
8482 ctxt->instate = XML_PARSER_PROLOG;
8483 } else if ((cur == '<') && (next == '!') &&
8484 (avail < 4)) {
8485 goto done;
8486 } else {
8487 ctxt->instate = XML_PARSER_START_TAG;
8488#ifdef DEBUG_PUSH
8489 xmlGenericError(xmlGenericErrorContext,
8490 "PP: entering START_TAG\n");
8491#endif
8492 }
8493 break;
8494 case XML_PARSER_EPILOG:
8495 SKIP_BLANKS;
8496 if (ctxt->input->buf == NULL)
8497 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8498 else
8499 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8500 if (avail < 2)
8501 goto done;
8502 cur = ctxt->input->cur[0];
8503 next = ctxt->input->cur[1];
8504 if ((cur == '<') && (next == '?')) {
8505 if ((!terminate) &&
8506 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8507 goto done;
8508#ifdef DEBUG_PUSH
8509 xmlGenericError(xmlGenericErrorContext,
8510 "PP: Parsing PI\n");
8511#endif
8512 xmlParsePI(ctxt);
8513 ctxt->instate = XML_PARSER_EPILOG;
8514 } else if ((cur == '<') && (next == '!') &&
8515 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8516 if ((!terminate) &&
8517 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8518 goto done;
8519#ifdef DEBUG_PUSH
8520 xmlGenericError(xmlGenericErrorContext,
8521 "PP: Parsing Comment\n");
8522#endif
8523 xmlParseComment(ctxt);
8524 ctxt->instate = XML_PARSER_EPILOG;
8525 } else if ((cur == '<') && (next == '!') &&
8526 (avail < 4)) {
8527 goto done;
8528 } else {
8529 ctxt->errNo = XML_ERR_DOCUMENT_END;
8530 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8531 ctxt->sax->error(ctxt->userData,
8532 "Extra content at the end of the document\n");
8533 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008534 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008535 ctxt->instate = XML_PARSER_EOF;
8536#ifdef DEBUG_PUSH
8537 xmlGenericError(xmlGenericErrorContext,
8538 "PP: entering EOF\n");
8539#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008540 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008541 ctxt->sax->endDocument(ctxt->userData);
8542 goto done;
8543 }
8544 break;
8545 case XML_PARSER_START_TAG: {
8546 xmlChar *name, *oldname;
8547
8548 if ((avail < 2) && (ctxt->inputNr == 1))
8549 goto done;
8550 cur = ctxt->input->cur[0];
8551 if (cur != '<') {
8552 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8553 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8554 ctxt->sax->error(ctxt->userData,
8555 "Start tag expect, '<' not found\n");
8556 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008557 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008558 ctxt->instate = XML_PARSER_EOF;
8559#ifdef DEBUG_PUSH
8560 xmlGenericError(xmlGenericErrorContext,
8561 "PP: entering EOF\n");
8562#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008563 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008564 ctxt->sax->endDocument(ctxt->userData);
8565 goto done;
8566 }
8567 if ((!terminate) &&
8568 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8569 goto done;
8570 if (ctxt->spaceNr == 0)
8571 spacePush(ctxt, -1);
8572 else
8573 spacePush(ctxt, *ctxt->space);
8574 name = xmlParseStartTag(ctxt);
8575 if (name == NULL) {
8576 spacePop(ctxt);
8577 ctxt->instate = XML_PARSER_EOF;
8578#ifdef DEBUG_PUSH
8579 xmlGenericError(xmlGenericErrorContext,
8580 "PP: entering EOF\n");
8581#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008582 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008583 ctxt->sax->endDocument(ctxt->userData);
8584 goto done;
8585 }
8586 namePush(ctxt, xmlStrdup(name));
8587
8588 /*
8589 * [ VC: Root Element Type ]
8590 * The Name in the document type declaration must match
8591 * the element type of the root element.
8592 */
8593 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8594 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8595 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8596
8597 /*
8598 * Check for an Empty Element.
8599 */
8600 if ((RAW == '/') && (NXT(1) == '>')) {
8601 SKIP(2);
8602 if ((ctxt->sax != NULL) &&
8603 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8604 ctxt->sax->endElement(ctxt->userData, name);
8605 xmlFree(name);
8606 oldname = namePop(ctxt);
8607 spacePop(ctxt);
8608 if (oldname != NULL) {
8609#ifdef DEBUG_STACK
8610 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8611#endif
8612 xmlFree(oldname);
8613 }
8614 if (ctxt->name == NULL) {
8615 ctxt->instate = XML_PARSER_EPILOG;
8616#ifdef DEBUG_PUSH
8617 xmlGenericError(xmlGenericErrorContext,
8618 "PP: entering EPILOG\n");
8619#endif
8620 } else {
8621 ctxt->instate = XML_PARSER_CONTENT;
8622#ifdef DEBUG_PUSH
8623 xmlGenericError(xmlGenericErrorContext,
8624 "PP: entering CONTENT\n");
8625#endif
8626 }
8627 break;
8628 }
8629 if (RAW == '>') {
8630 NEXT;
8631 } else {
8632 ctxt->errNo = XML_ERR_GT_REQUIRED;
8633 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8634 ctxt->sax->error(ctxt->userData,
8635 "Couldn't find end of Start Tag %s\n",
8636 name);
8637 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008638 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008639
8640 /*
8641 * end of parsing of this node.
8642 */
8643 nodePop(ctxt);
8644 oldname = namePop(ctxt);
8645 spacePop(ctxt);
8646 if (oldname != NULL) {
8647#ifdef DEBUG_STACK
8648 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8649#endif
8650 xmlFree(oldname);
8651 }
8652 }
8653 xmlFree(name);
8654 ctxt->instate = XML_PARSER_CONTENT;
8655#ifdef DEBUG_PUSH
8656 xmlGenericError(xmlGenericErrorContext,
8657 "PP: entering CONTENT\n");
8658#endif
8659 break;
8660 }
8661 case XML_PARSER_CONTENT: {
8662 const xmlChar *test;
8663 int cons;
Owen Taylor3473f882001-02-23 17:55:21 +00008664 if ((avail < 2) && (ctxt->inputNr == 1))
8665 goto done;
8666 cur = ctxt->input->cur[0];
8667 next = ctxt->input->cur[1];
8668
8669 test = CUR_PTR;
8670 cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008671 if ((cur == '<') && (next == '?')) {
8672 if ((!terminate) &&
8673 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8674 goto done;
8675#ifdef DEBUG_PUSH
8676 xmlGenericError(xmlGenericErrorContext,
8677 "PP: Parsing PI\n");
8678#endif
8679 xmlParsePI(ctxt);
8680 } else if ((cur == '<') && (next == '!') &&
8681 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8682 if ((!terminate) &&
8683 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8684 goto done;
8685#ifdef DEBUG_PUSH
8686 xmlGenericError(xmlGenericErrorContext,
8687 "PP: Parsing Comment\n");
8688#endif
8689 xmlParseComment(ctxt);
8690 ctxt->instate = XML_PARSER_CONTENT;
8691 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8692 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8693 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8694 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8695 (ctxt->input->cur[8] == '[')) {
8696 SKIP(9);
8697 ctxt->instate = XML_PARSER_CDATA_SECTION;
8698#ifdef DEBUG_PUSH
8699 xmlGenericError(xmlGenericErrorContext,
8700 "PP: entering CDATA_SECTION\n");
8701#endif
8702 break;
8703 } else if ((cur == '<') && (next == '!') &&
8704 (avail < 9)) {
8705 goto done;
8706 } else if ((cur == '<') && (next == '/')) {
8707 ctxt->instate = XML_PARSER_END_TAG;
8708#ifdef DEBUG_PUSH
8709 xmlGenericError(xmlGenericErrorContext,
8710 "PP: entering END_TAG\n");
8711#endif
8712 break;
8713 } else if (cur == '<') {
8714 ctxt->instate = XML_PARSER_START_TAG;
8715#ifdef DEBUG_PUSH
8716 xmlGenericError(xmlGenericErrorContext,
8717 "PP: entering START_TAG\n");
8718#endif
8719 break;
8720 } else if (cur == '&') {
8721 if ((!terminate) &&
8722 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8723 goto done;
8724#ifdef DEBUG_PUSH
8725 xmlGenericError(xmlGenericErrorContext,
8726 "PP: Parsing Reference\n");
8727#endif
8728 xmlParseReference(ctxt);
8729 } else {
8730 /* TODO Avoid the extra copy, handle directly !!! */
8731 /*
8732 * Goal of the following test is:
8733 * - minimize calls to the SAX 'character' callback
8734 * when they are mergeable
8735 * - handle an problem for isBlank when we only parse
8736 * a sequence of blank chars and the next one is
8737 * not available to check against '<' presence.
8738 * - tries to homogenize the differences in SAX
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008739 * callbacks between the push and pull versions
Owen Taylor3473f882001-02-23 17:55:21 +00008740 * of the parser.
8741 */
8742 if ((ctxt->inputNr == 1) &&
8743 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8744 if ((!terminate) &&
8745 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8746 goto done;
8747 }
8748 ctxt->checkIndex = 0;
8749#ifdef DEBUG_PUSH
8750 xmlGenericError(xmlGenericErrorContext,
8751 "PP: Parsing char data\n");
8752#endif
8753 xmlParseCharData(ctxt, 0);
8754 }
8755 /*
8756 * Pop-up of finished entities.
8757 */
8758 while ((RAW == 0) && (ctxt->inputNr > 1))
8759 xmlPopInput(ctxt);
Daniel Veillardfdc91562002-07-01 21:52:03 +00008760 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008761 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8762 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8763 ctxt->sax->error(ctxt->userData,
8764 "detected an error in element content\n");
8765 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008766 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008767 ctxt->instate = XML_PARSER_EOF;
8768 break;
8769 }
8770 break;
8771 }
8772 case XML_PARSER_CDATA_SECTION: {
8773 /*
8774 * The Push mode need to have the SAX callback for
8775 * cdataBlock merge back contiguous callbacks.
8776 */
8777 int base;
8778
8779 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8780 if (base < 0) {
8781 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8782 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8783 if (ctxt->sax->cdataBlock != NULL)
8784 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8785 XML_PARSER_BIG_BUFFER_SIZE);
8786 }
8787 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8788 ctxt->checkIndex = 0;
8789 }
8790 goto done;
8791 } else {
8792 if ((ctxt->sax != NULL) && (base > 0) &&
8793 (!ctxt->disableSAX)) {
8794 if (ctxt->sax->cdataBlock != NULL)
8795 ctxt->sax->cdataBlock(ctxt->userData,
8796 ctxt->input->cur, base);
8797 }
8798 SKIP(base + 3);
8799 ctxt->checkIndex = 0;
8800 ctxt->instate = XML_PARSER_CONTENT;
8801#ifdef DEBUG_PUSH
8802 xmlGenericError(xmlGenericErrorContext,
8803 "PP: entering CONTENT\n");
8804#endif
8805 }
8806 break;
8807 }
8808 case XML_PARSER_END_TAG:
8809 if (avail < 2)
8810 goto done;
8811 if ((!terminate) &&
8812 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8813 goto done;
8814 xmlParseEndTag(ctxt);
8815 if (ctxt->name == NULL) {
8816 ctxt->instate = XML_PARSER_EPILOG;
8817#ifdef DEBUG_PUSH
8818 xmlGenericError(xmlGenericErrorContext,
8819 "PP: entering EPILOG\n");
8820#endif
8821 } else {
8822 ctxt->instate = XML_PARSER_CONTENT;
8823#ifdef DEBUG_PUSH
8824 xmlGenericError(xmlGenericErrorContext,
8825 "PP: entering CONTENT\n");
8826#endif
8827 }
8828 break;
8829 case XML_PARSER_DTD: {
8830 /*
8831 * Sorry but progressive parsing of the internal subset
8832 * is not expected to be supported. We first check that
8833 * the full content of the internal subset is available and
8834 * the parsing is launched only at that point.
8835 * Internal subset ends up with "']' S? '>'" in an unescaped
8836 * section and not in a ']]>' sequence which are conditional
8837 * sections (whoever argued to keep that crap in XML deserve
8838 * a place in hell !).
8839 */
8840 int base, i;
8841 xmlChar *buf;
8842 xmlChar quote = 0;
8843
8844 base = ctxt->input->cur - ctxt->input->base;
8845 if (base < 0) return(0);
8846 if (ctxt->checkIndex > base)
8847 base = ctxt->checkIndex;
8848 buf = ctxt->input->buf->buffer->content;
8849 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8850 base++) {
8851 if (quote != 0) {
8852 if (buf[base] == quote)
8853 quote = 0;
8854 continue;
8855 }
8856 if (buf[base] == '"') {
8857 quote = '"';
8858 continue;
8859 }
8860 if (buf[base] == '\'') {
8861 quote = '\'';
8862 continue;
8863 }
8864 if (buf[base] == ']') {
8865 if ((unsigned int) base +1 >=
8866 ctxt->input->buf->buffer->use)
8867 break;
8868 if (buf[base + 1] == ']') {
8869 /* conditional crap, skip both ']' ! */
8870 base++;
8871 continue;
8872 }
8873 for (i = 0;
8874 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8875 i++) {
8876 if (buf[base + i] == '>')
8877 goto found_end_int_subset;
8878 }
8879 break;
8880 }
8881 }
8882 /*
8883 * We didn't found the end of the Internal subset
8884 */
8885 if (quote == 0)
8886 ctxt->checkIndex = base;
8887#ifdef DEBUG_PUSH
8888 if (next == 0)
8889 xmlGenericError(xmlGenericErrorContext,
8890 "PP: lookup of int subset end filed\n");
8891#endif
8892 goto done;
8893
8894found_end_int_subset:
8895 xmlParseInternalSubset(ctxt);
8896 ctxt->inSubset = 2;
8897 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8898 (ctxt->sax->externalSubset != NULL))
8899 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8900 ctxt->extSubSystem, ctxt->extSubURI);
8901 ctxt->inSubset = 0;
8902 ctxt->instate = XML_PARSER_PROLOG;
8903 ctxt->checkIndex = 0;
8904#ifdef DEBUG_PUSH
8905 xmlGenericError(xmlGenericErrorContext,
8906 "PP: entering PROLOG\n");
8907#endif
8908 break;
8909 }
8910 case XML_PARSER_COMMENT:
8911 xmlGenericError(xmlGenericErrorContext,
8912 "PP: internal error, state == COMMENT\n");
8913 ctxt->instate = XML_PARSER_CONTENT;
8914#ifdef DEBUG_PUSH
8915 xmlGenericError(xmlGenericErrorContext,
8916 "PP: entering CONTENT\n");
8917#endif
8918 break;
8919 case XML_PARSER_PI:
8920 xmlGenericError(xmlGenericErrorContext,
8921 "PP: internal error, state == PI\n");
8922 ctxt->instate = XML_PARSER_CONTENT;
8923#ifdef DEBUG_PUSH
8924 xmlGenericError(xmlGenericErrorContext,
8925 "PP: entering CONTENT\n");
8926#endif
8927 break;
8928 case XML_PARSER_ENTITY_DECL:
8929 xmlGenericError(xmlGenericErrorContext,
8930 "PP: internal error, state == ENTITY_DECL\n");
8931 ctxt->instate = XML_PARSER_DTD;
8932#ifdef DEBUG_PUSH
8933 xmlGenericError(xmlGenericErrorContext,
8934 "PP: entering DTD\n");
8935#endif
8936 break;
8937 case XML_PARSER_ENTITY_VALUE:
8938 xmlGenericError(xmlGenericErrorContext,
8939 "PP: internal error, state == ENTITY_VALUE\n");
8940 ctxt->instate = XML_PARSER_CONTENT;
8941#ifdef DEBUG_PUSH
8942 xmlGenericError(xmlGenericErrorContext,
8943 "PP: entering DTD\n");
8944#endif
8945 break;
8946 case XML_PARSER_ATTRIBUTE_VALUE:
8947 xmlGenericError(xmlGenericErrorContext,
8948 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8949 ctxt->instate = XML_PARSER_START_TAG;
8950#ifdef DEBUG_PUSH
8951 xmlGenericError(xmlGenericErrorContext,
8952 "PP: entering START_TAG\n");
8953#endif
8954 break;
8955 case XML_PARSER_SYSTEM_LITERAL:
8956 xmlGenericError(xmlGenericErrorContext,
8957 "PP: internal error, state == SYSTEM_LITERAL\n");
8958 ctxt->instate = XML_PARSER_START_TAG;
8959#ifdef DEBUG_PUSH
8960 xmlGenericError(xmlGenericErrorContext,
8961 "PP: entering START_TAG\n");
8962#endif
8963 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00008964 case XML_PARSER_PUBLIC_LITERAL:
8965 xmlGenericError(xmlGenericErrorContext,
8966 "PP: internal error, state == PUBLIC_LITERAL\n");
8967 ctxt->instate = XML_PARSER_START_TAG;
8968#ifdef DEBUG_PUSH
8969 xmlGenericError(xmlGenericErrorContext,
8970 "PP: entering START_TAG\n");
8971#endif
8972 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008973 }
8974 }
8975done:
8976#ifdef DEBUG_PUSH
8977 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8978#endif
8979 return(ret);
8980}
8981
8982/**
Owen Taylor3473f882001-02-23 17:55:21 +00008983 * xmlParseChunk:
8984 * @ctxt: an XML parser context
8985 * @chunk: an char array
8986 * @size: the size in byte of the chunk
8987 * @terminate: last chunk indicator
8988 *
8989 * Parse a Chunk of memory
8990 *
8991 * Returns zero if no error, the xmlParserErrors otherwise.
8992 */
8993int
8994xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8995 int terminate) {
8996 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8997 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8998 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8999 int cur = ctxt->input->cur - ctxt->input->base;
9000
9001 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9002 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9003 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009004 ctxt->input->end =
9005 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009006#ifdef DEBUG_PUSH
9007 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9008#endif
9009
9010 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
9011 xmlParseTryOrFinish(ctxt, terminate);
9012 } else if (ctxt->instate != XML_PARSER_EOF) {
9013 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
9014 xmlParserInputBufferPtr in = ctxt->input->buf;
9015 if ((in->encoder != NULL) && (in->buffer != NULL) &&
9016 (in->raw != NULL)) {
9017 int nbchars;
9018
9019 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
9020 if (nbchars < 0) {
9021 xmlGenericError(xmlGenericErrorContext,
9022 "xmlParseChunk: encoder error\n");
9023 return(XML_ERR_INVALID_ENCODING);
9024 }
9025 }
9026 }
9027 }
9028 xmlParseTryOrFinish(ctxt, terminate);
9029 if (terminate) {
9030 /*
9031 * Check for termination
9032 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009033 int avail = 0;
9034 if (ctxt->input->buf == NULL)
9035 avail = ctxt->input->length -
9036 (ctxt->input->cur - ctxt->input->base);
9037 else
9038 avail = ctxt->input->buf->buffer->use -
9039 (ctxt->input->cur - ctxt->input->base);
9040
Owen Taylor3473f882001-02-23 17:55:21 +00009041 if ((ctxt->instate != XML_PARSER_EOF) &&
9042 (ctxt->instate != XML_PARSER_EPILOG)) {
9043 ctxt->errNo = XML_ERR_DOCUMENT_END;
9044 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9045 ctxt->sax->error(ctxt->userData,
9046 "Extra content at the end of the document\n");
9047 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009048 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009049 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009050 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
9051 ctxt->errNo = XML_ERR_DOCUMENT_END;
9052 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9053 ctxt->sax->error(ctxt->userData,
9054 "Extra content at the end of the document\n");
9055 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009056 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009057
9058 }
Owen Taylor3473f882001-02-23 17:55:21 +00009059 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009060 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009061 ctxt->sax->endDocument(ctxt->userData);
9062 }
9063 ctxt->instate = XML_PARSER_EOF;
9064 }
9065 return((xmlParserErrors) ctxt->errNo);
9066}
9067
9068/************************************************************************
9069 * *
9070 * I/O front end functions to the parser *
9071 * *
9072 ************************************************************************/
9073
9074/**
9075 * xmlStopParser:
9076 * @ctxt: an XML parser context
9077 *
9078 * Blocks further parser processing
9079 */
9080void
9081xmlStopParser(xmlParserCtxtPtr ctxt) {
9082 ctxt->instate = XML_PARSER_EOF;
9083 if (ctxt->input != NULL)
9084 ctxt->input->cur = BAD_CAST"";
9085}
9086
9087/**
9088 * xmlCreatePushParserCtxt:
9089 * @sax: a SAX handler
9090 * @user_data: The user data returned on SAX callbacks
9091 * @chunk: a pointer to an array of chars
9092 * @size: number of chars in the array
9093 * @filename: an optional file name or URI
9094 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00009095 * Create a parser context for using the XML parser in push mode.
9096 * If @buffer and @size are non-NULL, the data is used to detect
9097 * the encoding. The remaining characters will be parsed so they
9098 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00009099 * To allow content encoding detection, @size should be >= 4
9100 * The value of @filename is used for fetching external entities
9101 * and error/warning reports.
9102 *
9103 * Returns the new parser context or NULL
9104 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00009105
Owen Taylor3473f882001-02-23 17:55:21 +00009106xmlParserCtxtPtr
9107xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9108 const char *chunk, int size, const char *filename) {
9109 xmlParserCtxtPtr ctxt;
9110 xmlParserInputPtr inputStream;
9111 xmlParserInputBufferPtr buf;
9112 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9113
9114 /*
9115 * plug some encoding conversion routines
9116 */
9117 if ((chunk != NULL) && (size >= 4))
9118 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9119
9120 buf = xmlAllocParserInputBuffer(enc);
9121 if (buf == NULL) return(NULL);
9122
9123 ctxt = xmlNewParserCtxt();
9124 if (ctxt == NULL) {
9125 xmlFree(buf);
9126 return(NULL);
9127 }
9128 if (sax != NULL) {
9129 if (ctxt->sax != &xmlDefaultSAXHandler)
9130 xmlFree(ctxt->sax);
9131 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9132 if (ctxt->sax == NULL) {
9133 xmlFree(buf);
9134 xmlFree(ctxt);
9135 return(NULL);
9136 }
9137 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9138 if (user_data != NULL)
9139 ctxt->userData = user_data;
9140 }
9141 if (filename == NULL) {
9142 ctxt->directory = NULL;
9143 } else {
9144 ctxt->directory = xmlParserGetDirectory(filename);
9145 }
9146
9147 inputStream = xmlNewInputStream(ctxt);
9148 if (inputStream == NULL) {
9149 xmlFreeParserCtxt(ctxt);
9150 return(NULL);
9151 }
9152
9153 if (filename == NULL)
9154 inputStream->filename = NULL;
9155 else
Daniel Veillardf4862f02002-09-10 11:13:43 +00009156 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +00009157 xmlCanonicPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +00009158 inputStream->buf = buf;
9159 inputStream->base = inputStream->buf->buffer->content;
9160 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009161 inputStream->end =
9162 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009163
9164 inputPush(ctxt, inputStream);
9165
9166 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9167 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009168 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9169 int cur = ctxt->input->cur - ctxt->input->base;
9170
Owen Taylor3473f882001-02-23 17:55:21 +00009171 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009172
9173 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9174 ctxt->input->cur = ctxt->input->base + cur;
9175 ctxt->input->end =
9176 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009177#ifdef DEBUG_PUSH
9178 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9179#endif
9180 }
9181
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009182 if (enc != XML_CHAR_ENCODING_NONE) {
9183 xmlSwitchEncoding(ctxt, enc);
9184 }
9185
Owen Taylor3473f882001-02-23 17:55:21 +00009186 return(ctxt);
9187}
9188
9189/**
9190 * xmlCreateIOParserCtxt:
9191 * @sax: a SAX handler
9192 * @user_data: The user data returned on SAX callbacks
9193 * @ioread: an I/O read function
9194 * @ioclose: an I/O close function
9195 * @ioctx: an I/O handler
9196 * @enc: the charset encoding if known
9197 *
9198 * Create a parser context for using the XML parser with an existing
9199 * I/O stream
9200 *
9201 * Returns the new parser context or NULL
9202 */
9203xmlParserCtxtPtr
9204xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9205 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9206 void *ioctx, xmlCharEncoding enc) {
9207 xmlParserCtxtPtr ctxt;
9208 xmlParserInputPtr inputStream;
9209 xmlParserInputBufferPtr buf;
9210
9211 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9212 if (buf == NULL) return(NULL);
9213
9214 ctxt = xmlNewParserCtxt();
9215 if (ctxt == NULL) {
9216 xmlFree(buf);
9217 return(NULL);
9218 }
9219 if (sax != NULL) {
9220 if (ctxt->sax != &xmlDefaultSAXHandler)
9221 xmlFree(ctxt->sax);
9222 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9223 if (ctxt->sax == NULL) {
9224 xmlFree(buf);
9225 xmlFree(ctxt);
9226 return(NULL);
9227 }
9228 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9229 if (user_data != NULL)
9230 ctxt->userData = user_data;
9231 }
9232
9233 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9234 if (inputStream == NULL) {
9235 xmlFreeParserCtxt(ctxt);
9236 return(NULL);
9237 }
9238 inputPush(ctxt, inputStream);
9239
9240 return(ctxt);
9241}
9242
9243/************************************************************************
9244 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009245 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00009246 * *
9247 ************************************************************************/
9248
9249/**
9250 * xmlIOParseDTD:
9251 * @sax: the SAX handler block or NULL
9252 * @input: an Input Buffer
9253 * @enc: the charset encoding if known
9254 *
9255 * Load and parse a DTD
9256 *
9257 * Returns the resulting xmlDtdPtr or NULL in case of error.
9258 * @input will be freed at parsing end.
9259 */
9260
9261xmlDtdPtr
9262xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9263 xmlCharEncoding enc) {
9264 xmlDtdPtr ret = NULL;
9265 xmlParserCtxtPtr ctxt;
9266 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009267 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00009268
9269 if (input == NULL)
9270 return(NULL);
9271
9272 ctxt = xmlNewParserCtxt();
9273 if (ctxt == NULL) {
9274 return(NULL);
9275 }
9276
9277 /*
9278 * Set-up the SAX context
9279 */
9280 if (sax != NULL) {
9281 if (ctxt->sax != NULL)
9282 xmlFree(ctxt->sax);
9283 ctxt->sax = sax;
9284 ctxt->userData = NULL;
9285 }
9286
9287 /*
9288 * generate a parser input from the I/O handler
9289 */
9290
9291 pinput = xmlNewIOInputStream(ctxt, input, enc);
9292 if (pinput == NULL) {
9293 if (sax != NULL) ctxt->sax = NULL;
9294 xmlFreeParserCtxt(ctxt);
9295 return(NULL);
9296 }
9297
9298 /*
9299 * plug some encoding conversion routines here.
9300 */
9301 xmlPushInput(ctxt, pinput);
9302
9303 pinput->filename = NULL;
9304 pinput->line = 1;
9305 pinput->col = 1;
9306 pinput->base = ctxt->input->cur;
9307 pinput->cur = ctxt->input->cur;
9308 pinput->free = NULL;
9309
9310 /*
9311 * let's parse that entity knowing it's an external subset.
9312 */
9313 ctxt->inSubset = 2;
9314 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9315 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9316 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009317
9318 if (enc == XML_CHAR_ENCODING_NONE) {
9319 /*
9320 * Get the 4 first bytes and decode the charset
9321 * if enc != XML_CHAR_ENCODING_NONE
9322 * plug some encoding conversion routines.
9323 */
9324 start[0] = RAW;
9325 start[1] = NXT(1);
9326 start[2] = NXT(2);
9327 start[3] = NXT(3);
9328 enc = xmlDetectCharEncoding(start, 4);
9329 if (enc != XML_CHAR_ENCODING_NONE) {
9330 xmlSwitchEncoding(ctxt, enc);
9331 }
9332 }
9333
Owen Taylor3473f882001-02-23 17:55:21 +00009334 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9335
9336 if (ctxt->myDoc != NULL) {
9337 if (ctxt->wellFormed) {
9338 ret = ctxt->myDoc->extSubset;
9339 ctxt->myDoc->extSubset = NULL;
9340 } else {
9341 ret = NULL;
9342 }
9343 xmlFreeDoc(ctxt->myDoc);
9344 ctxt->myDoc = NULL;
9345 }
9346 if (sax != NULL) ctxt->sax = NULL;
9347 xmlFreeParserCtxt(ctxt);
9348
9349 return(ret);
9350}
9351
9352/**
9353 * xmlSAXParseDTD:
9354 * @sax: the SAX handler block
9355 * @ExternalID: a NAME* containing the External ID of the DTD
9356 * @SystemID: a NAME* containing the URL to the DTD
9357 *
9358 * Load and parse an external subset.
9359 *
9360 * Returns the resulting xmlDtdPtr or NULL in case of error.
9361 */
9362
9363xmlDtdPtr
9364xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9365 const xmlChar *SystemID) {
9366 xmlDtdPtr ret = NULL;
9367 xmlParserCtxtPtr ctxt;
9368 xmlParserInputPtr input = NULL;
9369 xmlCharEncoding enc;
9370
9371 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9372
9373 ctxt = xmlNewParserCtxt();
9374 if (ctxt == NULL) {
9375 return(NULL);
9376 }
9377
9378 /*
9379 * Set-up the SAX context
9380 */
9381 if (sax != NULL) {
9382 if (ctxt->sax != NULL)
9383 xmlFree(ctxt->sax);
9384 ctxt->sax = sax;
9385 ctxt->userData = NULL;
9386 }
9387
9388 /*
9389 * Ask the Entity resolver to load the damn thing
9390 */
9391
9392 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9393 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9394 if (input == NULL) {
9395 if (sax != NULL) ctxt->sax = NULL;
9396 xmlFreeParserCtxt(ctxt);
9397 return(NULL);
9398 }
9399
9400 /*
9401 * plug some encoding conversion routines here.
9402 */
9403 xmlPushInput(ctxt, input);
9404 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9405 xmlSwitchEncoding(ctxt, enc);
9406
9407 if (input->filename == NULL)
9408 input->filename = (char *) xmlStrdup(SystemID);
9409 input->line = 1;
9410 input->col = 1;
9411 input->base = ctxt->input->cur;
9412 input->cur = ctxt->input->cur;
9413 input->free = NULL;
9414
9415 /*
9416 * let's parse that entity knowing it's an external subset.
9417 */
9418 ctxt->inSubset = 2;
9419 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9420 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9421 ExternalID, SystemID);
9422 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9423
9424 if (ctxt->myDoc != NULL) {
9425 if (ctxt->wellFormed) {
9426 ret = ctxt->myDoc->extSubset;
9427 ctxt->myDoc->extSubset = NULL;
9428 } else {
9429 ret = NULL;
9430 }
9431 xmlFreeDoc(ctxt->myDoc);
9432 ctxt->myDoc = NULL;
9433 }
9434 if (sax != NULL) ctxt->sax = NULL;
9435 xmlFreeParserCtxt(ctxt);
9436
9437 return(ret);
9438}
9439
9440/**
9441 * xmlParseDTD:
9442 * @ExternalID: a NAME* containing the External ID of the DTD
9443 * @SystemID: a NAME* containing the URL to the DTD
9444 *
9445 * Load and parse an external subset.
9446 *
9447 * Returns the resulting xmlDtdPtr or NULL in case of error.
9448 */
9449
9450xmlDtdPtr
9451xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9452 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9453}
9454
9455/************************************************************************
9456 * *
9457 * Front ends when parsing an Entity *
9458 * *
9459 ************************************************************************/
9460
9461/**
Owen Taylor3473f882001-02-23 17:55:21 +00009462 * xmlParseCtxtExternalEntity:
9463 * @ctx: the existing parsing context
9464 * @URL: the URL for the entity to load
9465 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009466 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009467 *
9468 * Parse an external general entity within an existing parsing context
9469 * An external general parsed entity is well-formed if it matches the
9470 * production labeled extParsedEnt.
9471 *
9472 * [78] extParsedEnt ::= TextDecl? content
9473 *
9474 * Returns 0 if the entity is well formed, -1 in case of args problem and
9475 * the parser error code otherwise
9476 */
9477
9478int
9479xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009480 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009481 xmlParserCtxtPtr ctxt;
9482 xmlDocPtr newDoc;
9483 xmlSAXHandlerPtr oldsax = NULL;
9484 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009485 xmlChar start[4];
9486 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009487
9488 if (ctx->depth > 40) {
9489 return(XML_ERR_ENTITY_LOOP);
9490 }
9491
Daniel Veillardcda96922001-08-21 10:56:31 +00009492 if (lst != NULL)
9493 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009494 if ((URL == NULL) && (ID == NULL))
9495 return(-1);
9496 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9497 return(-1);
9498
9499
9500 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9501 if (ctxt == NULL) return(-1);
9502 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +00009503 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +00009504 oldsax = ctxt->sax;
9505 ctxt->sax = ctx->sax;
9506 newDoc = xmlNewDoc(BAD_CAST "1.0");
9507 if (newDoc == NULL) {
9508 xmlFreeParserCtxt(ctxt);
9509 return(-1);
9510 }
9511 if (ctx->myDoc != NULL) {
9512 newDoc->intSubset = ctx->myDoc->intSubset;
9513 newDoc->extSubset = ctx->myDoc->extSubset;
9514 }
9515 if (ctx->myDoc->URL != NULL) {
9516 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9517 }
9518 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9519 if (newDoc->children == NULL) {
9520 ctxt->sax = oldsax;
9521 xmlFreeParserCtxt(ctxt);
9522 newDoc->intSubset = NULL;
9523 newDoc->extSubset = NULL;
9524 xmlFreeDoc(newDoc);
9525 return(-1);
9526 }
9527 nodePush(ctxt, newDoc->children);
9528 if (ctx->myDoc == NULL) {
9529 ctxt->myDoc = newDoc;
9530 } else {
9531 ctxt->myDoc = ctx->myDoc;
9532 newDoc->children->doc = ctx->myDoc;
9533 }
9534
Daniel Veillard87a764e2001-06-20 17:41:10 +00009535 /*
9536 * Get the 4 first bytes and decode the charset
9537 * if enc != XML_CHAR_ENCODING_NONE
9538 * plug some encoding conversion routines.
9539 */
9540 GROW
9541 start[0] = RAW;
9542 start[1] = NXT(1);
9543 start[2] = NXT(2);
9544 start[3] = NXT(3);
9545 enc = xmlDetectCharEncoding(start, 4);
9546 if (enc != XML_CHAR_ENCODING_NONE) {
9547 xmlSwitchEncoding(ctxt, enc);
9548 }
9549
Owen Taylor3473f882001-02-23 17:55:21 +00009550 /*
9551 * Parse a possible text declaration first
9552 */
Owen Taylor3473f882001-02-23 17:55:21 +00009553 if ((RAW == '<') && (NXT(1) == '?') &&
9554 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9555 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9556 xmlParseTextDecl(ctxt);
9557 }
9558
9559 /*
9560 * Doing validity checking on chunk doesn't make sense
9561 */
9562 ctxt->instate = XML_PARSER_CONTENT;
9563 ctxt->validate = ctx->validate;
9564 ctxt->loadsubset = ctx->loadsubset;
9565 ctxt->depth = ctx->depth + 1;
9566 ctxt->replaceEntities = ctx->replaceEntities;
9567 if (ctxt->validate) {
9568 ctxt->vctxt.error = ctx->vctxt.error;
9569 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009570 } else {
9571 ctxt->vctxt.error = NULL;
9572 ctxt->vctxt.warning = NULL;
9573 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009574 ctxt->vctxt.nodeTab = NULL;
9575 ctxt->vctxt.nodeNr = 0;
9576 ctxt->vctxt.nodeMax = 0;
9577 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009578
9579 xmlParseContent(ctxt);
9580
9581 if ((RAW == '<') && (NXT(1) == '/')) {
9582 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9583 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9584 ctxt->sax->error(ctxt->userData,
9585 "chunk is not well balanced\n");
9586 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009587 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009588 } else if (RAW != 0) {
9589 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9590 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9591 ctxt->sax->error(ctxt->userData,
9592 "extra content at the end of well balanced chunk\n");
9593 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009594 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009595 }
9596 if (ctxt->node != newDoc->children) {
9597 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9598 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9599 ctxt->sax->error(ctxt->userData,
9600 "chunk is not well balanced\n");
9601 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009602 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009603 }
9604
9605 if (!ctxt->wellFormed) {
9606 if (ctxt->errNo == 0)
9607 ret = 1;
9608 else
9609 ret = ctxt->errNo;
9610 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009611 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009612 xmlNodePtr cur;
9613
9614 /*
9615 * Return the newly created nodeset after unlinking it from
9616 * they pseudo parent.
9617 */
9618 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009619 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009620 while (cur != NULL) {
9621 cur->parent = NULL;
9622 cur = cur->next;
9623 }
9624 newDoc->children->children = NULL;
9625 }
9626 ret = 0;
9627 }
9628 ctxt->sax = oldsax;
9629 xmlFreeParserCtxt(ctxt);
9630 newDoc->intSubset = NULL;
9631 newDoc->extSubset = NULL;
9632 xmlFreeDoc(newDoc);
9633
9634 return(ret);
9635}
9636
9637/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009638 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009639 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009640 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009641 * @sax: the SAX handler bloc (possibly NULL)
9642 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9643 * @depth: Used for loop detection, use 0
9644 * @URL: the URL for the entity to load
9645 * @ID: the System ID for the entity to load
9646 * @list: the return value for the set of parsed nodes
9647 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009648 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009649 *
9650 * Returns 0 if the entity is well formed, -1 in case of args problem and
9651 * the parser error code otherwise
9652 */
9653
Daniel Veillard257d9102001-05-08 10:41:44 +00009654static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009655xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9656 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009657 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009658 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009659 xmlParserCtxtPtr ctxt;
9660 xmlDocPtr newDoc;
9661 xmlSAXHandlerPtr oldsax = NULL;
9662 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009663 xmlChar start[4];
9664 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009665
9666 if (depth > 40) {
9667 return(XML_ERR_ENTITY_LOOP);
9668 }
9669
9670
9671
9672 if (list != NULL)
9673 *list = NULL;
9674 if ((URL == NULL) && (ID == NULL))
9675 return(-1);
9676 if (doc == NULL) /* @@ relax but check for dereferences */
9677 return(-1);
9678
9679
9680 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9681 if (ctxt == NULL) return(-1);
9682 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009683 if (oldctxt != NULL) {
9684 ctxt->_private = oldctxt->_private;
9685 ctxt->loadsubset = oldctxt->loadsubset;
9686 ctxt->validate = oldctxt->validate;
9687 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009688 ctxt->record_info = oldctxt->record_info;
9689 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
9690 ctxt->node_seq.length = oldctxt->node_seq.length;
9691 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009692 } else {
9693 /*
9694 * Doing validity checking on chunk without context
9695 * doesn't make sense
9696 */
9697 ctxt->_private = NULL;
9698 ctxt->validate = 0;
9699 ctxt->external = 2;
9700 ctxt->loadsubset = 0;
9701 }
Owen Taylor3473f882001-02-23 17:55:21 +00009702 if (sax != NULL) {
9703 oldsax = ctxt->sax;
9704 ctxt->sax = sax;
9705 if (user_data != NULL)
9706 ctxt->userData = user_data;
9707 }
9708 newDoc = xmlNewDoc(BAD_CAST "1.0");
9709 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009710 ctxt->node_seq.maximum = 0;
9711 ctxt->node_seq.length = 0;
9712 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009713 xmlFreeParserCtxt(ctxt);
9714 return(-1);
9715 }
9716 if (doc != NULL) {
9717 newDoc->intSubset = doc->intSubset;
9718 newDoc->extSubset = doc->extSubset;
9719 }
9720 if (doc->URL != NULL) {
9721 newDoc->URL = xmlStrdup(doc->URL);
9722 }
9723 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9724 if (newDoc->children == NULL) {
9725 if (sax != NULL)
9726 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009727 ctxt->node_seq.maximum = 0;
9728 ctxt->node_seq.length = 0;
9729 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009730 xmlFreeParserCtxt(ctxt);
9731 newDoc->intSubset = NULL;
9732 newDoc->extSubset = NULL;
9733 xmlFreeDoc(newDoc);
9734 return(-1);
9735 }
9736 nodePush(ctxt, newDoc->children);
9737 if (doc == NULL) {
9738 ctxt->myDoc = newDoc;
9739 } else {
9740 ctxt->myDoc = doc;
9741 newDoc->children->doc = doc;
9742 }
9743
Daniel Veillard87a764e2001-06-20 17:41:10 +00009744 /*
9745 * Get the 4 first bytes and decode the charset
9746 * if enc != XML_CHAR_ENCODING_NONE
9747 * plug some encoding conversion routines.
9748 */
9749 GROW;
9750 start[0] = RAW;
9751 start[1] = NXT(1);
9752 start[2] = NXT(2);
9753 start[3] = NXT(3);
9754 enc = xmlDetectCharEncoding(start, 4);
9755 if (enc != XML_CHAR_ENCODING_NONE) {
9756 xmlSwitchEncoding(ctxt, enc);
9757 }
9758
Owen Taylor3473f882001-02-23 17:55:21 +00009759 /*
9760 * Parse a possible text declaration first
9761 */
Owen Taylor3473f882001-02-23 17:55:21 +00009762 if ((RAW == '<') && (NXT(1) == '?') &&
9763 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9764 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9765 xmlParseTextDecl(ctxt);
9766 }
9767
Owen Taylor3473f882001-02-23 17:55:21 +00009768 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009769 ctxt->depth = depth;
9770
9771 xmlParseContent(ctxt);
9772
Daniel Veillard561b7f82002-03-20 21:55:57 +00009773 if ((RAW == '<') && (NXT(1) == '/')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009774 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9775 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9776 ctxt->sax->error(ctxt->userData,
9777 "chunk is not well balanced\n");
9778 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009779 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00009780 } else if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00009781 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9782 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9783 ctxt->sax->error(ctxt->userData,
9784 "extra content at the end of well balanced chunk\n");
9785 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009786 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009787 }
9788 if (ctxt->node != newDoc->children) {
9789 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9790 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9791 ctxt->sax->error(ctxt->userData,
9792 "chunk is not well balanced\n");
9793 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009794 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009795 }
9796
9797 if (!ctxt->wellFormed) {
9798 if (ctxt->errNo == 0)
9799 ret = 1;
9800 else
9801 ret = ctxt->errNo;
9802 } else {
9803 if (list != NULL) {
9804 xmlNodePtr cur;
9805
9806 /*
9807 * Return the newly created nodeset after unlinking it from
9808 * they pseudo parent.
9809 */
9810 cur = newDoc->children->children;
9811 *list = cur;
9812 while (cur != NULL) {
9813 cur->parent = NULL;
9814 cur = cur->next;
9815 }
9816 newDoc->children->children = NULL;
9817 }
9818 ret = 0;
9819 }
9820 if (sax != NULL)
9821 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009822 ctxt->node_seq.maximum = 0;
9823 ctxt->node_seq.length = 0;
9824 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009825 xmlFreeParserCtxt(ctxt);
9826 newDoc->intSubset = NULL;
9827 newDoc->extSubset = NULL;
9828 xmlFreeDoc(newDoc);
9829
9830 return(ret);
9831}
9832
9833/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009834 * xmlParseExternalEntity:
9835 * @doc: the document the chunk pertains to
9836 * @sax: the SAX handler bloc (possibly NULL)
9837 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9838 * @depth: Used for loop detection, use 0
9839 * @URL: the URL for the entity to load
9840 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009841 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009842 *
9843 * Parse an external general entity
9844 * An external general parsed entity is well-formed if it matches the
9845 * production labeled extParsedEnt.
9846 *
9847 * [78] extParsedEnt ::= TextDecl? content
9848 *
9849 * Returns 0 if the entity is well formed, -1 in case of args problem and
9850 * the parser error code otherwise
9851 */
9852
9853int
9854xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009855 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009856 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009857 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009858}
9859
9860/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009861 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009862 * @doc: the document the chunk pertains to
9863 * @sax: the SAX handler bloc (possibly NULL)
9864 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9865 * @depth: Used for loop detection, use 0
9866 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009867 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009868 *
9869 * Parse a well-balanced chunk of an XML document
9870 * called by the parser
9871 * The allowed sequence for the Well Balanced Chunk is the one defined by
9872 * the content production in the XML grammar:
9873 *
9874 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9875 *
9876 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9877 * the parser error code otherwise
9878 */
9879
9880int
9881xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009882 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +00009883 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
9884 depth, string, lst, 0 );
9885}
9886
9887/**
Daniel Veillard328f48c2002-11-15 15:24:34 +00009888 * xmlParseBalancedChunkMemoryInternal:
9889 * @oldctxt: the existing parsing context
9890 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9891 * @user_data: the user data field for the parser context
9892 * @lst: the return value for the set of parsed nodes
9893 *
9894 *
9895 * Parse a well-balanced chunk of an XML document
9896 * called by the parser
9897 * The allowed sequence for the Well Balanced Chunk is the one defined by
9898 * the content production in the XML grammar:
9899 *
9900 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9901 *
9902 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9903 * the parser error code otherwise
9904 *
9905 * In case recover is set to 1, the nodelist will not be empty even if
9906 * the parsed chunk is not well balanced.
9907 */
9908static int
9909xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
9910 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
9911 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009912 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009913 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009914 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009915 int size;
9916 int ret = 0;
9917
9918 if (oldctxt->depth > 40) {
9919 return(XML_ERR_ENTITY_LOOP);
9920 }
9921
9922
9923 if (lst != NULL)
9924 *lst = NULL;
9925 if (string == NULL)
9926 return(-1);
9927
9928 size = xmlStrlen(string);
9929
9930 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9931 if (ctxt == NULL) return(-1);
9932 if (user_data != NULL)
9933 ctxt->userData = user_data;
9934 else
9935 ctxt->userData = ctxt;
9936
9937 oldsax = ctxt->sax;
9938 ctxt->sax = oldctxt->sax;
Daniel Veillarde1ca5032002-12-09 14:13:43 +00009939 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009940 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +00009941 newDoc = xmlNewDoc(BAD_CAST "1.0");
9942 if (newDoc == NULL) {
9943 ctxt->sax = oldsax;
9944 xmlFreeParserCtxt(ctxt);
9945 return(-1);
9946 }
Daniel Veillard328f48c2002-11-15 15:24:34 +00009947 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009948 } else {
9949 ctxt->myDoc = oldctxt->myDoc;
9950 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009951 }
Daniel Veillard9bc53102002-11-25 13:20:04 +00009952 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +00009953 BAD_CAST "pseudoroot", NULL);
9954 if (ctxt->myDoc->children == NULL) {
9955 ctxt->sax = oldsax;
9956 xmlFreeParserCtxt(ctxt);
9957 if (newDoc != NULL)
9958 xmlFreeDoc(newDoc);
9959 return(-1);
9960 }
9961 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +00009962 ctxt->instate = XML_PARSER_CONTENT;
9963 ctxt->depth = oldctxt->depth + 1;
9964
Daniel Veillard328f48c2002-11-15 15:24:34 +00009965 ctxt->validate = 0;
9966 ctxt->loadsubset = oldctxt->loadsubset;
9967
Daniel Veillard68e9e742002-11-16 15:35:11 +00009968 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +00009969 if ((RAW == '<') && (NXT(1) == '/')) {
9970 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9971 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9972 ctxt->sax->error(ctxt->userData,
9973 "chunk is not well balanced\n");
9974 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009975 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009976 } else if (RAW != 0) {
9977 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9978 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9979 ctxt->sax->error(ctxt->userData,
9980 "extra content at the end of well balanced chunk\n");
9981 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009982 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009983 }
Daniel Veillard68e9e742002-11-16 15:35:11 +00009984 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard328f48c2002-11-15 15:24:34 +00009985 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9987 ctxt->sax->error(ctxt->userData,
9988 "chunk is not well balanced\n");
9989 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009990 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009991 }
9992
9993 if (!ctxt->wellFormed) {
9994 if (ctxt->errNo == 0)
9995 ret = 1;
9996 else
9997 ret = ctxt->errNo;
9998 } else {
9999 ret = 0;
10000 }
10001
10002 if ((lst != NULL) && (ret == 0)) {
10003 xmlNodePtr cur;
10004
10005 /*
10006 * Return the newly created nodeset after unlinking it from
10007 * they pseudo parent.
10008 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000010009 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010010 *lst = cur;
10011 while (cur != NULL) {
Daniel Veillard8d589042003-02-04 15:07:21 +000010012 if (oldctxt->validate && oldctxt->wellFormed &&
10013 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
10014 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
10015 oldctxt->myDoc, cur);
10016 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010017 cur->parent = NULL;
10018 cur = cur->next;
10019 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010020 ctxt->myDoc->children->children = NULL;
10021 }
10022 if (ctxt->myDoc != NULL) {
10023 xmlFreeNode(ctxt->myDoc->children);
10024 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010025 }
10026
10027 ctxt->sax = oldsax;
10028 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010029 if (newDoc != NULL)
10030 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010031
10032 return(ret);
10033}
10034
10035/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000010036 * xmlParseBalancedChunkMemoryRecover:
10037 * @doc: the document the chunk pertains to
10038 * @sax: the SAX handler bloc (possibly NULL)
10039 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10040 * @depth: Used for loop detection, use 0
10041 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10042 * @lst: the return value for the set of parsed nodes
10043 * @recover: return nodes even if the data is broken (use 0)
10044 *
10045 *
10046 * Parse a well-balanced chunk of an XML document
10047 * called by the parser
10048 * The allowed sequence for the Well Balanced Chunk is the one defined by
10049 * the content production in the XML grammar:
10050 *
10051 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10052 *
10053 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10054 * the parser error code otherwise
10055 *
10056 * In case recover is set to 1, the nodelist will not be empty even if
10057 * the parsed chunk is not well balanced.
10058 */
10059int
10060xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
10061 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
10062 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000010063 xmlParserCtxtPtr ctxt;
10064 xmlDocPtr newDoc;
10065 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000010066 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000010067 int size;
10068 int ret = 0;
10069
10070 if (depth > 40) {
10071 return(XML_ERR_ENTITY_LOOP);
10072 }
10073
10074
Daniel Veillardcda96922001-08-21 10:56:31 +000010075 if (lst != NULL)
10076 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010077 if (string == NULL)
10078 return(-1);
10079
10080 size = xmlStrlen(string);
10081
10082 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10083 if (ctxt == NULL) return(-1);
10084 ctxt->userData = ctxt;
10085 if (sax != NULL) {
10086 oldsax = ctxt->sax;
10087 ctxt->sax = sax;
10088 if (user_data != NULL)
10089 ctxt->userData = user_data;
10090 }
10091 newDoc = xmlNewDoc(BAD_CAST "1.0");
10092 if (newDoc == NULL) {
10093 xmlFreeParserCtxt(ctxt);
10094 return(-1);
10095 }
10096 if (doc != NULL) {
10097 newDoc->intSubset = doc->intSubset;
10098 newDoc->extSubset = doc->extSubset;
10099 }
10100 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10101 if (newDoc->children == NULL) {
10102 if (sax != NULL)
10103 ctxt->sax = oldsax;
10104 xmlFreeParserCtxt(ctxt);
10105 newDoc->intSubset = NULL;
10106 newDoc->extSubset = NULL;
10107 xmlFreeDoc(newDoc);
10108 return(-1);
10109 }
10110 nodePush(ctxt, newDoc->children);
10111 if (doc == NULL) {
10112 ctxt->myDoc = newDoc;
10113 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000010114 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000010115 newDoc->children->doc = doc;
10116 }
10117 ctxt->instate = XML_PARSER_CONTENT;
10118 ctxt->depth = depth;
10119
10120 /*
10121 * Doing validity checking on chunk doesn't make sense
10122 */
10123 ctxt->validate = 0;
10124 ctxt->loadsubset = 0;
10125
Daniel Veillardb39bc392002-10-26 19:29:51 +000010126 if ( doc != NULL ){
10127 content = doc->children;
10128 doc->children = NULL;
10129 xmlParseContent(ctxt);
10130 doc->children = content;
10131 }
10132 else {
10133 xmlParseContent(ctxt);
10134 }
Owen Taylor3473f882001-02-23 17:55:21 +000010135 if ((RAW == '<') && (NXT(1) == '/')) {
10136 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10137 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10138 ctxt->sax->error(ctxt->userData,
10139 "chunk is not well balanced\n");
10140 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010141 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010142 } else if (RAW != 0) {
10143 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10144 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10145 ctxt->sax->error(ctxt->userData,
10146 "extra content at the end of well balanced chunk\n");
10147 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010148 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010149 }
10150 if (ctxt->node != newDoc->children) {
10151 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10152 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10153 ctxt->sax->error(ctxt->userData,
10154 "chunk is not well balanced\n");
10155 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010156 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010157 }
10158
10159 if (!ctxt->wellFormed) {
10160 if (ctxt->errNo == 0)
10161 ret = 1;
10162 else
10163 ret = ctxt->errNo;
10164 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010165 ret = 0;
10166 }
10167
10168 if (lst != NULL && (ret == 0 || recover == 1)) {
10169 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010170
10171 /*
10172 * Return the newly created nodeset after unlinking it from
10173 * they pseudo parent.
10174 */
10175 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010176 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010177 while (cur != NULL) {
10178 cur->parent = NULL;
10179 cur = cur->next;
10180 }
10181 newDoc->children->children = NULL;
10182 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000010183
Owen Taylor3473f882001-02-23 17:55:21 +000010184 if (sax != NULL)
10185 ctxt->sax = oldsax;
10186 xmlFreeParserCtxt(ctxt);
10187 newDoc->intSubset = NULL;
10188 newDoc->extSubset = NULL;
10189 xmlFreeDoc(newDoc);
10190
10191 return(ret);
10192}
10193
10194/**
10195 * xmlSAXParseEntity:
10196 * @sax: the SAX handler block
10197 * @filename: the filename
10198 *
10199 * parse an XML external entity out of context and build a tree.
10200 * It use the given SAX function block to handle the parsing callback.
10201 * If sax is NULL, fallback to the default DOM tree building routines.
10202 *
10203 * [78] extParsedEnt ::= TextDecl? content
10204 *
10205 * This correspond to a "Well Balanced" chunk
10206 *
10207 * Returns the resulting document tree
10208 */
10209
10210xmlDocPtr
10211xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
10212 xmlDocPtr ret;
10213 xmlParserCtxtPtr ctxt;
10214 char *directory = NULL;
10215
10216 ctxt = xmlCreateFileParserCtxt(filename);
10217 if (ctxt == NULL) {
10218 return(NULL);
10219 }
10220 if (sax != NULL) {
10221 if (ctxt->sax != NULL)
10222 xmlFree(ctxt->sax);
10223 ctxt->sax = sax;
10224 ctxt->userData = NULL;
10225 }
10226
10227 if ((ctxt->directory == NULL) && (directory == NULL))
10228 directory = xmlParserGetDirectory(filename);
10229
10230 xmlParseExtParsedEnt(ctxt);
10231
10232 if (ctxt->wellFormed)
10233 ret = ctxt->myDoc;
10234 else {
10235 ret = NULL;
10236 xmlFreeDoc(ctxt->myDoc);
10237 ctxt->myDoc = NULL;
10238 }
10239 if (sax != NULL)
10240 ctxt->sax = NULL;
10241 xmlFreeParserCtxt(ctxt);
10242
10243 return(ret);
10244}
10245
10246/**
10247 * xmlParseEntity:
10248 * @filename: the filename
10249 *
10250 * parse an XML external entity out of context and build a tree.
10251 *
10252 * [78] extParsedEnt ::= TextDecl? content
10253 *
10254 * This correspond to a "Well Balanced" chunk
10255 *
10256 * Returns the resulting document tree
10257 */
10258
10259xmlDocPtr
10260xmlParseEntity(const char *filename) {
10261 return(xmlSAXParseEntity(NULL, filename));
10262}
10263
10264/**
10265 * xmlCreateEntityParserCtxt:
10266 * @URL: the entity URL
10267 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010268 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000010269 *
10270 * Create a parser context for an external entity
10271 * Automatic support for ZLIB/Compress compressed document is provided
10272 * by default if found at compile-time.
10273 *
10274 * Returns the new parser context or NULL
10275 */
10276xmlParserCtxtPtr
10277xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10278 const xmlChar *base) {
10279 xmlParserCtxtPtr ctxt;
10280 xmlParserInputPtr inputStream;
10281 char *directory = NULL;
10282 xmlChar *uri;
10283
10284 ctxt = xmlNewParserCtxt();
10285 if (ctxt == NULL) {
10286 return(NULL);
10287 }
10288
10289 uri = xmlBuildURI(URL, base);
10290
10291 if (uri == NULL) {
10292 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
10293 if (inputStream == NULL) {
10294 xmlFreeParserCtxt(ctxt);
10295 return(NULL);
10296 }
10297
10298 inputPush(ctxt, inputStream);
10299
10300 if ((ctxt->directory == NULL) && (directory == NULL))
10301 directory = xmlParserGetDirectory((char *)URL);
10302 if ((ctxt->directory == NULL) && (directory != NULL))
10303 ctxt->directory = directory;
10304 } else {
10305 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
10306 if (inputStream == NULL) {
10307 xmlFree(uri);
10308 xmlFreeParserCtxt(ctxt);
10309 return(NULL);
10310 }
10311
10312 inputPush(ctxt, inputStream);
10313
10314 if ((ctxt->directory == NULL) && (directory == NULL))
10315 directory = xmlParserGetDirectory((char *)uri);
10316 if ((ctxt->directory == NULL) && (directory != NULL))
10317 ctxt->directory = directory;
10318 xmlFree(uri);
10319 }
10320
10321 return(ctxt);
10322}
10323
10324/************************************************************************
10325 * *
10326 * Front ends when parsing from a file *
10327 * *
10328 ************************************************************************/
10329
10330/**
10331 * xmlCreateFileParserCtxt:
10332 * @filename: the filename
10333 *
10334 * Create a parser context for a file content.
10335 * Automatic support for ZLIB/Compress compressed document is provided
10336 * by default if found at compile-time.
10337 *
10338 * Returns the new parser context or NULL
10339 */
10340xmlParserCtxtPtr
10341xmlCreateFileParserCtxt(const char *filename)
10342{
10343 xmlParserCtxtPtr ctxt;
10344 xmlParserInputPtr inputStream;
Igor Zlatkovicce076162003-02-23 13:39:39 +000010345 char *canonicFilename;
Owen Taylor3473f882001-02-23 17:55:21 +000010346 char *directory = NULL;
10347
Owen Taylor3473f882001-02-23 17:55:21 +000010348 ctxt = xmlNewParserCtxt();
10349 if (ctxt == NULL) {
10350 if (xmlDefaultSAXHandler.error != NULL) {
10351 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
10352 }
10353 return(NULL);
10354 }
10355
Igor Zlatkovicce076162003-02-23 13:39:39 +000010356 canonicFilename = xmlCanonicPath(filename);
10357 if (canonicFilename == NULL) {
10358 if (xmlDefaultSAXHandler.error != NULL) {
10359 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
10360 }
10361 return(NULL);
10362 }
10363
10364 inputStream = xmlLoadExternalEntity(canonicFilename, NULL, ctxt);
10365 xmlFree(canonicFilename);
Owen Taylor3473f882001-02-23 17:55:21 +000010366 if (inputStream == NULL) {
10367 xmlFreeParserCtxt(ctxt);
10368 return(NULL);
10369 }
10370
Owen Taylor3473f882001-02-23 17:55:21 +000010371 inputPush(ctxt, inputStream);
10372 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010373 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000010374 if ((ctxt->directory == NULL) && (directory != NULL))
10375 ctxt->directory = directory;
10376
10377 return(ctxt);
10378}
10379
10380/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010381 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000010382 * @sax: the SAX handler block
10383 * @filename: the filename
10384 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10385 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000010386 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000010387 *
10388 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10389 * compressed document is provided by default if found at compile-time.
10390 * It use the given SAX function block to handle the parsing callback.
10391 * If sax is NULL, fallback to the default DOM tree building routines.
10392 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000010393 * User data (void *) is stored within the parser context in the
10394 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000010395 *
Owen Taylor3473f882001-02-23 17:55:21 +000010396 * Returns the resulting document tree
10397 */
10398
10399xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000010400xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
10401 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000010402 xmlDocPtr ret;
10403 xmlParserCtxtPtr ctxt;
10404 char *directory = NULL;
10405
Daniel Veillard635ef722001-10-29 11:48:19 +000010406 xmlInitParser();
10407
Owen Taylor3473f882001-02-23 17:55:21 +000010408 ctxt = xmlCreateFileParserCtxt(filename);
10409 if (ctxt == NULL) {
10410 return(NULL);
10411 }
10412 if (sax != NULL) {
10413 if (ctxt->sax != NULL)
10414 xmlFree(ctxt->sax);
10415 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010416 }
Daniel Veillarda293c322001-10-02 13:54:14 +000010417 if (data!=NULL) {
10418 ctxt->_private=data;
10419 }
Owen Taylor3473f882001-02-23 17:55:21 +000010420
10421 if ((ctxt->directory == NULL) && (directory == NULL))
10422 directory = xmlParserGetDirectory(filename);
10423 if ((ctxt->directory == NULL) && (directory != NULL))
10424 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
10425
Daniel Veillarddad3f682002-11-17 16:47:27 +000010426 ctxt->recovery = recovery;
10427
Owen Taylor3473f882001-02-23 17:55:21 +000010428 xmlParseDocument(ctxt);
10429
10430 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10431 else {
10432 ret = NULL;
10433 xmlFreeDoc(ctxt->myDoc);
10434 ctxt->myDoc = NULL;
10435 }
10436 if (sax != NULL)
10437 ctxt->sax = NULL;
10438 xmlFreeParserCtxt(ctxt);
10439
10440 return(ret);
10441}
10442
10443/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010444 * xmlSAXParseFile:
10445 * @sax: the SAX handler block
10446 * @filename: the filename
10447 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10448 * documents
10449 *
10450 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10451 * compressed document is provided by default if found at compile-time.
10452 * It use the given SAX function block to handle the parsing callback.
10453 * If sax is NULL, fallback to the default DOM tree building routines.
10454 *
10455 * Returns the resulting document tree
10456 */
10457
10458xmlDocPtr
10459xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
10460 int recovery) {
10461 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
10462}
10463
10464/**
Owen Taylor3473f882001-02-23 17:55:21 +000010465 * xmlRecoverDoc:
10466 * @cur: a pointer to an array of xmlChar
10467 *
10468 * parse an XML in-memory document and build a tree.
10469 * In the case the document is not Well Formed, a tree is built anyway
10470 *
10471 * Returns the resulting document tree
10472 */
10473
10474xmlDocPtr
10475xmlRecoverDoc(xmlChar *cur) {
10476 return(xmlSAXParseDoc(NULL, cur, 1));
10477}
10478
10479/**
10480 * xmlParseFile:
10481 * @filename: the filename
10482 *
10483 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10484 * compressed document is provided by default if found at compile-time.
10485 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000010486 * Returns the resulting document tree if the file was wellformed,
10487 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000010488 */
10489
10490xmlDocPtr
10491xmlParseFile(const char *filename) {
10492 return(xmlSAXParseFile(NULL, filename, 0));
10493}
10494
10495/**
10496 * xmlRecoverFile:
10497 * @filename: the filename
10498 *
10499 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10500 * compressed document is provided by default if found at compile-time.
10501 * In the case the document is not Well Formed, a tree is built anyway
10502 *
10503 * Returns the resulting document tree
10504 */
10505
10506xmlDocPtr
10507xmlRecoverFile(const char *filename) {
10508 return(xmlSAXParseFile(NULL, filename, 1));
10509}
10510
10511
10512/**
10513 * xmlSetupParserForBuffer:
10514 * @ctxt: an XML parser context
10515 * @buffer: a xmlChar * buffer
10516 * @filename: a file name
10517 *
10518 * Setup the parser context to parse a new buffer; Clears any prior
10519 * contents from the parser context. The buffer parameter must not be
10520 * NULL, but the filename parameter can be
10521 */
10522void
10523xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
10524 const char* filename)
10525{
10526 xmlParserInputPtr input;
10527
10528 input = xmlNewInputStream(ctxt);
10529 if (input == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +000010530 xmlGenericError(xmlGenericErrorContext,
10531 "malloc");
Owen Taylor3473f882001-02-23 17:55:21 +000010532 xmlFree(ctxt);
10533 return;
10534 }
10535
10536 xmlClearParserCtxt(ctxt);
10537 if (filename != NULL)
10538 input->filename = xmlMemStrdup(filename);
10539 input->base = buffer;
10540 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010541 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000010542 inputPush(ctxt, input);
10543}
10544
10545/**
10546 * xmlSAXUserParseFile:
10547 * @sax: a SAX handler
10548 * @user_data: The user data returned on SAX callbacks
10549 * @filename: a file name
10550 *
10551 * parse an XML file and call the given SAX handler routines.
10552 * Automatic support for ZLIB/Compress compressed document is provided
10553 *
10554 * Returns 0 in case of success or a error number otherwise
10555 */
10556int
10557xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10558 const char *filename) {
10559 int ret = 0;
10560 xmlParserCtxtPtr ctxt;
10561
10562 ctxt = xmlCreateFileParserCtxt(filename);
10563 if (ctxt == NULL) return -1;
10564 if (ctxt->sax != &xmlDefaultSAXHandler)
10565 xmlFree(ctxt->sax);
10566 ctxt->sax = sax;
10567 if (user_data != NULL)
10568 ctxt->userData = user_data;
10569
10570 xmlParseDocument(ctxt);
10571
10572 if (ctxt->wellFormed)
10573 ret = 0;
10574 else {
10575 if (ctxt->errNo != 0)
10576 ret = ctxt->errNo;
10577 else
10578 ret = -1;
10579 }
10580 if (sax != NULL)
10581 ctxt->sax = NULL;
10582 xmlFreeParserCtxt(ctxt);
10583
10584 return ret;
10585}
10586
10587/************************************************************************
10588 * *
10589 * Front ends when parsing from memory *
10590 * *
10591 ************************************************************************/
10592
10593/**
10594 * xmlCreateMemoryParserCtxt:
10595 * @buffer: a pointer to a char array
10596 * @size: the size of the array
10597 *
10598 * Create a parser context for an XML in-memory document.
10599 *
10600 * Returns the new parser context or NULL
10601 */
10602xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010603xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010604 xmlParserCtxtPtr ctxt;
10605 xmlParserInputPtr input;
10606 xmlParserInputBufferPtr buf;
10607
10608 if (buffer == NULL)
10609 return(NULL);
10610 if (size <= 0)
10611 return(NULL);
10612
10613 ctxt = xmlNewParserCtxt();
10614 if (ctxt == NULL)
10615 return(NULL);
10616
10617 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010618 if (buf == NULL) {
10619 xmlFreeParserCtxt(ctxt);
10620 return(NULL);
10621 }
Owen Taylor3473f882001-02-23 17:55:21 +000010622
10623 input = xmlNewInputStream(ctxt);
10624 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010625 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010626 xmlFreeParserCtxt(ctxt);
10627 return(NULL);
10628 }
10629
10630 input->filename = NULL;
10631 input->buf = buf;
10632 input->base = input->buf->buffer->content;
10633 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010634 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010635
10636 inputPush(ctxt, input);
10637 return(ctxt);
10638}
10639
10640/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010641 * xmlSAXParseMemoryWithData:
10642 * @sax: the SAX handler block
10643 * @buffer: an pointer to a char array
10644 * @size: the size of the array
10645 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10646 * documents
10647 * @data: the userdata
10648 *
10649 * parse an XML in-memory block and use the given SAX function block
10650 * to handle the parsing callback. If sax is NULL, fallback to the default
10651 * DOM tree building routines.
10652 *
10653 * User data (void *) is stored within the parser context in the
10654 * context's _private member, so it is available nearly everywhere in libxml
10655 *
10656 * Returns the resulting document tree
10657 */
10658
10659xmlDocPtr
10660xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
10661 int size, int recovery, void *data) {
10662 xmlDocPtr ret;
10663 xmlParserCtxtPtr ctxt;
10664
10665 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10666 if (ctxt == NULL) return(NULL);
10667 if (sax != NULL) {
10668 if (ctxt->sax != NULL)
10669 xmlFree(ctxt->sax);
10670 ctxt->sax = sax;
10671 }
10672 if (data!=NULL) {
10673 ctxt->_private=data;
10674 }
10675
10676 xmlParseDocument(ctxt);
10677
10678 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10679 else {
10680 ret = NULL;
10681 xmlFreeDoc(ctxt->myDoc);
10682 ctxt->myDoc = NULL;
10683 }
10684 if (sax != NULL)
10685 ctxt->sax = NULL;
10686 xmlFreeParserCtxt(ctxt);
10687
10688 return(ret);
10689}
10690
10691/**
Owen Taylor3473f882001-02-23 17:55:21 +000010692 * xmlSAXParseMemory:
10693 * @sax: the SAX handler block
10694 * @buffer: an pointer to a char array
10695 * @size: the size of the array
10696 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10697 * documents
10698 *
10699 * parse an XML in-memory block and use the given SAX function block
10700 * to handle the parsing callback. If sax is NULL, fallback to the default
10701 * DOM tree building routines.
10702 *
10703 * Returns the resulting document tree
10704 */
10705xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010706xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10707 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010708 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010709}
10710
10711/**
10712 * xmlParseMemory:
10713 * @buffer: an pointer to a char array
10714 * @size: the size of the array
10715 *
10716 * parse an XML in-memory block and build a tree.
10717 *
10718 * Returns the resulting document tree
10719 */
10720
Daniel Veillard50822cb2001-07-26 20:05:51 +000010721xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010722 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10723}
10724
10725/**
10726 * xmlRecoverMemory:
10727 * @buffer: an pointer to a char array
10728 * @size: the size of the array
10729 *
10730 * parse an XML in-memory block and build a tree.
10731 * In the case the document is not Well Formed, a tree is built anyway
10732 *
10733 * Returns the resulting document tree
10734 */
10735
Daniel Veillard50822cb2001-07-26 20:05:51 +000010736xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010737 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10738}
10739
10740/**
10741 * xmlSAXUserParseMemory:
10742 * @sax: a SAX handler
10743 * @user_data: The user data returned on SAX callbacks
10744 * @buffer: an in-memory XML document input
10745 * @size: the length of the XML document in bytes
10746 *
10747 * A better SAX parsing routine.
10748 * parse an XML in-memory buffer and call the given SAX handler routines.
10749 *
10750 * Returns 0 in case of success or a error number otherwise
10751 */
10752int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010753 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010754 int ret = 0;
10755 xmlParserCtxtPtr ctxt;
10756 xmlSAXHandlerPtr oldsax = NULL;
10757
Daniel Veillard9e923512002-08-14 08:48:52 +000010758 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000010759 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10760 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000010761 oldsax = ctxt->sax;
10762 ctxt->sax = sax;
Daniel Veillard30211a02001-04-26 09:33:18 +000010763 if (user_data != NULL)
10764 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010765
10766 xmlParseDocument(ctxt);
10767
10768 if (ctxt->wellFormed)
10769 ret = 0;
10770 else {
10771 if (ctxt->errNo != 0)
10772 ret = ctxt->errNo;
10773 else
10774 ret = -1;
10775 }
Daniel Veillard9e923512002-08-14 08:48:52 +000010776 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000010777 xmlFreeParserCtxt(ctxt);
10778
10779 return ret;
10780}
10781
10782/**
10783 * xmlCreateDocParserCtxt:
10784 * @cur: a pointer to an array of xmlChar
10785 *
10786 * Creates a parser context for an XML in-memory document.
10787 *
10788 * Returns the new parser context or NULL
10789 */
10790xmlParserCtxtPtr
10791xmlCreateDocParserCtxt(xmlChar *cur) {
10792 int len;
10793
10794 if (cur == NULL)
10795 return(NULL);
10796 len = xmlStrlen(cur);
10797 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10798}
10799
10800/**
10801 * xmlSAXParseDoc:
10802 * @sax: the SAX handler block
10803 * @cur: a pointer to an array of xmlChar
10804 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10805 * documents
10806 *
10807 * parse an XML in-memory document and build a tree.
10808 * It use the given SAX function block to handle the parsing callback.
10809 * If sax is NULL, fallback to the default DOM tree building routines.
10810 *
10811 * Returns the resulting document tree
10812 */
10813
10814xmlDocPtr
10815xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10816 xmlDocPtr ret;
10817 xmlParserCtxtPtr ctxt;
10818
10819 if (cur == NULL) return(NULL);
10820
10821
10822 ctxt = xmlCreateDocParserCtxt(cur);
10823 if (ctxt == NULL) return(NULL);
10824 if (sax != NULL) {
10825 ctxt->sax = sax;
10826 ctxt->userData = NULL;
10827 }
10828
10829 xmlParseDocument(ctxt);
10830 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10831 else {
10832 ret = NULL;
10833 xmlFreeDoc(ctxt->myDoc);
10834 ctxt->myDoc = NULL;
10835 }
10836 if (sax != NULL)
10837 ctxt->sax = NULL;
10838 xmlFreeParserCtxt(ctxt);
10839
10840 return(ret);
10841}
10842
10843/**
10844 * xmlParseDoc:
10845 * @cur: a pointer to an array of xmlChar
10846 *
10847 * parse an XML in-memory document and build a tree.
10848 *
10849 * Returns the resulting document tree
10850 */
10851
10852xmlDocPtr
10853xmlParseDoc(xmlChar *cur) {
10854 return(xmlSAXParseDoc(NULL, cur, 0));
10855}
10856
Daniel Veillard8107a222002-01-13 14:10:10 +000010857/************************************************************************
10858 * *
10859 * Specific function to keep track of entities references *
10860 * and used by the XSLT debugger *
10861 * *
10862 ************************************************************************/
10863
10864static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
10865
10866/**
10867 * xmlAddEntityReference:
10868 * @ent : A valid entity
10869 * @firstNode : A valid first node for children of entity
10870 * @lastNode : A valid last node of children entity
10871 *
10872 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
10873 */
10874static void
10875xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
10876 xmlNodePtr lastNode)
10877{
10878 if (xmlEntityRefFunc != NULL) {
10879 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
10880 }
10881}
10882
10883
10884/**
10885 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000010886 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000010887 *
10888 * Set the function to call call back when a xml reference has been made
10889 */
10890void
10891xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
10892{
10893 xmlEntityRefFunc = func;
10894}
Owen Taylor3473f882001-02-23 17:55:21 +000010895
10896/************************************************************************
10897 * *
10898 * Miscellaneous *
10899 * *
10900 ************************************************************************/
10901
10902#ifdef LIBXML_XPATH_ENABLED
10903#include <libxml/xpath.h>
10904#endif
10905
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010906extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000010907static int xmlParserInitialized = 0;
10908
10909/**
10910 * xmlInitParser:
10911 *
10912 * Initialization function for the XML parser.
10913 * This is not reentrant. Call once before processing in case of
10914 * use in multithreaded programs.
10915 */
10916
10917void
10918xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000010919 if (xmlParserInitialized != 0)
10920 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010921
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010922 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
10923 (xmlGenericError == NULL))
10924 initGenericErrorDefaultFunc(NULL);
Daniel Veillardd0463562001-10-13 09:15:48 +000010925 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000010926 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000010927 xmlInitCharEncodingHandlers();
10928 xmlInitializePredefinedEntities();
10929 xmlDefaultSAXHandlerInit();
10930 xmlRegisterDefaultInputCallbacks();
10931 xmlRegisterDefaultOutputCallbacks();
10932#ifdef LIBXML_HTML_ENABLED
10933 htmlInitAutoClose();
10934 htmlDefaultSAXHandlerInit();
10935#endif
10936#ifdef LIBXML_XPATH_ENABLED
10937 xmlXPathInit();
10938#endif
10939 xmlParserInitialized = 1;
10940}
10941
10942/**
10943 * xmlCleanupParser:
10944 *
10945 * Cleanup function for the XML parser. It tries to reclaim all
10946 * parsing related global memory allocated for the parser processing.
10947 * It doesn't deallocate any document related memory. Calling this
10948 * function should not prevent reusing the parser.
Daniel Veillard7424eb62003-01-24 14:14:52 +000010949 * One should call xmlCleanupParser() only when the process has
10950 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000010951 */
10952
10953void
10954xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000010955 xmlCleanupCharEncodingHandlers();
10956 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000010957#ifdef LIBXML_CATALOG_ENABLED
10958 xmlCatalogCleanup();
10959#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000010960 xmlCleanupThreads();
10961 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010962}