blob: 83db22bfe85cbd37391cf21712e1faf390693bed [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
44#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000045#include <libxml/threads.h>
46#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000056#ifdef LIBXML_CATALOG_ENABLED
57#include <libxml/catalog.h>
58#endif
Owen Taylor3473f882001-02-23 17:55:21 +000059
60#ifdef HAVE_CTYPE_H
61#include <ctype.h>
62#endif
63#ifdef HAVE_STDLIB_H
64#include <stdlib.h>
65#endif
66#ifdef HAVE_SYS_STAT_H
67#include <sys/stat.h>
68#endif
69#ifdef HAVE_FCNTL_H
70#include <fcntl.h>
71#endif
72#ifdef HAVE_UNISTD_H
73#include <unistd.h>
74#endif
75#ifdef HAVE_ZLIB_H
76#include <zlib.h>
77#endif
78
Daniel Veillard3b2e4e12003-02-03 08:52:58 +000079/**
80 * MAX_DEPTH:
81 *
82 * arbitrary depth limit for the XML documents that we allow to
83 * process. This is not a limitation of the parser but a safety
84 * boundary feature.
85 */
86#define MAX_DEPTH 1024
Owen Taylor3473f882001-02-23 17:55:21 +000087
Daniel Veillard21a0f912001-02-25 19:54:14 +000088#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000089#define XML_PARSER_BUFFER_SIZE 100
90
Daniel Veillard5997aca2002-03-18 18:36:20 +000091#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
92
Owen Taylor3473f882001-02-23 17:55:21 +000093/*
Owen Taylor3473f882001-02-23 17:55:21 +000094 * List of XML prefixed PI allowed by W3C specs
95 */
96
Daniel Veillardb44025c2001-10-11 22:55:55 +000097static const char *xmlW3CPIs[] = {
Owen Taylor3473f882001-02-23 17:55:21 +000098 "xml-stylesheet",
99 NULL
100};
101
102/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000103xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
104 const xmlChar **str);
105
Daniel Veillard257d9102001-05-08 10:41:44 +0000106static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000107xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
108 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000109 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000110 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000111
Daniel Veillard8107a222002-01-13 14:10:10 +0000112static void
113xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
114 xmlNodePtr lastNode);
115
Daniel Veillard328f48c2002-11-15 15:24:34 +0000116static int
117xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
118 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Owen Taylor3473f882001-02-23 17:55:21 +0000119/************************************************************************
120 * *
121 * Parser stacks related functions and macros *
122 * *
123 ************************************************************************/
124
125xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
126 const xmlChar ** str);
127
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000128/**
129 * inputPush:
130 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000131 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000132 *
133 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000134 *
135 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000136 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000137extern int
138inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
139{
140 if (ctxt->inputNr >= ctxt->inputMax) {
141 ctxt->inputMax *= 2;
142 ctxt->inputTab =
143 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
144 ctxt->inputMax *
145 sizeof(ctxt->inputTab[0]));
146 if (ctxt->inputTab == NULL) {
147 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
148 return (0);
149 }
150 }
151 ctxt->inputTab[ctxt->inputNr] = value;
152 ctxt->input = value;
153 return (ctxt->inputNr++);
154}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000155/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000156 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000157 * @ctxt: an XML parser context
158 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000159 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000160 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000161 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000162 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000163extern xmlParserInputPtr
164inputPop(xmlParserCtxtPtr ctxt)
165{
166 xmlParserInputPtr ret;
167
168 if (ctxt->inputNr <= 0)
169 return (0);
170 ctxt->inputNr--;
171 if (ctxt->inputNr > 0)
172 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
173 else
174 ctxt->input = NULL;
175 ret = ctxt->inputTab[ctxt->inputNr];
176 ctxt->inputTab[ctxt->inputNr] = 0;
177 return (ret);
178}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000179/**
180 * nodePush:
181 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000182 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000183 *
184 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000185 *
186 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000187 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000188extern int
189nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
190{
191 if (ctxt->nodeNr >= ctxt->nodeMax) {
192 ctxt->nodeMax *= 2;
193 ctxt->nodeTab =
194 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
195 ctxt->nodeMax *
196 sizeof(ctxt->nodeTab[0]));
197 if (ctxt->nodeTab == NULL) {
198 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
199 return (0);
200 }
201 }
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000202#ifdef MAX_DEPTH
203 if (ctxt->nodeNr > MAX_DEPTH) {
204 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
205 ctxt->sax->error(ctxt->userData,
206 "Excessive depth in document: change MAX_DEPTH = %d\n",
207 MAX_DEPTH);
208 ctxt->wellFormed = 0;
209 ctxt->instate = XML_PARSER_EOF;
210 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
211 return(0);
212 }
213#endif
Daniel Veillard1c732d22002-11-30 11:22:59 +0000214 ctxt->nodeTab[ctxt->nodeNr] = value;
215 ctxt->node = value;
216 return (ctxt->nodeNr++);
217}
218/**
219 * nodePop:
220 * @ctxt: an XML parser context
221 *
222 * Pops the top element node from the node stack
223 *
224 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +0000225 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000226extern xmlNodePtr
227nodePop(xmlParserCtxtPtr ctxt)
228{
229 xmlNodePtr ret;
230
231 if (ctxt->nodeNr <= 0)
232 return (0);
233 ctxt->nodeNr--;
234 if (ctxt->nodeNr > 0)
235 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
236 else
237 ctxt->node = NULL;
238 ret = ctxt->nodeTab[ctxt->nodeNr];
239 ctxt->nodeTab[ctxt->nodeNr] = 0;
240 return (ret);
241}
242/**
243 * namePush:
244 * @ctxt: an XML parser context
245 * @value: the element name
246 *
247 * Pushes a new element name on top of the name stack
248 *
249 * Returns 0 in case of error, the index in the stack otherwise
250 */
251extern int
252namePush(xmlParserCtxtPtr ctxt, xmlChar * value)
253{
254 if (ctxt->nameNr >= ctxt->nameMax) {
255 ctxt->nameMax *= 2;
256 ctxt->nameTab =
257 (xmlChar * *)xmlRealloc(ctxt->nameTab,
258 ctxt->nameMax *
259 sizeof(ctxt->nameTab[0]));
260 if (ctxt->nameTab == NULL) {
261 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
262 return (0);
263 }
264 }
265 ctxt->nameTab[ctxt->nameNr] = value;
266 ctxt->name = value;
267 return (ctxt->nameNr++);
268}
269/**
270 * namePop:
271 * @ctxt: an XML parser context
272 *
273 * Pops the top element name from the name stack
274 *
275 * Returns the name just removed
276 */
277extern xmlChar *
278namePop(xmlParserCtxtPtr ctxt)
279{
280 xmlChar *ret;
281
282 if (ctxt->nameNr <= 0)
283 return (0);
284 ctxt->nameNr--;
285 if (ctxt->nameNr > 0)
286 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
287 else
288 ctxt->name = NULL;
289 ret = ctxt->nameTab[ctxt->nameNr];
290 ctxt->nameTab[ctxt->nameNr] = 0;
291 return (ret);
292}
Owen Taylor3473f882001-02-23 17:55:21 +0000293
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000294static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000295 if (ctxt->spaceNr >= ctxt->spaceMax) {
296 ctxt->spaceMax *= 2;
297 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
298 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
299 if (ctxt->spaceTab == NULL) {
300 xmlGenericError(xmlGenericErrorContext,
301 "realloc failed !\n");
302 return(0);
303 }
304 }
305 ctxt->spaceTab[ctxt->spaceNr] = val;
306 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
307 return(ctxt->spaceNr++);
308}
309
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000310static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000311 int ret;
312 if (ctxt->spaceNr <= 0) return(0);
313 ctxt->spaceNr--;
314 if (ctxt->spaceNr > 0)
315 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
316 else
317 ctxt->space = NULL;
318 ret = ctxt->spaceTab[ctxt->spaceNr];
319 ctxt->spaceTab[ctxt->spaceNr] = -1;
320 return(ret);
321}
322
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip l xmlChar in the input buffer
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
356
Daniel Veillardfdc91562002-07-01 21:52:03 +0000357#define RAW (*ctxt->input->cur)
358#define CUR (*ctxt->input->cur)
Owen Taylor3473f882001-02-23 17:55:21 +0000359#define NXT(val) ctxt->input->cur[(val)]
360#define CUR_PTR ctxt->input->cur
361
362#define SKIP(val) do { \
363 ctxt->nbChars += (val),ctxt->input->cur += (val); \
364 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
Daniel Veillard561b7f82002-03-20 21:55:57 +0000365 if ((*ctxt->input->cur == 0) && \
Owen Taylor3473f882001-02-23 17:55:21 +0000366 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
367 xmlPopInput(ctxt); \
368 } while (0)
369
Daniel Veillard46de64e2002-05-29 08:21:33 +0000370#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) \
371 xmlSHRINK (ctxt);
372
373static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
374 xmlParserInputShrink(ctxt->input);
375 if ((*ctxt->input->cur == 0) &&
376 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
377 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000378 }
Owen Taylor3473f882001-02-23 17:55:21 +0000379
Daniel Veillard46de64e2002-05-29 08:21:33 +0000380#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) \
381 xmlGROW (ctxt);
382
383static void xmlGROW (xmlParserCtxtPtr ctxt) {
384 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
385 if ((*ctxt->input->cur == 0) &&
386 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
387 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000388 }
Owen Taylor3473f882001-02-23 17:55:21 +0000389
390#define SKIP_BLANKS xmlSkipBlankChars(ctxt)
391
392#define NEXT xmlNextChar(ctxt)
393
Daniel Veillard21a0f912001-02-25 19:54:14 +0000394#define NEXT1 { \
395 ctxt->input->cur++; \
396 ctxt->nbChars++; \
Daniel Veillard561b7f82002-03-20 21:55:57 +0000397 if (*ctxt->input->cur == 0) \
Daniel Veillard21a0f912001-02-25 19:54:14 +0000398 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
399 }
400
Owen Taylor3473f882001-02-23 17:55:21 +0000401#define NEXTL(l) do { \
402 if (*(ctxt->input->cur) == '\n') { \
403 ctxt->input->line++; ctxt->input->col = 1; \
404 } else ctxt->input->col++; \
Daniel Veillardfdc91562002-07-01 21:52:03 +0000405 ctxt->input->cur += l; \
Owen Taylor3473f882001-02-23 17:55:21 +0000406 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
Owen Taylor3473f882001-02-23 17:55:21 +0000407 } while (0)
408
409#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
410#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
411
412#define COPY_BUF(l,b,i,v) \
413 if (l == 1) b[i++] = (xmlChar) v; \
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000414 else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +0000415
416/**
417 * xmlSkipBlankChars:
418 * @ctxt: the XML parser context
419 *
420 * skip all blanks character found at that point in the input streams.
421 * It pops up finished entities in the process if allowable at that point.
422 *
423 * Returns the number of space chars skipped
424 */
425
426int
427xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000428 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000429
430 /*
431 * It's Okay to use CUR/NEXT here since all the blanks are on
432 * the ASCII range.
433 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000434 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
435 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000436 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000437 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000438 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000439 cur = ctxt->input->cur;
440 while (IS_BLANK(*cur)) {
441 if (*cur == '\n') {
442 ctxt->input->line++; ctxt->input->col = 1;
443 }
444 cur++;
445 res++;
446 if (*cur == 0) {
447 ctxt->input->cur = cur;
448 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
449 cur = ctxt->input->cur;
450 }
451 }
452 ctxt->input->cur = cur;
453 } else {
454 int cur;
455 do {
456 cur = CUR;
457 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
458 NEXT;
459 cur = CUR;
460 res++;
461 }
462 while ((cur == 0) && (ctxt->inputNr > 1) &&
463 (ctxt->instate != XML_PARSER_COMMENT)) {
464 xmlPopInput(ctxt);
465 cur = CUR;
466 }
467 /*
468 * Need to handle support of entities branching here
469 */
470 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
471 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
472 }
Owen Taylor3473f882001-02-23 17:55:21 +0000473 return(res);
474}
475
476/************************************************************************
477 * *
478 * Commodity functions to handle entities *
479 * *
480 ************************************************************************/
481
482/**
483 * xmlPopInput:
484 * @ctxt: an XML parser context
485 *
486 * xmlPopInput: the current input pointed by ctxt->input came to an end
487 * pop it and return the next char.
488 *
489 * Returns the current xmlChar in the parser context
490 */
491xmlChar
492xmlPopInput(xmlParserCtxtPtr ctxt) {
493 if (ctxt->inputNr == 1) return(0); /* End of main Input */
494 if (xmlParserDebugEntities)
495 xmlGenericError(xmlGenericErrorContext,
496 "Popping input %d\n", ctxt->inputNr);
497 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +0000498 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000499 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
500 return(xmlPopInput(ctxt));
501 return(CUR);
502}
503
504/**
505 * xmlPushInput:
506 * @ctxt: an XML parser context
507 * @input: an XML parser input fragment (entity, XML fragment ...).
508 *
509 * xmlPushInput: switch to a new input stream which is stacked on top
510 * of the previous one(s).
511 */
512void
513xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
514 if (input == NULL) return;
515
516 if (xmlParserDebugEntities) {
517 if ((ctxt->input != NULL) && (ctxt->input->filename))
518 xmlGenericError(xmlGenericErrorContext,
519 "%s(%d): ", ctxt->input->filename,
520 ctxt->input->line);
521 xmlGenericError(xmlGenericErrorContext,
522 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
523 }
524 inputPush(ctxt, input);
525 GROW;
526}
527
528/**
529 * xmlParseCharRef:
530 * @ctxt: an XML parser context
531 *
532 * parse Reference declarations
533 *
534 * [66] CharRef ::= '&#' [0-9]+ ';' |
535 * '&#x' [0-9a-fA-F]+ ';'
536 *
537 * [ WFC: Legal Character ]
538 * Characters referred to using character references must match the
539 * production for Char.
540 *
541 * Returns the value parsed (as an int), 0 in case of error
542 */
543int
544xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000545 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000546 int count = 0;
547
Owen Taylor3473f882001-02-23 17:55:21 +0000548 /*
549 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
550 */
Daniel Veillard561b7f82002-03-20 21:55:57 +0000551 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +0000552 (NXT(2) == 'x')) {
553 SKIP(3);
554 GROW;
555 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000556 if (count++ > 20) {
557 count = 0;
558 GROW;
559 }
560 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000561 val = val * 16 + (CUR - '0');
562 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
563 val = val * 16 + (CUR - 'a') + 10;
564 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
565 val = val * 16 + (CUR - 'A') + 10;
566 else {
567 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
568 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
569 ctxt->sax->error(ctxt->userData,
570 "xmlParseCharRef: invalid hexadecimal value\n");
571 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000572 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000573 val = 0;
574 break;
575 }
576 NEXT;
577 count++;
578 }
579 if (RAW == ';') {
580 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
581 ctxt->nbChars ++;
582 ctxt->input->cur++;
583 }
Daniel Veillard561b7f82002-03-20 21:55:57 +0000584 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +0000585 SKIP(2);
586 GROW;
587 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000588 if (count++ > 20) {
589 count = 0;
590 GROW;
591 }
592 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000593 val = val * 10 + (CUR - '0');
594 else {
595 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
596 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
597 ctxt->sax->error(ctxt->userData,
598 "xmlParseCharRef: invalid decimal value\n");
599 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000600 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000601 val = 0;
602 break;
603 }
604 NEXT;
605 count++;
606 }
607 if (RAW == ';') {
608 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
609 ctxt->nbChars ++;
610 ctxt->input->cur++;
611 }
612 } else {
613 ctxt->errNo = XML_ERR_INVALID_CHARREF;
614 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
615 ctxt->sax->error(ctxt->userData,
616 "xmlParseCharRef: invalid value\n");
617 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000618 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000619 }
620
621 /*
622 * [ WFC: Legal Character ]
623 * Characters referred to using character references must match the
624 * production for Char.
625 */
626 if (IS_CHAR(val)) {
627 return(val);
628 } else {
629 ctxt->errNo = XML_ERR_INVALID_CHAR;
630 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000631 ctxt->sax->error(ctxt->userData,
632 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000633 val);
634 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000635 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000636 }
637 return(0);
638}
639
640/**
641 * xmlParseStringCharRef:
642 * @ctxt: an XML parser context
643 * @str: a pointer to an index in the string
644 *
645 * parse Reference declarations, variant parsing from a string rather
646 * than an an input flow.
647 *
648 * [66] CharRef ::= '&#' [0-9]+ ';' |
649 * '&#x' [0-9a-fA-F]+ ';'
650 *
651 * [ WFC: Legal Character ]
652 * Characters referred to using character references must match the
653 * production for Char.
654 *
655 * Returns the value parsed (as an int), 0 in case of error, str will be
656 * updated to the current value of the index
657 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000658static int
Owen Taylor3473f882001-02-23 17:55:21 +0000659xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
660 const xmlChar *ptr;
661 xmlChar cur;
662 int val = 0;
663
664 if ((str == NULL) || (*str == NULL)) return(0);
665 ptr = *str;
666 cur = *ptr;
667 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
668 ptr += 3;
669 cur = *ptr;
670 while (cur != ';') { /* Non input consuming loop */
671 if ((cur >= '0') && (cur <= '9'))
672 val = val * 16 + (cur - '0');
673 else if ((cur >= 'a') && (cur <= 'f'))
674 val = val * 16 + (cur - 'a') + 10;
675 else if ((cur >= 'A') && (cur <= 'F'))
676 val = val * 16 + (cur - 'A') + 10;
677 else {
678 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
679 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
680 ctxt->sax->error(ctxt->userData,
681 "xmlParseStringCharRef: invalid hexadecimal value\n");
682 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000683 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000684 val = 0;
685 break;
686 }
687 ptr++;
688 cur = *ptr;
689 }
690 if (cur == ';')
691 ptr++;
692 } else if ((cur == '&') && (ptr[1] == '#')){
693 ptr += 2;
694 cur = *ptr;
695 while (cur != ';') { /* Non input consuming loops */
696 if ((cur >= '0') && (cur <= '9'))
697 val = val * 10 + (cur - '0');
698 else {
699 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
700 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
701 ctxt->sax->error(ctxt->userData,
702 "xmlParseStringCharRef: invalid decimal value\n");
703 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000704 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000705 val = 0;
706 break;
707 }
708 ptr++;
709 cur = *ptr;
710 }
711 if (cur == ';')
712 ptr++;
713 } else {
714 ctxt->errNo = XML_ERR_INVALID_CHARREF;
715 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
716 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000717 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000718 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000719 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000720 return(0);
721 }
722 *str = ptr;
723
724 /*
725 * [ WFC: Legal Character ]
726 * Characters referred to using character references must match the
727 * production for Char.
728 */
729 if (IS_CHAR(val)) {
730 return(val);
731 } else {
732 ctxt->errNo = XML_ERR_INVALID_CHAR;
733 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
734 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000735 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000736 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000737 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000738 }
739 return(0);
740}
741
742/**
Daniel Veillardf5582f12002-06-11 10:08:16 +0000743 * xmlNewBlanksWrapperInputStream:
744 * @ctxt: an XML parser context
745 * @entity: an Entity pointer
746 *
747 * Create a new input stream for wrapping
748 * blanks around a PEReference
749 *
750 * Returns the new input stream or NULL
751 */
752
753static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
754
Daniel Veillardf4862f02002-09-10 11:13:43 +0000755static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +0000756xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
757 xmlParserInputPtr input;
758 xmlChar *buffer;
759 size_t length;
760 if (entity == NULL) {
761 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
762 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
763 ctxt->sax->error(ctxt->userData,
764 "internal: xmlNewBlanksWrapperInputStream entity = NULL\n");
765 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
766 return(NULL);
767 }
768 if (xmlParserDebugEntities)
769 xmlGenericError(xmlGenericErrorContext,
770 "new blanks wrapper for entity: %s\n", entity->name);
771 input = xmlNewInputStream(ctxt);
772 if (input == NULL) {
773 return(NULL);
774 }
775 length = xmlStrlen(entity->name) + 5;
776 buffer = xmlMalloc(length);
777 if (buffer == NULL) {
778 return(NULL);
779 }
780 buffer [0] = ' ';
781 buffer [1] = '%';
782 buffer [length-3] = ';';
783 buffer [length-2] = ' ';
784 buffer [length-1] = 0;
785 memcpy(buffer + 2, entity->name, length - 5);
786 input->free = deallocblankswrapper;
787 input->base = buffer;
788 input->cur = buffer;
789 input->length = length;
790 input->end = &buffer[length];
791 return(input);
792}
793
794/**
Owen Taylor3473f882001-02-23 17:55:21 +0000795 * xmlParserHandlePEReference:
796 * @ctxt: the parser context
797 *
798 * [69] PEReference ::= '%' Name ';'
799 *
800 * [ WFC: No Recursion ]
801 * A parsed entity must not contain a recursive
802 * reference to itself, either directly or indirectly.
803 *
804 * [ WFC: Entity Declared ]
805 * In a document without any DTD, a document with only an internal DTD
806 * subset which contains no parameter entity references, or a document
807 * with "standalone='yes'", ... ... The declaration of a parameter
808 * entity must precede any reference to it...
809 *
810 * [ VC: Entity Declared ]
811 * In a document with an external subset or external parameter entities
812 * with "standalone='no'", ... ... The declaration of a parameter entity
813 * must precede any reference to it...
814 *
815 * [ WFC: In DTD ]
816 * Parameter-entity references may only appear in the DTD.
817 * NOTE: misleading but this is handled.
818 *
819 * A PEReference may have been detected in the current input stream
820 * the handling is done accordingly to
821 * http://www.w3.org/TR/REC-xml#entproc
822 * i.e.
823 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000824 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000825 */
826void
827xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
828 xmlChar *name;
829 xmlEntityPtr entity = NULL;
830 xmlParserInputPtr input;
831
Owen Taylor3473f882001-02-23 17:55:21 +0000832 if (RAW != '%') return;
833 switch(ctxt->instate) {
834 case XML_PARSER_CDATA_SECTION:
835 return;
836 case XML_PARSER_COMMENT:
837 return;
838 case XML_PARSER_START_TAG:
839 return;
840 case XML_PARSER_END_TAG:
841 return;
842 case XML_PARSER_EOF:
843 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
844 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
845 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
846 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000847 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000848 return;
849 case XML_PARSER_PROLOG:
850 case XML_PARSER_START:
851 case XML_PARSER_MISC:
852 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
853 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
854 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
855 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000856 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000857 return;
858 case XML_PARSER_ENTITY_DECL:
859 case XML_PARSER_CONTENT:
860 case XML_PARSER_ATTRIBUTE_VALUE:
861 case XML_PARSER_PI:
862 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +0000863 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +0000864 /* we just ignore it there */
865 return;
866 case XML_PARSER_EPILOG:
867 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
868 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
869 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
870 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000871 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000872 return;
873 case XML_PARSER_ENTITY_VALUE:
874 /*
875 * NOTE: in the case of entity values, we don't do the
876 * substitution here since we need the literal
877 * entity value to be able to save the internal
878 * subset of the document.
879 * This will be handled by xmlStringDecodeEntities
880 */
881 return;
882 case XML_PARSER_DTD:
883 /*
884 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
885 * In the internal DTD subset, parameter-entity references
886 * can occur only where markup declarations can occur, not
887 * within markup declarations.
888 * In that case this is handled in xmlParseMarkupDecl
889 */
890 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
891 return;
Daniel Veillardf5582f12002-06-11 10:08:16 +0000892 if (IS_BLANK(NXT(1)) || NXT(1) == 0)
893 return;
Owen Taylor3473f882001-02-23 17:55:21 +0000894 break;
895 case XML_PARSER_IGNORE:
896 return;
897 }
898
899 NEXT;
900 name = xmlParseName(ctxt);
901 if (xmlParserDebugEntities)
902 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000903 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000904 if (name == NULL) {
905 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
906 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000907 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000908 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000909 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000910 } else {
911 if (RAW == ';') {
912 NEXT;
913 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
914 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
915 if (entity == NULL) {
916
917 /*
918 * [ WFC: Entity Declared ]
919 * In a document without any DTD, a document with only an
920 * internal DTD subset which contains no parameter entity
921 * references, or a document with "standalone='yes'", ...
922 * ... The declaration of a parameter entity must precede
923 * any reference to it...
924 */
925 if ((ctxt->standalone == 1) ||
926 ((ctxt->hasExternalSubset == 0) &&
927 (ctxt->hasPErefs == 0))) {
928 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
929 ctxt->sax->error(ctxt->userData,
930 "PEReference: %%%s; not found\n", name);
931 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000932 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000933 } else {
934 /*
935 * [ VC: Entity Declared ]
936 * In a document with an external subset or external
937 * parameter entities with "standalone='no'", ...
938 * ... The declaration of a parameter entity must precede
939 * any reference to it...
940 */
941 if ((!ctxt->disableSAX) &&
942 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
943 ctxt->vctxt.error(ctxt->vctxt.userData,
944 "PEReference: %%%s; not found\n", name);
945 } else if ((!ctxt->disableSAX) &&
946 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
947 ctxt->sax->warning(ctxt->userData,
948 "PEReference: %%%s; not found\n", name);
949 ctxt->valid = 0;
950 }
Daniel Veillardf5582f12002-06-11 10:08:16 +0000951 } else if (ctxt->input->free != deallocblankswrapper) {
952 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
953 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +0000954 } else {
955 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
956 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000957 xmlChar start[4];
958 xmlCharEncoding enc;
959
Owen Taylor3473f882001-02-23 17:55:21 +0000960 /*
961 * handle the extra spaces added before and after
962 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000963 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000964 */
965 input = xmlNewEntityInputStream(ctxt, entity);
966 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000967
968 /*
969 * Get the 4 first bytes and decode the charset
970 * if enc != XML_CHAR_ENCODING_NONE
971 * plug some encoding conversion routines.
972 */
973 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +0000974 if (entity->length >= 4) {
975 start[0] = RAW;
976 start[1] = NXT(1);
977 start[2] = NXT(2);
978 start[3] = NXT(3);
979 enc = xmlDetectCharEncoding(start, 4);
980 if (enc != XML_CHAR_ENCODING_NONE) {
981 xmlSwitchEncoding(ctxt, enc);
982 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000983 }
984
Owen Taylor3473f882001-02-23 17:55:21 +0000985 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
986 (RAW == '<') && (NXT(1) == '?') &&
987 (NXT(2) == 'x') && (NXT(3) == 'm') &&
988 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
989 xmlParseTextDecl(ctxt);
990 }
Owen Taylor3473f882001-02-23 17:55:21 +0000991 } else {
992 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
993 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000994 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000995 name);
996 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000997 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000998 }
999 }
1000 } else {
1001 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1002 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1003 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001004 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001005 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001006 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001007 }
1008 xmlFree(name);
1009 }
1010}
1011
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates @buffer accordingly.
 * On allocation failure an error is reported, the old buffer is freed
 * and the ENCLOSING function returns NULL, so this may only be used in
 * functions returning a pointer. The result of xmlRealloc goes through
 * a temporary so that the old block is not lost when it fails.
 */
#define growBuffer(buffer) {						\
    xmlChar *growBuffer_tmp;						\
    buffer##_size *= 2;							\
    growBuffer_tmp = (xmlChar *)					\
	xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));		\
    if (growBuffer_tmp == NULL) {					\
	xmlGenericError(xmlGenericErrorContext, "realloc failed");	\
	xmlFree(buffer);						\
	buffer = NULL;							\
	return(NULL);							\
    }									\
    buffer = growBuffer_tmp;						\
}
1024
1025/**
1026 * xmlStringDecodeEntities:
1027 * @ctxt: the parser context
1028 * @str: the input string
1029 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1030 * @end: an end marker xmlChar, 0 if none
1031 * @end2: an end marker xmlChar, 0 if none
1032 * @end3: an end marker xmlChar, 0 if none
1033 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001034 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001035 *
1036 * [67] Reference ::= EntityRef | CharRef
1037 *
1038 * [69] PEReference ::= '%' Name ';'
1039 *
1040 * Returns A newly allocated string with the substitution done. The caller
1041 * must deallocate it !
1042 */
1043xmlChar *
1044xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1045 xmlChar end, xmlChar end2, xmlChar end3) {
1046 xmlChar *buffer = NULL;
1047 int buffer_size = 0;
1048
1049 xmlChar *current = NULL;
1050 xmlEntityPtr ent;
1051 int c,l;
1052 int nbchars = 0;
1053
1054 if (str == NULL)
1055 return(NULL);
1056
1057 if (ctxt->depth > 40) {
1058 ctxt->errNo = XML_ERR_ENTITY_LOOP;
1059 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1060 ctxt->sax->error(ctxt->userData,
1061 "Detected entity reference loop\n");
1062 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001063 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001064 return(NULL);
1065 }
1066
1067 /*
1068 * allocate a translation buffer.
1069 */
1070 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1071 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1072 if (buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00001073 xmlGenericError(xmlGenericErrorContext,
1074 "xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00001075 return(NULL);
1076 }
1077
1078 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001079 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001080 * we are operating on already parsed values.
1081 */
1082 c = CUR_SCHAR(str, l);
1083 while ((c != 0) && (c != end) && /* non input consuming loop */
1084 (c != end2) && (c != end3)) {
1085
1086 if (c == 0) break;
1087 if ((c == '&') && (str[1] == '#')) {
1088 int val = xmlParseStringCharRef(ctxt, &str);
1089 if (val != 0) {
1090 COPY_BUF(0,buffer,nbchars,val);
1091 }
1092 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1093 if (xmlParserDebugEntities)
1094 xmlGenericError(xmlGenericErrorContext,
1095 "String decoding Entity Reference: %.30s\n",
1096 str);
1097 ent = xmlParseStringEntityRef(ctxt, &str);
1098 if ((ent != NULL) &&
1099 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1100 if (ent->content != NULL) {
1101 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1102 } else {
1103 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1104 ctxt->sax->error(ctxt->userData,
1105 "internal error entity has no content\n");
1106 }
1107 } else if ((ent != NULL) && (ent->content != NULL)) {
1108 xmlChar *rep;
1109
1110 ctxt->depth++;
1111 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1112 0, 0, 0);
1113 ctxt->depth--;
1114 if (rep != NULL) {
1115 current = rep;
1116 while (*current != 0) { /* non input consuming loop */
1117 buffer[nbchars++] = *current++;
1118 if (nbchars >
1119 buffer_size - XML_PARSER_BUFFER_SIZE) {
1120 growBuffer(buffer);
1121 }
1122 }
1123 xmlFree(rep);
1124 }
1125 } else if (ent != NULL) {
1126 int i = xmlStrlen(ent->name);
1127 const xmlChar *cur = ent->name;
1128
1129 buffer[nbchars++] = '&';
1130 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1131 growBuffer(buffer);
1132 }
1133 for (;i > 0;i--)
1134 buffer[nbchars++] = *cur++;
1135 buffer[nbchars++] = ';';
1136 }
1137 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1138 if (xmlParserDebugEntities)
1139 xmlGenericError(xmlGenericErrorContext,
1140 "String decoding PE Reference: %.30s\n", str);
1141 ent = xmlParseStringPEReference(ctxt, &str);
1142 if (ent != NULL) {
1143 xmlChar *rep;
1144
1145 ctxt->depth++;
1146 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1147 0, 0, 0);
1148 ctxt->depth--;
1149 if (rep != NULL) {
1150 current = rep;
1151 while (*current != 0) { /* non input consuming loop */
1152 buffer[nbchars++] = *current++;
1153 if (nbchars >
1154 buffer_size - XML_PARSER_BUFFER_SIZE) {
1155 growBuffer(buffer);
1156 }
1157 }
1158 xmlFree(rep);
1159 }
1160 }
1161 } else {
1162 COPY_BUF(l,buffer,nbchars,c);
1163 str += l;
1164 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1165 growBuffer(buffer);
1166 }
1167 }
1168 c = CUR_SCHAR(str, l);
1169 }
1170 buffer[nbchars++] = 0;
1171 return(buffer);
1172}
1173
1174
1175/************************************************************************
1176 * *
1177 * Commodity functions to handle xmlChars *
1178 * *
1179 ************************************************************************/
1180
1181/**
1182 * xmlStrndup:
1183 * @cur: the input xmlChar *
1184 * @len: the len of @cur
1185 *
1186 * a strndup for array of xmlChar's
1187 *
1188 * Returns a new xmlChar * or NULL
1189 */
1190xmlChar *
1191xmlStrndup(const xmlChar *cur, int len) {
1192 xmlChar *ret;
1193
1194 if ((cur == NULL) || (len < 0)) return(NULL);
1195 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1196 if (ret == NULL) {
1197 xmlGenericError(xmlGenericErrorContext,
1198 "malloc of %ld byte failed\n",
1199 (len + 1) * (long)sizeof(xmlChar));
1200 return(NULL);
1201 }
1202 memcpy(ret, cur, len * sizeof(xmlChar));
1203 ret[len] = 0;
1204 return(ret);
1205}
1206
1207/**
1208 * xmlStrdup:
1209 * @cur: the input xmlChar *
1210 *
1211 * a strdup for array of xmlChar's. Since they are supposed to be
1212 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1213 * a termination mark of '0'.
1214 *
1215 * Returns a new xmlChar * or NULL
1216 */
1217xmlChar *
1218xmlStrdup(const xmlChar *cur) {
1219 const xmlChar *p = cur;
1220
1221 if (cur == NULL) return(NULL);
1222 while (*p != 0) p++; /* non input consuming */
1223 return(xmlStrndup(cur, p - cur));
1224}
1225
1226/**
1227 * xmlCharStrndup:
1228 * @cur: the input char *
1229 * @len: the len of @cur
1230 *
1231 * a strndup for char's to xmlChar's
1232 *
1233 * Returns a new xmlChar * or NULL
1234 */
1235
1236xmlChar *
1237xmlCharStrndup(const char *cur, int len) {
1238 int i;
1239 xmlChar *ret;
1240
1241 if ((cur == NULL) || (len < 0)) return(NULL);
1242 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1243 if (ret == NULL) {
1244 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1245 (len + 1) * (long)sizeof(xmlChar));
1246 return(NULL);
1247 }
1248 for (i = 0;i < len;i++)
1249 ret[i] = (xmlChar) cur[i];
1250 ret[len] = 0;
1251 return(ret);
1252}
1253
1254/**
1255 * xmlCharStrdup:
1256 * @cur: the input char *
Owen Taylor3473f882001-02-23 17:55:21 +00001257 *
1258 * a strdup for char's to xmlChar's
1259 *
1260 * Returns a new xmlChar * or NULL
1261 */
1262
1263xmlChar *
1264xmlCharStrdup(const char *cur) {
1265 const char *p = cur;
1266
1267 if (cur == NULL) return(NULL);
1268 while (*p != '\0') p++; /* non input consuming */
1269 return(xmlCharStrndup(cur, p - cur));
1270}
1271
1272/**
1273 * xmlStrcmp:
1274 * @str1: the first xmlChar *
1275 * @str2: the second xmlChar *
1276 *
1277 * a strcmp for xmlChar's
1278 *
1279 * Returns the integer result of the comparison
1280 */
1281
1282int
1283xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1284 register int tmp;
1285
1286 if (str1 == str2) return(0);
1287 if (str1 == NULL) return(-1);
1288 if (str2 == NULL) return(1);
1289 do {
1290 tmp = *str1++ - *str2;
1291 if (tmp != 0) return(tmp);
1292 } while (*str2++ != 0);
1293 return 0;
1294}
1295
1296/**
1297 * xmlStrEqual:
1298 * @str1: the first xmlChar *
1299 * @str2: the second xmlChar *
1300 *
1301 * Check if both string are equal of have same content
1302 * Should be a bit more readable and faster than xmlStrEqual()
1303 *
1304 * Returns 1 if they are equal, 0 if they are different
1305 */
1306
1307int
1308xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1309 if (str1 == str2) return(1);
1310 if (str1 == NULL) return(0);
1311 if (str2 == NULL) return(0);
1312 do {
1313 if (*str1++ != *str2) return(0);
1314 } while (*str2++);
1315 return(1);
1316}
1317
1318/**
1319 * xmlStrncmp:
1320 * @str1: the first xmlChar *
1321 * @str2: the second xmlChar *
1322 * @len: the max comparison length
1323 *
1324 * a strncmp for xmlChar's
1325 *
1326 * Returns the integer result of the comparison
1327 */
1328
1329int
1330xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1331 register int tmp;
1332
1333 if (len <= 0) return(0);
1334 if (str1 == str2) return(0);
1335 if (str1 == NULL) return(-1);
1336 if (str2 == NULL) return(1);
1337 do {
1338 tmp = *str1++ - *str2;
1339 if (tmp != 0 || --len == 0) return(tmp);
1340 } while (*str2++ != 0);
1341 return 0;
1342}
1343
Daniel Veillardb44025c2001-10-11 22:55:55 +00001344static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001345 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1346 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1347 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1348 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1349 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1350 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1351 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1352 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1353 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1354 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1355 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1356 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1357 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1358 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1359 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1360 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1361 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1362 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1363 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1364 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1365 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1366 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1367 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1368 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1369 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1370 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1371 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1372 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1373 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1374 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1375 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1376 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1377};
1378
1379/**
1380 * xmlStrcasecmp:
1381 * @str1: the first xmlChar *
1382 * @str2: the second xmlChar *
1383 *
1384 * a strcasecmp for xmlChar's
1385 *
1386 * Returns the integer result of the comparison
1387 */
1388
1389int
1390xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1391 register int tmp;
1392
1393 if (str1 == str2) return(0);
1394 if (str1 == NULL) return(-1);
1395 if (str2 == NULL) return(1);
1396 do {
1397 tmp = casemap[*str1++] - casemap[*str2];
1398 if (tmp != 0) return(tmp);
1399 } while (*str2++ != 0);
1400 return 0;
1401}
1402
1403/**
1404 * xmlStrncasecmp:
1405 * @str1: the first xmlChar *
1406 * @str2: the second xmlChar *
1407 * @len: the max comparison length
1408 *
1409 * a strncasecmp for xmlChar's
1410 *
1411 * Returns the integer result of the comparison
1412 */
1413
1414int
1415xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1416 register int tmp;
1417
1418 if (len <= 0) return(0);
1419 if (str1 == str2) return(0);
1420 if (str1 == NULL) return(-1);
1421 if (str2 == NULL) return(1);
1422 do {
1423 tmp = casemap[*str1++] - casemap[*str2];
1424 if (tmp != 0 || --len == 0) return(tmp);
1425 } while (*str2++ != 0);
1426 return 0;
1427}
1428
1429/**
1430 * xmlStrchr:
1431 * @str: the xmlChar * array
1432 * @val: the xmlChar to search
1433 *
1434 * a strchr for xmlChar's
1435 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001436 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001437 */
1438
1439const xmlChar *
1440xmlStrchr(const xmlChar *str, xmlChar val) {
1441 if (str == NULL) return(NULL);
1442 while (*str != 0) { /* non input consuming */
1443 if (*str == val) return((xmlChar *) str);
1444 str++;
1445 }
1446 return(NULL);
1447}
1448
1449/**
1450 * xmlStrstr:
1451 * @str: the xmlChar * array (haystack)
1452 * @val: the xmlChar to search (needle)
1453 *
1454 * a strstr for xmlChar's
1455 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001456 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001457 */
1458
1459const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001460xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001461 int n;
1462
1463 if (str == NULL) return(NULL);
1464 if (val == NULL) return(NULL);
1465 n = xmlStrlen(val);
1466
1467 if (n == 0) return(str);
1468 while (*str != 0) { /* non input consuming */
1469 if (*str == *val) {
1470 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1471 }
1472 str++;
1473 }
1474 return(NULL);
1475}
1476
1477/**
1478 * xmlStrcasestr:
1479 * @str: the xmlChar * array (haystack)
1480 * @val: the xmlChar to search (needle)
1481 *
1482 * a case-ignoring strstr for xmlChar's
1483 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001484 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001485 */
1486
1487const xmlChar *
1488xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1489 int n;
1490
1491 if (str == NULL) return(NULL);
1492 if (val == NULL) return(NULL);
1493 n = xmlStrlen(val);
1494
1495 if (n == 0) return(str);
1496 while (*str != 0) { /* non input consuming */
1497 if (casemap[*str] == casemap[*val])
1498 if (!xmlStrncasecmp(str, val, n)) return(str);
1499 str++;
1500 }
1501 return(NULL);
1502}
1503
1504/**
1505 * xmlStrsub:
1506 * @str: the xmlChar * array (haystack)
1507 * @start: the index of the first char (zero based)
1508 * @len: the length of the substring
1509 *
1510 * Extract a substring of a given string
1511 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001512 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001513 */
1514
1515xmlChar *
1516xmlStrsub(const xmlChar *str, int start, int len) {
1517 int i;
1518
1519 if (str == NULL) return(NULL);
1520 if (start < 0) return(NULL);
1521 if (len < 0) return(NULL);
1522
1523 for (i = 0;i < start;i++) {
1524 if (*str == 0) return(NULL);
1525 str++;
1526 }
1527 if (*str == 0) return(NULL);
1528 return(xmlStrndup(str, len));
1529}
1530
1531/**
1532 * xmlStrlen:
1533 * @str: the xmlChar * array
1534 *
1535 * length of a xmlChar's string
1536 *
1537 * Returns the number of xmlChar contained in the ARRAY.
1538 */
1539
1540int
1541xmlStrlen(const xmlChar *str) {
1542 int len = 0;
1543
1544 if (str == NULL) return(0);
1545 while (*str != 0) { /* non input consuming */
1546 str++;
1547 len++;
1548 }
1549 return(len);
1550}
1551
1552/**
1553 * xmlStrncat:
1554 * @cur: the original xmlChar * array
1555 * @add: the xmlChar * array added
1556 * @len: the length of @add
1557 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001558 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001559 * first bytes of @add.
1560 *
1561 * Returns a new xmlChar *, the original @cur is reallocated if needed
1562 * and should not be freed
1563 */
1564
1565xmlChar *
1566xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1567 int size;
1568 xmlChar *ret;
1569
1570 if ((add == NULL) || (len == 0))
1571 return(cur);
1572 if (cur == NULL)
1573 return(xmlStrndup(add, len));
1574
1575 size = xmlStrlen(cur);
1576 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1577 if (ret == NULL) {
1578 xmlGenericError(xmlGenericErrorContext,
1579 "xmlStrncat: realloc of %ld byte failed\n",
1580 (size + len + 1) * (long)sizeof(xmlChar));
1581 return(cur);
1582 }
1583 memcpy(&ret[size], add, len * sizeof(xmlChar));
1584 ret[size + len] = 0;
1585 return(ret);
1586}
1587
1588/**
1589 * xmlStrcat:
1590 * @cur: the original xmlChar * array
1591 * @add: the xmlChar * array added
1592 *
1593 * a strcat for array of xmlChar's. Since they are supposed to be
1594 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1595 * a termination mark of '0'.
1596 *
1597 * Returns a new xmlChar * containing the concatenated string.
1598 */
1599xmlChar *
1600xmlStrcat(xmlChar *cur, const xmlChar *add) {
1601 const xmlChar *p = add;
1602
1603 if (add == NULL) return(cur);
1604 if (cur == NULL)
1605 return(xmlStrdup(add));
1606
1607 while (*p != 0) p++; /* non input consuming */
1608 return(xmlStrncat(cur, add, p - add));
1609}
1610
1611/************************************************************************
1612 * *
1613 * Commodity functions, cleanup needed ? *
1614 * *
1615 ************************************************************************/
1616
1617/**
1618 * areBlanks:
1619 * @ctxt: an XML parser context
1620 * @str: a xmlChar *
1621 * @len: the size of @str
1622 *
1623 * Is this a sequence of blank chars that one can ignore ?
1624 *
1625 * Returns 1 if ignorable 0 otherwise.
1626 */
1627
1628static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1629 int i, ret;
1630 xmlNodePtr lastChild;
1631
Daniel Veillard05c13a22001-09-09 08:38:09 +00001632 /*
1633 * Don't spend time trying to differentiate them, the same callback is
1634 * used !
1635 */
1636 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001637 return(0);
1638
Owen Taylor3473f882001-02-23 17:55:21 +00001639 /*
1640 * Check for xml:space value.
1641 */
1642 if (*(ctxt->space) == 1)
1643 return(0);
1644
1645 /*
1646 * Check that the string is made of blanks
1647 */
1648 for (i = 0;i < len;i++)
1649 if (!(IS_BLANK(str[i]))) return(0);
1650
1651 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001652 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001653 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001654 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001655 if (ctxt->myDoc != NULL) {
1656 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1657 if (ret == 0) return(1);
1658 if (ret == 1) return(0);
1659 }
1660
1661 /*
1662 * Otherwise, heuristic :-\
1663 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001664 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001665 if ((ctxt->node->children == NULL) &&
1666 (RAW == '<') && (NXT(1) == '/')) return(0);
1667
1668 lastChild = xmlGetLastChild(ctxt->node);
1669 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001670 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1671 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001672 } else if (xmlNodeIsText(lastChild))
1673 return(0);
1674 else if ((ctxt->node->children != NULL) &&
1675 (xmlNodeIsText(ctxt->node->children)))
1676 return(0);
1677 return(1);
1678}
1679
Owen Taylor3473f882001-02-23 17:55:21 +00001680/************************************************************************
1681 * *
1682 * Extra stuff for namespace support *
1683 * Relates to http://www.w3.org/TR/WD-xml-names *
1684 * *
1685 ************************************************************************/
1686
1687/**
1688 * xmlSplitQName:
1689 * @ctxt: an XML parser context
1690 * @name: an XML parser context
1691 * @prefix: a xmlChar **
1692 *
1693 * parse an UTF8 encoded XML qualified name string
1694 *
1695 * [NS 5] QName ::= (Prefix ':')? LocalPart
1696 *
1697 * [NS 6] Prefix ::= NCName
1698 *
1699 * [NS 7] LocalPart ::= NCName
1700 *
1701 * Returns the local part, and prefix is updated
1702 * to get the Prefix if any.
1703 */
1704
1705xmlChar *
1706xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1707 xmlChar buf[XML_MAX_NAMELEN + 5];
1708 xmlChar *buffer = NULL;
1709 int len = 0;
1710 int max = XML_MAX_NAMELEN;
1711 xmlChar *ret = NULL;
1712 const xmlChar *cur = name;
1713 int c;
1714
1715 *prefix = NULL;
1716
1717#ifndef XML_XML_NAMESPACE
1718 /* xml: prefix is not really a namespace */
1719 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1720 (cur[2] == 'l') && (cur[3] == ':'))
1721 return(xmlStrdup(name));
1722#endif
1723
1724 /* nasty but valid */
1725 if (cur[0] == ':')
1726 return(xmlStrdup(name));
1727
1728 c = *cur++;
1729 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1730 buf[len++] = c;
1731 c = *cur++;
1732 }
1733 if (len >= max) {
1734 /*
1735 * Okay someone managed to make a huge name, so he's ready to pay
1736 * for the processing speed.
1737 */
1738 max = len * 2;
1739
1740 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1741 if (buffer == NULL) {
1742 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1743 ctxt->sax->error(ctxt->userData,
1744 "xmlSplitQName: out of memory\n");
1745 return(NULL);
1746 }
1747 memcpy(buffer, buf, len);
1748 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1749 if (len + 10 > max) {
1750 max *= 2;
1751 buffer = (xmlChar *) xmlRealloc(buffer,
1752 max * sizeof(xmlChar));
1753 if (buffer == NULL) {
1754 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1755 ctxt->sax->error(ctxt->userData,
1756 "xmlSplitQName: out of memory\n");
1757 return(NULL);
1758 }
1759 }
1760 buffer[len++] = c;
1761 c = *cur++;
1762 }
1763 buffer[len] = 0;
1764 }
1765
1766 if (buffer == NULL)
1767 ret = xmlStrndup(buf, len);
1768 else {
1769 ret = buffer;
1770 buffer = NULL;
1771 max = XML_MAX_NAMELEN;
1772 }
1773
1774
1775 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00001776 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001777 if (c == 0) return(ret);
1778 *prefix = ret;
1779 len = 0;
1780
Daniel Veillardbb284f42002-10-16 18:02:47 +00001781 /*
1782 * Check that the first character is proper to start
1783 * a new name
1784 */
1785 if (!(((c >= 0x61) && (c <= 0x7A)) ||
1786 ((c >= 0x41) && (c <= 0x5A)) ||
1787 (c == '_') || (c == ':'))) {
1788 int l;
1789 int first = CUR_SCHAR(cur, l);
1790
1791 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001792 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1793 (ctxt->sax->error != NULL))
Daniel Veillardbb284f42002-10-16 18:02:47 +00001794 ctxt->sax->error(ctxt->userData,
1795 "Name %s is not XML Namespace compliant\n",
1796 name);
1797 }
1798 }
1799 cur++;
1800
Owen Taylor3473f882001-02-23 17:55:21 +00001801 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1802 buf[len++] = c;
1803 c = *cur++;
1804 }
1805 if (len >= max) {
1806 /*
1807 * Okay someone managed to make a huge name, so he's ready to pay
1808 * for the processing speed.
1809 */
1810 max = len * 2;
1811
1812 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1813 if (buffer == NULL) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001814 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1815 (ctxt->sax->error != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00001816 ctxt->sax->error(ctxt->userData,
1817 "xmlSplitQName: out of memory\n");
1818 return(NULL);
1819 }
1820 memcpy(buffer, buf, len);
1821 while (c != 0) { /* tested bigname2.xml */
1822 if (len + 10 > max) {
1823 max *= 2;
1824 buffer = (xmlChar *) xmlRealloc(buffer,
1825 max * sizeof(xmlChar));
1826 if (buffer == NULL) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001827 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1828 (ctxt->sax->error != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00001829 ctxt->sax->error(ctxt->userData,
1830 "xmlSplitQName: out of memory\n");
1831 return(NULL);
1832 }
1833 }
1834 buffer[len++] = c;
1835 c = *cur++;
1836 }
1837 buffer[len] = 0;
1838 }
1839
1840 if (buffer == NULL)
1841 ret = xmlStrndup(buf, len);
1842 else {
1843 ret = buffer;
1844 }
1845 }
1846
1847 return(ret);
1848}
1849
1850/************************************************************************
1851 * *
1852 * The parser itself *
1853 * Relates to http://www.w3.org/TR/REC-xml *
1854 * *
1855 ************************************************************************/
1856
Daniel Veillard76d66f42001-05-16 21:05:17 +00001857static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001858/**
1859 * xmlParseName:
1860 * @ctxt: an XML parser context
1861 *
1862 * parse an XML name.
1863 *
1864 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1865 * CombiningChar | Extender
1866 *
1867 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1868 *
1869 * [6] Names ::= Name (S Name)*
1870 *
1871 * Returns the Name parsed or NULL
1872 */
1873
1874xmlChar *
1875xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001876 const xmlChar *in;
1877 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001878 int count = 0;
1879
1880 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001881
1882 /*
1883 * Accelerator for simple ASCII names
1884 */
1885 in = ctxt->input->cur;
1886 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1887 ((*in >= 0x41) && (*in <= 0x5A)) ||
1888 (*in == '_') || (*in == ':')) {
1889 in++;
1890 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1891 ((*in >= 0x41) && (*in <= 0x5A)) ||
1892 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001893 (*in == '_') || (*in == '-') ||
1894 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001895 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001896 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001897 count = in - ctxt->input->cur;
1898 ret = xmlStrndup(ctxt->input->cur, count);
1899 ctxt->input->cur = in;
1900 return(ret);
1901 }
1902 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001903 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001904}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001905
Daniel Veillard46de64e2002-05-29 08:21:33 +00001906/**
1907 * xmlParseNameAndCompare:
1908 * @ctxt: an XML parser context
1909 *
1910 * parse an XML name and compares for match
1911 * (specialized for endtag parsing)
1912 *
1913 *
1914 * Returns NULL for an illegal name, (xmlChar*) 1 for success
1915 * and the name for mismatch
1916 */
1917
Daniel Veillardf4862f02002-09-10 11:13:43 +00001918static xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00001919xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
1920 const xmlChar *cmp = other;
1921 const xmlChar *in;
1922 xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00001923
1924 GROW;
1925
1926 in = ctxt->input->cur;
1927 while (*in != 0 && *in == *cmp) {
1928 ++in;
1929 ++cmp;
1930 }
1931 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
1932 /* success */
1933 ctxt->input->cur = in;
1934 return (xmlChar*) 1;
1935 }
1936 /* failure (or end of input buffer), check with full function */
1937 ret = xmlParseName (ctxt);
1938 if (ret != 0 && xmlStrEqual (ret, other)) {
1939 xmlFree (ret);
1940 return (xmlChar*) 1;
1941 }
1942 return ret;
1943}
1944
Daniel Veillard76d66f42001-05-16 21:05:17 +00001945static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001946xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1947 xmlChar buf[XML_MAX_NAMELEN + 5];
1948 int len = 0, l;
1949 int c;
1950 int count = 0;
1951
1952 /*
1953 * Handler for more complex cases
1954 */
1955 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001956 c = CUR_CHAR(l);
1957 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1958 (!IS_LETTER(c) && (c != '_') &&
1959 (c != ':'))) {
1960 return(NULL);
1961 }
1962
1963 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1964 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1965 (c == '.') || (c == '-') ||
1966 (c == '_') || (c == ':') ||
1967 (IS_COMBINING(c)) ||
1968 (IS_EXTENDER(c)))) {
1969 if (count++ > 100) {
1970 count = 0;
1971 GROW;
1972 }
1973 COPY_BUF(l,buf,len,c);
1974 NEXTL(l);
1975 c = CUR_CHAR(l);
1976 if (len >= XML_MAX_NAMELEN) {
1977 /*
1978 * Okay someone managed to make a huge name, so he's ready to pay
1979 * for the processing speed.
1980 */
1981 xmlChar *buffer;
1982 int max = len * 2;
1983
1984 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1985 if (buffer == NULL) {
1986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1987 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001988 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001989 return(NULL);
1990 }
1991 memcpy(buffer, buf, len);
1992 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1993 (c == '.') || (c == '-') ||
1994 (c == '_') || (c == ':') ||
1995 (IS_COMBINING(c)) ||
1996 (IS_EXTENDER(c))) {
1997 if (count++ > 100) {
1998 count = 0;
1999 GROW;
2000 }
2001 if (len + 10 > max) {
2002 max *= 2;
2003 buffer = (xmlChar *) xmlRealloc(buffer,
2004 max * sizeof(xmlChar));
2005 if (buffer == NULL) {
2006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2007 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00002008 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002009 return(NULL);
2010 }
2011 }
2012 COPY_BUF(l,buffer,len,c);
2013 NEXTL(l);
2014 c = CUR_CHAR(l);
2015 }
2016 buffer[len] = 0;
2017 return(buffer);
2018 }
2019 }
2020 return(xmlStrndup(buf, len));
2021}
2022
2023/**
2024 * xmlParseStringName:
2025 * @ctxt: an XML parser context
2026 * @str: a pointer to the string pointer (IN/OUT)
2027 *
2028 * parse an XML name.
2029 *
2030 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2031 * CombiningChar | Extender
2032 *
2033 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2034 *
2035 * [6] Names ::= Name (S Name)*
2036 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002037 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002038 * is updated to the current location in the string.
2039 */
2040
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002041static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002042xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2043 xmlChar buf[XML_MAX_NAMELEN + 5];
2044 const xmlChar *cur = *str;
2045 int len = 0, l;
2046 int c;
2047
2048 c = CUR_SCHAR(cur, l);
2049 if (!IS_LETTER(c) && (c != '_') &&
2050 (c != ':')) {
2051 return(NULL);
2052 }
2053
2054 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2055 (c == '.') || (c == '-') ||
2056 (c == '_') || (c == ':') ||
2057 (IS_COMBINING(c)) ||
2058 (IS_EXTENDER(c))) {
2059 COPY_BUF(l,buf,len,c);
2060 cur += l;
2061 c = CUR_SCHAR(cur, l);
2062 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2063 /*
2064 * Okay someone managed to make a huge name, so he's ready to pay
2065 * for the processing speed.
2066 */
2067 xmlChar *buffer;
2068 int max = len * 2;
2069
2070 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
2071 if (buffer == NULL) {
2072 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2073 ctxt->sax->error(ctxt->userData,
2074 "xmlParseStringName: out of memory\n");
2075 return(NULL);
2076 }
2077 memcpy(buffer, buf, len);
2078 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2079 (c == '.') || (c == '-') ||
2080 (c == '_') || (c == ':') ||
2081 (IS_COMBINING(c)) ||
2082 (IS_EXTENDER(c))) {
2083 if (len + 10 > max) {
2084 max *= 2;
2085 buffer = (xmlChar *) xmlRealloc(buffer,
2086 max * sizeof(xmlChar));
2087 if (buffer == NULL) {
2088 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2089 ctxt->sax->error(ctxt->userData,
2090 "xmlParseStringName: out of memory\n");
2091 return(NULL);
2092 }
2093 }
2094 COPY_BUF(l,buffer,len,c);
2095 cur += l;
2096 c = CUR_SCHAR(cur, l);
2097 }
2098 buffer[len] = 0;
2099 *str = cur;
2100 return(buffer);
2101 }
2102 }
2103 *str = cur;
2104 return(xmlStrndup(buf, len));
2105}
2106
2107/**
2108 * xmlParseNmtoken:
2109 * @ctxt: an XML parser context
2110 *
2111 * parse an XML Nmtoken.
2112 *
2113 * [7] Nmtoken ::= (NameChar)+
2114 *
2115 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2116 *
2117 * Returns the Nmtoken parsed or NULL
2118 */
2119
2120xmlChar *
2121xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2122 xmlChar buf[XML_MAX_NAMELEN + 5];
2123 int len = 0, l;
2124 int c;
2125 int count = 0;
2126
2127 GROW;
2128 c = CUR_CHAR(l);
2129
2130 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2131 (c == '.') || (c == '-') ||
2132 (c == '_') || (c == ':') ||
2133 (IS_COMBINING(c)) ||
2134 (IS_EXTENDER(c))) {
2135 if (count++ > 100) {
2136 count = 0;
2137 GROW;
2138 }
2139 COPY_BUF(l,buf,len,c);
2140 NEXTL(l);
2141 c = CUR_CHAR(l);
2142 if (len >= XML_MAX_NAMELEN) {
2143 /*
2144 * Okay someone managed to make a huge token, so he's ready to pay
2145 * for the processing speed.
2146 */
2147 xmlChar *buffer;
2148 int max = len * 2;
2149
2150 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
2151 if (buffer == NULL) {
2152 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2153 ctxt->sax->error(ctxt->userData,
2154 "xmlParseNmtoken: out of memory\n");
2155 return(NULL);
2156 }
2157 memcpy(buffer, buf, len);
2158 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2159 (c == '.') || (c == '-') ||
2160 (c == '_') || (c == ':') ||
2161 (IS_COMBINING(c)) ||
2162 (IS_EXTENDER(c))) {
2163 if (count++ > 100) {
2164 count = 0;
2165 GROW;
2166 }
2167 if (len + 10 > max) {
2168 max *= 2;
2169 buffer = (xmlChar *) xmlRealloc(buffer,
2170 max * sizeof(xmlChar));
2171 if (buffer == NULL) {
2172 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2173 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002174 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002175 return(NULL);
2176 }
2177 }
2178 COPY_BUF(l,buffer,len,c);
2179 NEXTL(l);
2180 c = CUR_CHAR(l);
2181 }
2182 buffer[len] = 0;
2183 return(buffer);
2184 }
2185 }
2186 if (len == 0)
2187 return(NULL);
2188 return(xmlStrndup(buf, len));
2189}
2190
2191/**
2192 * xmlParseEntityValue:
2193 * @ctxt: an XML parser context
2194 * @orig: if non-NULL store a copy of the original entity value
2195 *
2196 * parse a value for ENTITY declarations
2197 *
2198 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2199 * "'" ([^%&'] | PEReference | Reference)* "'"
2200 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002201 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002202 */
2203
2204xmlChar *
2205xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2206 xmlChar *buf = NULL;
2207 int len = 0;
2208 int size = XML_PARSER_BUFFER_SIZE;
2209 int c, l;
2210 xmlChar stop;
2211 xmlChar *ret = NULL;
2212 const xmlChar *cur = NULL;
2213 xmlParserInputPtr input;
2214
2215 if (RAW == '"') stop = '"';
2216 else if (RAW == '\'') stop = '\'';
2217 else {
2218 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2219 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2220 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2221 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002222 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002223 return(NULL);
2224 }
2225 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2226 if (buf == NULL) {
2227 xmlGenericError(xmlGenericErrorContext,
2228 "malloc of %d byte failed\n", size);
2229 return(NULL);
2230 }
2231
2232 /*
2233 * The content of the entity definition is copied in a buffer.
2234 */
2235
2236 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2237 input = ctxt->input;
2238 GROW;
2239 NEXT;
2240 c = CUR_CHAR(l);
2241 /*
2242 * NOTE: 4.4.5 Included in Literal
2243 * When a parameter entity reference appears in a literal entity
2244 * value, ... a single or double quote character in the replacement
2245 * text is always treated as a normal data character and will not
2246 * terminate the literal.
2247 * In practice it means we stop the loop only when back at parsing
2248 * the initial entity and the quote is found
2249 */
2250 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2251 (ctxt->input != input))) {
2252 if (len + 5 >= size) {
2253 size *= 2;
2254 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2255 if (buf == NULL) {
2256 xmlGenericError(xmlGenericErrorContext,
2257 "realloc of %d byte failed\n", size);
2258 return(NULL);
2259 }
2260 }
2261 COPY_BUF(l,buf,len,c);
2262 NEXTL(l);
2263 /*
2264 * Pop-up of finished entities.
2265 */
2266 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2267 xmlPopInput(ctxt);
2268
2269 GROW;
2270 c = CUR_CHAR(l);
2271 if (c == 0) {
2272 GROW;
2273 c = CUR_CHAR(l);
2274 }
2275 }
2276 buf[len] = 0;
2277
2278 /*
2279 * Raise problem w.r.t. '&' and '%' being used in non-entities
2280 * reference constructs. Note Charref will be handled in
2281 * xmlStringDecodeEntities()
2282 */
2283 cur = buf;
2284 while (*cur != 0) { /* non input consuming */
2285 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2286 xmlChar *name;
2287 xmlChar tmp = *cur;
2288
2289 cur++;
2290 name = xmlParseStringName(ctxt, &cur);
2291 if ((name == NULL) || (*cur != ';')) {
2292 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2293 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2294 ctxt->sax->error(ctxt->userData,
2295 "EntityValue: '%c' forbidden except for entities references\n",
2296 tmp);
2297 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002298 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002299 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002300 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2301 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002302 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2303 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2304 ctxt->sax->error(ctxt->userData,
2305 "EntityValue: PEReferences forbidden in internal subset\n",
2306 tmp);
2307 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002308 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002309 }
2310 if (name != NULL)
2311 xmlFree(name);
2312 }
2313 cur++;
2314 }
2315
2316 /*
2317 * Then PEReference entities are substituted.
2318 */
2319 if (c != stop) {
2320 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2321 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2322 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2323 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002324 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002325 xmlFree(buf);
2326 } else {
2327 NEXT;
2328 /*
2329 * NOTE: 4.4.7 Bypassed
2330 * When a general entity reference appears in the EntityValue in
2331 * an entity declaration, it is bypassed and left as is.
2332 * so XML_SUBSTITUTE_REF is not set here.
2333 */
2334 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2335 0, 0, 0);
2336 if (orig != NULL)
2337 *orig = buf;
2338 else
2339 xmlFree(buf);
2340 }
2341
2342 return(ret);
2343}
2344
2345/**
2346 * xmlParseAttValue:
2347 * @ctxt: an XML parser context
2348 *
2349 * parse a value for an attribute
2350 * Note: the parser won't do substitution of entities here, this
2351 * will be handled later in xmlStringGetNodeList
2352 *
2353 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2354 * "'" ([^<&'] | Reference)* "'"
2355 *
2356 * 3.3.3 Attribute-Value Normalization:
2357 * Before the value of an attribute is passed to the application or
2358 * checked for validity, the XML processor must normalize it as follows:
2359 * - a character reference is processed by appending the referenced
2360 * character to the attribute value
2361 * - an entity reference is processed by recursively processing the
2362 * replacement text of the entity
2363 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2364 * appending #x20 to the normalized value, except that only a single
2365 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2366 * parsed entity or the literal entity value of an internal parsed entity
2367 * - other characters are processed by appending them to the normalized value
2368 * If the declared value is not CDATA, then the XML processor must further
2369 * process the normalized attribute value by discarding any leading and
2370 * trailing space (#x20) characters, and by replacing sequences of space
2371 * (#x20) characters by a single space (#x20) character.
2372 * All attributes for which no declaration has been read should be treated
2373 * by a non-validating parser as if declared CDATA.
2374 *
2375 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2376 */
2377
2378xmlChar *
Daniel Veillarde72c7562002-05-31 09:47:30 +00002379xmlParseAttValueComplex(xmlParserCtxtPtr ctxt);
2380
2381xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002382xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2383 xmlChar limit = 0;
Daniel Veillardf4862f02002-09-10 11:13:43 +00002384 const xmlChar *in = NULL;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002385 xmlChar *ret = NULL;
2386 SHRINK;
2387 GROW;
Daniel Veillarde645e8c2002-10-22 17:35:37 +00002388 in = (xmlChar *) CUR_PTR;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002389 if (*in != '"' && *in != '\'') {
2390 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2391 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2392 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2393 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002394 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002395 return(NULL);
2396 }
2397 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2398 limit = *in;
2399 ++in;
2400
2401 while (*in != limit && *in >= 0x20 && *in <= 0x7f &&
2402 *in != '&' && *in != '<'
2403 ) {
2404 ++in;
2405 }
2406 if (*in != limit) {
2407 return xmlParseAttValueComplex(ctxt);
2408 }
2409 ++in;
2410 ret = xmlStrndup (CUR_PTR + 1, in - CUR_PTR - 2);
2411 CUR_PTR = in;
2412 return ret;
2413}
2414
Daniel Veillard01c13b52002-12-10 15:19:08 +00002415/**
2416 * xmlParseAttValueComplex:
2417 * @ctxt: an XML parser context
2418 *
2419 * parse a value for an attribute, this is the fallback function
2420 * of xmlParseAttValue() when the attribute parsing requires handling
2421 * of non-ASCII characters.
2422 *
2423 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2424 */
Daniel Veillarde72c7562002-05-31 09:47:30 +00002425xmlChar *
2426xmlParseAttValueComplex(xmlParserCtxtPtr ctxt) {
2427 xmlChar limit = 0;
2428 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002429 int len = 0;
2430 int buf_size = 0;
2431 int c, l;
2432 xmlChar *current = NULL;
2433 xmlEntityPtr ent;
2434
2435
2436 SHRINK;
2437 if (NXT(0) == '"') {
2438 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2439 limit = '"';
2440 NEXT;
2441 } else if (NXT(0) == '\'') {
2442 limit = '\'';
2443 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2444 NEXT;
2445 } else {
2446 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2447 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2448 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2449 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002450 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002451 return(NULL);
2452 }
2453
2454 /*
2455 * allocate a translation buffer.
2456 */
2457 buf_size = XML_PARSER_BUFFER_SIZE;
2458 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2459 if (buf == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002460 xmlGenericError(xmlGenericErrorContext,
2461 "xmlParseAttValue: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002462 return(NULL);
2463 }
2464
2465 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002466 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002467 */
2468 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002469 while ((NXT(0) != limit) && /* checked */
2470 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002471 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002472 if (c == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00002473 if (NXT(1) == '#') {
2474 int val = xmlParseCharRef(ctxt);
2475 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002476 if (ctxt->replaceEntities) {
2477 if (len > buf_size - 10) {
2478 growBuffer(buf);
2479 }
2480 buf[len++] = '&';
2481 } else {
2482 /*
2483 * The reparsing will be done in xmlStringGetNodeList()
2484 * called by the attribute() function in SAX.c
2485 */
2486 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002487
Daniel Veillard319a7422001-09-11 09:27:09 +00002488 if (len > buf_size - 10) {
2489 growBuffer(buf);
2490 }
2491 current = &buffer[0];
2492 while (*current != 0) { /* non input consuming */
2493 buf[len++] = *current++;
2494 }
Owen Taylor3473f882001-02-23 17:55:21 +00002495 }
2496 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002497 if (len > buf_size - 10) {
2498 growBuffer(buf);
2499 }
Owen Taylor3473f882001-02-23 17:55:21 +00002500 len += xmlCopyChar(0, &buf[len], val);
2501 }
2502 } else {
2503 ent = xmlParseEntityRef(ctxt);
2504 if ((ent != NULL) &&
2505 (ctxt->replaceEntities != 0)) {
2506 xmlChar *rep;
2507
2508 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2509 rep = xmlStringDecodeEntities(ctxt, ent->content,
2510 XML_SUBSTITUTE_REF, 0, 0, 0);
2511 if (rep != NULL) {
2512 current = rep;
2513 while (*current != 0) { /* non input consuming */
2514 buf[len++] = *current++;
2515 if (len > buf_size - 10) {
2516 growBuffer(buf);
2517 }
2518 }
2519 xmlFree(rep);
2520 }
2521 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002522 if (len > buf_size - 10) {
2523 growBuffer(buf);
2524 }
Owen Taylor3473f882001-02-23 17:55:21 +00002525 if (ent->content != NULL)
2526 buf[len++] = ent->content[0];
2527 }
2528 } else if (ent != NULL) {
2529 int i = xmlStrlen(ent->name);
2530 const xmlChar *cur = ent->name;
2531
2532 /*
2533 * This may look absurd but is needed to detect
2534 * entities problems
2535 */
2536 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2537 (ent->content != NULL)) {
2538 xmlChar *rep;
2539 rep = xmlStringDecodeEntities(ctxt, ent->content,
2540 XML_SUBSTITUTE_REF, 0, 0, 0);
2541 if (rep != NULL)
2542 xmlFree(rep);
2543 }
2544
2545 /*
2546 * Just output the reference
2547 */
2548 buf[len++] = '&';
2549 if (len > buf_size - i - 10) {
2550 growBuffer(buf);
2551 }
2552 for (;i > 0;i--)
2553 buf[len++] = *cur++;
2554 buf[len++] = ';';
2555 }
2556 }
2557 } else {
2558 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2559 COPY_BUF(l,buf,len,0x20);
2560 if (len > buf_size - 10) {
2561 growBuffer(buf);
2562 }
2563 } else {
2564 COPY_BUF(l,buf,len,c);
2565 if (len > buf_size - 10) {
2566 growBuffer(buf);
2567 }
2568 }
2569 NEXTL(l);
2570 }
2571 GROW;
2572 c = CUR_CHAR(l);
2573 }
2574 buf[len++] = 0;
2575 if (RAW == '<') {
2576 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2577 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2578 ctxt->sax->error(ctxt->userData,
2579 "Unescaped '<' not allowed in attributes values\n");
2580 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002581 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002582 } else if (RAW != limit) {
2583 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2584 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2585 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2586 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002587 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002588 } else
2589 NEXT;
2590 return(buf);
2591}
2592
2593/**
2594 * xmlParseSystemLiteral:
2595 * @ctxt: an XML parser context
2596 *
2597 * parse an XML Literal
2598 *
2599 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2600 *
2601 * Returns the SystemLiteral parsed or NULL
2602 */
2603
2604xmlChar *
2605xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2606 xmlChar *buf = NULL;
2607 int len = 0;
2608 int size = XML_PARSER_BUFFER_SIZE;
2609 int cur, l;
2610 xmlChar stop;
2611 int state = ctxt->instate;
2612 int count = 0;
2613
2614 SHRINK;
2615 if (RAW == '"') {
2616 NEXT;
2617 stop = '"';
2618 } else if (RAW == '\'') {
2619 NEXT;
2620 stop = '\'';
2621 } else {
2622 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2623 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2624 ctxt->sax->error(ctxt->userData,
2625 "SystemLiteral \" or ' expected\n");
2626 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002627 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002628 return(NULL);
2629 }
2630
2631 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2632 if (buf == NULL) {
2633 xmlGenericError(xmlGenericErrorContext,
2634 "malloc of %d byte failed\n", size);
2635 return(NULL);
2636 }
2637 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2638 cur = CUR_CHAR(l);
2639 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2640 if (len + 5 >= size) {
2641 size *= 2;
2642 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2643 if (buf == NULL) {
2644 xmlGenericError(xmlGenericErrorContext,
2645 "realloc of %d byte failed\n", size);
2646 ctxt->instate = (xmlParserInputState) state;
2647 return(NULL);
2648 }
2649 }
2650 count++;
2651 if (count > 50) {
2652 GROW;
2653 count = 0;
2654 }
2655 COPY_BUF(l,buf,len,cur);
2656 NEXTL(l);
2657 cur = CUR_CHAR(l);
2658 if (cur == 0) {
2659 GROW;
2660 SHRINK;
2661 cur = CUR_CHAR(l);
2662 }
2663 }
2664 buf[len] = 0;
2665 ctxt->instate = (xmlParserInputState) state;
2666 if (!IS_CHAR(cur)) {
2667 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2668 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2669 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2670 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002671 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002672 } else {
2673 NEXT;
2674 }
2675 return(buf);
2676}
2677
2678/**
2679 * xmlParsePubidLiteral:
2680 * @ctxt: an XML parser context
2681 *
2682 * parse an XML public literal
2683 *
2684 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2685 *
2686 * Returns the PubidLiteral parsed or NULL.
2687 */
2688
2689xmlChar *
2690xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2691 xmlChar *buf = NULL;
2692 int len = 0;
2693 int size = XML_PARSER_BUFFER_SIZE;
2694 xmlChar cur;
2695 xmlChar stop;
2696 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002697 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00002698
2699 SHRINK;
2700 if (RAW == '"') {
2701 NEXT;
2702 stop = '"';
2703 } else if (RAW == '\'') {
2704 NEXT;
2705 stop = '\'';
2706 } else {
2707 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2708 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2709 ctxt->sax->error(ctxt->userData,
2710 "SystemLiteral \" or ' expected\n");
2711 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002712 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002713 return(NULL);
2714 }
2715 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2716 if (buf == NULL) {
2717 xmlGenericError(xmlGenericErrorContext,
2718 "malloc of %d byte failed\n", size);
2719 return(NULL);
2720 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002721 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00002722 cur = CUR;
2723 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2724 if (len + 1 >= size) {
2725 size *= 2;
2726 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2727 if (buf == NULL) {
2728 xmlGenericError(xmlGenericErrorContext,
2729 "realloc of %d byte failed\n", size);
2730 return(NULL);
2731 }
2732 }
2733 buf[len++] = cur;
2734 count++;
2735 if (count > 50) {
2736 GROW;
2737 count = 0;
2738 }
2739 NEXT;
2740 cur = CUR;
2741 if (cur == 0) {
2742 GROW;
2743 SHRINK;
2744 cur = CUR;
2745 }
2746 }
2747 buf[len] = 0;
2748 if (cur != stop) {
2749 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2750 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2751 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2752 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002753 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002754 } else {
2755 NEXT;
2756 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002757 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00002758 return(buf);
2759}
2760
Daniel Veillard48b2f892001-02-25 16:11:03 +00002761void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002762/**
2763 * xmlParseCharData:
2764 * @ctxt: an XML parser context
2765 * @cdata: int indicating whether we are within a CDATA section
2766 *
2767 * parse a CharData section.
2768 * if we are within a CDATA section ']]>' marks an end of section.
2769 *
2770 * The right angle bracket (>) may be represented using the string "&gt;",
2771 * and must, for compatibility, be escaped using "&gt;" or a character
2772 * reference when it appears in the string "]]>" in content, when that
2773 * string is not marking the end of a CDATA section.
2774 *
2775 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2776 */
2777
2778void
2779xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00002780 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002781 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002782 int line = ctxt->input->line;
2783 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002784
2785 SHRINK;
2786 GROW;
2787 /*
2788 * Accelerated common case where input don't need to be
2789 * modified before passing it to the handler.
2790 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00002791 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002792 in = ctxt->input->cur;
2793 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002794get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00002795 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
2796 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002797 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002798 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002799 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002800 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002801 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002802 ctxt->input->line++;
2803 in++;
2804 }
2805 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002806 }
2807 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002808 if ((in[1] == ']') && (in[2] == '>')) {
2809 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2810 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2811 ctxt->sax->error(ctxt->userData,
2812 "Sequence ']]>' not allowed in content\n");
2813 ctxt->input->cur = in;
2814 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002815 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002816 return;
2817 }
2818 in++;
2819 goto get_more;
2820 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002821 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002822 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002823 if (IS_BLANK(*ctxt->input->cur)) {
2824 const xmlChar *tmp = ctxt->input->cur;
2825 ctxt->input->cur = in;
2826 if (areBlanks(ctxt, tmp, nbchar)) {
2827 if (ctxt->sax->ignorableWhitespace != NULL)
2828 ctxt->sax->ignorableWhitespace(ctxt->userData,
2829 tmp, nbchar);
2830 } else {
2831 if (ctxt->sax->characters != NULL)
2832 ctxt->sax->characters(ctxt->userData,
2833 tmp, nbchar);
2834 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002835 line = ctxt->input->line;
2836 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002837 } else {
2838 if (ctxt->sax->characters != NULL)
2839 ctxt->sax->characters(ctxt->userData,
2840 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002841 line = ctxt->input->line;
2842 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002843 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002844 }
2845 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002846 if (*in == 0xD) {
2847 in++;
2848 if (*in == 0xA) {
2849 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002850 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002851 ctxt->input->line++;
2852 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002853 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002854 in--;
2855 }
2856 if (*in == '<') {
2857 return;
2858 }
2859 if (*in == '&') {
2860 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002861 }
2862 SHRINK;
2863 GROW;
2864 in = ctxt->input->cur;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002865 } while ((*in >= 0x20) && (*in <= 0x7F));
Daniel Veillard48b2f892001-02-25 16:11:03 +00002866 nbchar = 0;
2867 }
Daniel Veillard50582112001-03-26 22:52:16 +00002868 ctxt->input->line = line;
2869 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002870 xmlParseCharDataComplex(ctxt, cdata);
2871}
2872
Daniel Veillard01c13b52002-12-10 15:19:08 +00002873/**
2874 * xmlParseCharDataComplex:
2875 * @ctxt: an XML parser context
2876 * @cdata: int indicating whether we are within a CDATA section
2877 *
2878 * parse a CharData section.this is the fallback function
2879 * of xmlParseCharData() when the parsing requires handling
2880 * of non-ASCII characters.
2881 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002882void
2883xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002884 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2885 int nbchar = 0;
2886 int cur, l;
2887 int count = 0;
2888
2889 SHRINK;
2890 GROW;
2891 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002892 while ((cur != '<') && /* checked */
2893 (cur != '&') &&
2894 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00002895 if ((cur == ']') && (NXT(1) == ']') &&
2896 (NXT(2) == '>')) {
2897 if (cdata) break;
2898 else {
2899 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2900 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2901 ctxt->sax->error(ctxt->userData,
2902 "Sequence ']]>' not allowed in content\n");
2903 /* Should this be relaxed ??? I see a "must here */
2904 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002905 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002906 }
2907 }
2908 COPY_BUF(l,buf,nbchar,cur);
2909 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2910 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002911 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002912 */
2913 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2914 if (areBlanks(ctxt, buf, nbchar)) {
2915 if (ctxt->sax->ignorableWhitespace != NULL)
2916 ctxt->sax->ignorableWhitespace(ctxt->userData,
2917 buf, nbchar);
2918 } else {
2919 if (ctxt->sax->characters != NULL)
2920 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2921 }
2922 }
2923 nbchar = 0;
2924 }
2925 count++;
2926 if (count > 50) {
2927 GROW;
2928 count = 0;
2929 }
2930 NEXTL(l);
2931 cur = CUR_CHAR(l);
2932 }
2933 if (nbchar != 0) {
2934 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002935 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002936 */
2937 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2938 if (areBlanks(ctxt, buf, nbchar)) {
2939 if (ctxt->sax->ignorableWhitespace != NULL)
2940 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2941 } else {
2942 if (ctxt->sax->characters != NULL)
2943 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2944 }
2945 }
2946 }
2947}
2948
2949/**
2950 * xmlParseExternalID:
2951 * @ctxt: an XML parser context
2952 * @publicID: a xmlChar** receiving PubidLiteral
2953 * @strict: indicate whether we should restrict parsing to only
2954 * production [75], see NOTE below
2955 *
2956 * Parse an External ID or a Public ID
2957 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002958 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002959 * 'PUBLIC' S PubidLiteral S SystemLiteral
2960 *
2961 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2962 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2963 *
2964 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2965 *
2966 * Returns the function returns SystemLiteral and in the second
2967 * case publicID receives PubidLiteral, is strict is off
2968 * it is possible to return NULL and have publicID set.
2969 */
2970
2971xmlChar *
2972xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2973 xmlChar *URI = NULL;
2974
2975 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002976
2977 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002978 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2979 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2980 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2981 SKIP(6);
2982 if (!IS_BLANK(CUR)) {
2983 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2984 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2985 ctxt->sax->error(ctxt->userData,
2986 "Space required after 'SYSTEM'\n");
2987 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002988 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002989 }
2990 SKIP_BLANKS;
2991 URI = xmlParseSystemLiteral(ctxt);
2992 if (URI == NULL) {
2993 ctxt->errNo = XML_ERR_URI_REQUIRED;
2994 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2995 ctxt->sax->error(ctxt->userData,
2996 "xmlParseExternalID: SYSTEM, no URI\n");
2997 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002998 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002999 }
3000 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
3001 (NXT(2) == 'B') && (NXT(3) == 'L') &&
3002 (NXT(4) == 'I') && (NXT(5) == 'C')) {
3003 SKIP(6);
3004 if (!IS_BLANK(CUR)) {
3005 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3007 ctxt->sax->error(ctxt->userData,
3008 "Space required after 'PUBLIC'\n");
3009 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003010 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003011 }
3012 SKIP_BLANKS;
3013 *publicID = xmlParsePubidLiteral(ctxt);
3014 if (*publicID == NULL) {
3015 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
3016 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3017 ctxt->sax->error(ctxt->userData,
3018 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
3019 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003020 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003021 }
3022 if (strict) {
3023 /*
3024 * We don't handle [83] so "S SystemLiteral" is required.
3025 */
3026 if (!IS_BLANK(CUR)) {
3027 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3028 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3029 ctxt->sax->error(ctxt->userData,
3030 "Space required after the Public Identifier\n");
3031 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003032 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003033 }
3034 } else {
3035 /*
3036 * We handle [83] so we return immediately, if
3037 * "S SystemLiteral" is not detected. From a purely parsing
3038 * point of view that's a nice mess.
3039 */
3040 const xmlChar *ptr;
3041 GROW;
3042
3043 ptr = CUR_PTR;
3044 if (!IS_BLANK(*ptr)) return(NULL);
3045
3046 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3047 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3048 }
3049 SKIP_BLANKS;
3050 URI = xmlParseSystemLiteral(ctxt);
3051 if (URI == NULL) {
3052 ctxt->errNo = XML_ERR_URI_REQUIRED;
3053 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3054 ctxt->sax->error(ctxt->userData,
3055 "xmlParseExternalID: PUBLIC, no URI\n");
3056 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003057 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003058 }
3059 }
3060 return(URI);
3061}
3062
3063/**
3064 * xmlParseComment:
3065 * @ctxt: an XML parser context
3066 *
3067 * Skip an XML (SGML) comment <!-- .... -->
3068 * The spec says that "For compatibility, the string "--" (double-hyphen)
3069 * must not occur within comments. "
3070 *
3071 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3072 */
3073void
3074xmlParseComment(xmlParserCtxtPtr ctxt) {
3075 xmlChar *buf = NULL;
3076 int len;
3077 int size = XML_PARSER_BUFFER_SIZE;
3078 int q, ql;
3079 int r, rl;
3080 int cur, l;
3081 xmlParserInputState state;
3082 xmlParserInputPtr input = ctxt->input;
3083 int count = 0;
3084
3085 /*
3086 * Check that there is a comment right here.
3087 */
3088 if ((RAW != '<') || (NXT(1) != '!') ||
3089 (NXT(2) != '-') || (NXT(3) != '-')) return;
3090
3091 state = ctxt->instate;
3092 ctxt->instate = XML_PARSER_COMMENT;
3093 SHRINK;
3094 SKIP(4);
3095 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3096 if (buf == NULL) {
3097 xmlGenericError(xmlGenericErrorContext,
3098 "malloc of %d byte failed\n", size);
3099 ctxt->instate = state;
3100 return;
3101 }
3102 q = CUR_CHAR(ql);
3103 NEXTL(ql);
3104 r = CUR_CHAR(rl);
3105 NEXTL(rl);
3106 cur = CUR_CHAR(l);
3107 len = 0;
3108 while (IS_CHAR(cur) && /* checked */
3109 ((cur != '>') ||
3110 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003111 if ((r == '-') && (q == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003112 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
3113 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3114 ctxt->sax->error(ctxt->userData,
3115 "Comment must not contain '--' (double-hyphen)`\n");
3116 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003117 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003118 }
3119 if (len + 5 >= size) {
3120 size *= 2;
3121 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3122 if (buf == NULL) {
3123 xmlGenericError(xmlGenericErrorContext,
3124 "realloc of %d byte failed\n", size);
3125 ctxt->instate = state;
3126 return;
3127 }
3128 }
3129 COPY_BUF(ql,buf,len,q);
3130 q = r;
3131 ql = rl;
3132 r = cur;
3133 rl = l;
3134
3135 count++;
3136 if (count > 50) {
3137 GROW;
3138 count = 0;
3139 }
3140 NEXTL(l);
3141 cur = CUR_CHAR(l);
3142 if (cur == 0) {
3143 SHRINK;
3144 GROW;
3145 cur = CUR_CHAR(l);
3146 }
3147 }
3148 buf[len] = 0;
3149 if (!IS_CHAR(cur)) {
3150 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
3151 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3152 ctxt->sax->error(ctxt->userData,
3153 "Comment not terminated \n<!--%.50s\n", buf);
3154 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003155 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003156 xmlFree(buf);
3157 } else {
3158 if (input != ctxt->input) {
3159 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3160 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3161 ctxt->sax->error(ctxt->userData,
3162"Comment doesn't start and stop in the same entity\n");
3163 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003164 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003165 }
3166 NEXT;
3167 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3168 (!ctxt->disableSAX))
3169 ctxt->sax->comment(ctxt->userData, buf);
3170 xmlFree(buf);
3171 }
3172 ctxt->instate = state;
3173}
3174
3175/**
3176 * xmlParsePITarget:
3177 * @ctxt: an XML parser context
3178 *
3179 * parse the name of a PI
3180 *
3181 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3182 *
3183 * Returns the PITarget name or NULL
3184 */
3185
3186xmlChar *
3187xmlParsePITarget(xmlParserCtxtPtr ctxt) {
3188 xmlChar *name;
3189
3190 name = xmlParseName(ctxt);
3191 if ((name != NULL) &&
3192 ((name[0] == 'x') || (name[0] == 'X')) &&
3193 ((name[1] == 'm') || (name[1] == 'M')) &&
3194 ((name[2] == 'l') || (name[2] == 'L'))) {
3195 int i;
3196 if ((name[0] == 'x') && (name[1] == 'm') &&
3197 (name[2] == 'l') && (name[3] == 0)) {
3198 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3199 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3200 ctxt->sax->error(ctxt->userData,
3201 "XML declaration allowed only at the start of the document\n");
3202 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003203 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003204 return(name);
3205 } else if (name[3] == 0) {
3206 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3207 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3208 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
3209 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003210 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003211 return(name);
3212 }
3213 for (i = 0;;i++) {
3214 if (xmlW3CPIs[i] == NULL) break;
3215 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3216 return(name);
3217 }
3218 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3219 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3220 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003221 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003222 }
3223 }
3224 return(name);
3225}
3226
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 * 
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype informations.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;   /* scanning cursor */
    const xmlChar *start;         /* first byte of the quoted value */
    xmlChar *URL = NULL;
    xmlChar quote;

    /* the pseudo-attribute name */
    while (IS_BLANK(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* a missing '=' is ignored silently, without a warning */
    while (IS_BLANK(*p))
        p++;
    if (*p != '=')
        return;
    p++;

    /* the quoted value */
    while (IS_BLANK(*p))
        p++;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    while (IS_BLANK(*p))
        p++;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData, 
             "Catalog PI syntax error: %s\n", catalog);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3289
Owen Taylor3473f882001-02-23 17:55:21 +00003290/**
3291 * xmlParsePI:
3292 * @ctxt: an XML parser context
3293 *
3294 * parse an XML Processing Instruction.
3295 *
3296 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3297 *
3298 * The processing is transfered to SAX once parsed.
3299 */
3300
3301void
3302xmlParsePI(xmlParserCtxtPtr ctxt) {
3303 xmlChar *buf = NULL;
3304 int len = 0;
3305 int size = XML_PARSER_BUFFER_SIZE;
3306 int cur, l;
3307 xmlChar *target;
3308 xmlParserInputState state;
3309 int count = 0;
3310
3311 if ((RAW == '<') && (NXT(1) == '?')) {
3312 xmlParserInputPtr input = ctxt->input;
3313 state = ctxt->instate;
3314 ctxt->instate = XML_PARSER_PI;
3315 /*
3316 * this is a Processing Instruction.
3317 */
3318 SKIP(2);
3319 SHRINK;
3320
3321 /*
3322 * Parse the target name and check for special support like
3323 * namespace.
3324 */
3325 target = xmlParsePITarget(ctxt);
3326 if (target != NULL) {
3327 if ((RAW == '?') && (NXT(1) == '>')) {
3328 if (input != ctxt->input) {
3329 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3331 ctxt->sax->error(ctxt->userData,
3332 "PI declaration doesn't start and stop in the same entity\n");
3333 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003334 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003335 }
3336 SKIP(2);
3337
3338 /*
3339 * SAX: PI detected.
3340 */
3341 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3342 (ctxt->sax->processingInstruction != NULL))
3343 ctxt->sax->processingInstruction(ctxt->userData,
3344 target, NULL);
3345 ctxt->instate = state;
3346 xmlFree(target);
3347 return;
3348 }
3349 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3350 if (buf == NULL) {
3351 xmlGenericError(xmlGenericErrorContext,
3352 "malloc of %d byte failed\n", size);
3353 ctxt->instate = state;
3354 return;
3355 }
3356 cur = CUR;
3357 if (!IS_BLANK(cur)) {
3358 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3359 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3360 ctxt->sax->error(ctxt->userData,
3361 "xmlParsePI: PI %s space expected\n", target);
3362 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003363 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003364 }
3365 SKIP_BLANKS;
3366 cur = CUR_CHAR(l);
3367 while (IS_CHAR(cur) && /* checked */
3368 ((cur != '?') || (NXT(1) != '>'))) {
3369 if (len + 5 >= size) {
3370 size *= 2;
3371 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3372 if (buf == NULL) {
3373 xmlGenericError(xmlGenericErrorContext,
3374 "realloc of %d byte failed\n", size);
3375 ctxt->instate = state;
3376 return;
3377 }
3378 }
3379 count++;
3380 if (count > 50) {
3381 GROW;
3382 count = 0;
3383 }
3384 COPY_BUF(l,buf,len,cur);
3385 NEXTL(l);
3386 cur = CUR_CHAR(l);
3387 if (cur == 0) {
3388 SHRINK;
3389 GROW;
3390 cur = CUR_CHAR(l);
3391 }
3392 }
3393 buf[len] = 0;
3394 if (cur != '?') {
3395 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3396 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3397 ctxt->sax->error(ctxt->userData,
3398 "xmlParsePI: PI %s never end ...\n", target);
3399 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003400 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003401 } else {
3402 if (input != ctxt->input) {
3403 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3404 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3405 ctxt->sax->error(ctxt->userData,
3406 "PI declaration doesn't start and stop in the same entity\n");
3407 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003408 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003409 }
3410 SKIP(2);
3411
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003412#ifdef LIBXML_CATALOG_ENABLED
3413 if (((state == XML_PARSER_MISC) ||
3414 (state == XML_PARSER_START)) &&
3415 (xmlStrEqual(target, XML_CATALOG_PI))) {
3416 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3417 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3418 (allow == XML_CATA_ALLOW_ALL))
3419 xmlParseCatalogPI(ctxt, buf);
3420 }
3421#endif
3422
3423
Owen Taylor3473f882001-02-23 17:55:21 +00003424 /*
3425 * SAX: PI detected.
3426 */
3427 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3428 (ctxt->sax->processingInstruction != NULL))
3429 ctxt->sax->processingInstruction(ctxt->userData,
3430 target, buf);
3431 }
3432 xmlFree(buf);
3433 xmlFree(target);
3434 } else {
3435 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3436 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3437 ctxt->sax->error(ctxt->userData,
3438 "xmlParsePI : no target name\n");
3439 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003440 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003441 }
3442 ctxt->instate = state;
3443 }
3444}
3445
3446/**
3447 * xmlParseNotationDecl:
3448 * @ctxt: an XML parser context
3449 *
3450 * parse a notation declaration
3451 *
3452 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3453 *
3454 * Hence there is actually 3 choices:
3455 * 'PUBLIC' S PubidLiteral
3456 * 'PUBLIC' S PubidLiteral S SystemLiteral
3457 * and 'SYSTEM' S SystemLiteral
3458 *
3459 * See the NOTE on xmlParseExternalID().
3460 */
3461
3462void
3463xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3464 xmlChar *name;
3465 xmlChar *Pubid;
3466 xmlChar *Systemid;
3467
3468 if ((RAW == '<') && (NXT(1) == '!') &&
3469 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3470 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3471 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3472 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3473 xmlParserInputPtr input = ctxt->input;
3474 SHRINK;
3475 SKIP(10);
3476 if (!IS_BLANK(CUR)) {
3477 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3478 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3479 ctxt->sax->error(ctxt->userData,
3480 "Space required after '<!NOTATION'\n");
3481 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003482 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003483 return;
3484 }
3485 SKIP_BLANKS;
3486
Daniel Veillard76d66f42001-05-16 21:05:17 +00003487 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003488 if (name == NULL) {
3489 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3490 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3491 ctxt->sax->error(ctxt->userData,
3492 "NOTATION: Name expected here\n");
3493 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003494 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003495 return;
3496 }
3497 if (!IS_BLANK(CUR)) {
3498 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3499 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3500 ctxt->sax->error(ctxt->userData,
3501 "Space required after the NOTATION name'\n");
3502 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003503 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003504 return;
3505 }
3506 SKIP_BLANKS;
3507
3508 /*
3509 * Parse the IDs.
3510 */
3511 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3512 SKIP_BLANKS;
3513
3514 if (RAW == '>') {
3515 if (input != ctxt->input) {
3516 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3517 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3518 ctxt->sax->error(ctxt->userData,
3519"Notation declaration doesn't start and stop in the same entity\n");
3520 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003521 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003522 }
3523 NEXT;
3524 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3525 (ctxt->sax->notationDecl != NULL))
3526 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3527 } else {
3528 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3529 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3530 ctxt->sax->error(ctxt->userData,
3531 "'>' required to close NOTATION declaration\n");
3532 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003533 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003534 }
3535 xmlFree(name);
3536 if (Systemid != NULL) xmlFree(Systemid);
3537 if (Pubid != NULL) xmlFree(Pubid);
3538 }
3539}
3540
3541/**
3542 * xmlParseEntityDecl:
3543 * @ctxt: an XML parser context
3544 *
3545 * parse <!ENTITY declarations
3546 *
3547 * [70] EntityDecl ::= GEDecl | PEDecl
3548 *
3549 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3550 *
3551 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3552 *
3553 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3554 *
3555 * [74] PEDef ::= EntityValue | ExternalID
3556 *
3557 * [76] NDataDecl ::= S 'NDATA' S Name
3558 *
3559 * [ VC: Notation Declared ]
3560 * The Name must match the declared name of a notation.
3561 */
3562
3563void
3564xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3565 xmlChar *name = NULL;
3566 xmlChar *value = NULL;
3567 xmlChar *URI = NULL, *literal = NULL;
3568 xmlChar *ndata = NULL;
3569 int isParameter = 0;
3570 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003571 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003572
3573 GROW;
3574 if ((RAW == '<') && (NXT(1) == '!') &&
3575 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3576 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3577 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3578 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003579 SHRINK;
3580 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003581 skipped = SKIP_BLANKS;
3582 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003583 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3584 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3585 ctxt->sax->error(ctxt->userData,
3586 "Space required after '<!ENTITY'\n");
3587 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003588 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003589 }
Owen Taylor3473f882001-02-23 17:55:21 +00003590
3591 if (RAW == '%') {
3592 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003593 skipped = SKIP_BLANKS;
3594 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003595 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3596 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3597 ctxt->sax->error(ctxt->userData,
3598 "Space required after '%'\n");
3599 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003600 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003601 }
Owen Taylor3473f882001-02-23 17:55:21 +00003602 isParameter = 1;
3603 }
3604
Daniel Veillard76d66f42001-05-16 21:05:17 +00003605 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003606 if (name == NULL) {
3607 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3608 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3609 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3610 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003611 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003612 return;
3613 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003614 skipped = SKIP_BLANKS;
3615 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003616 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3617 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3618 ctxt->sax->error(ctxt->userData,
3619 "Space required after the entity name\n");
3620 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003621 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003622 }
Owen Taylor3473f882001-02-23 17:55:21 +00003623
Daniel Veillardf5582f12002-06-11 10:08:16 +00003624 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003625 /*
3626 * handle the various case of definitions...
3627 */
3628 if (isParameter) {
3629 if ((RAW == '"') || (RAW == '\'')) {
3630 value = xmlParseEntityValue(ctxt, &orig);
3631 if (value) {
3632 if ((ctxt->sax != NULL) &&
3633 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3634 ctxt->sax->entityDecl(ctxt->userData, name,
3635 XML_INTERNAL_PARAMETER_ENTITY,
3636 NULL, NULL, value);
3637 }
3638 } else {
3639 URI = xmlParseExternalID(ctxt, &literal, 1);
3640 if ((URI == NULL) && (literal == NULL)) {
3641 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3642 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3643 ctxt->sax->error(ctxt->userData,
3644 "Entity value required\n");
3645 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003646 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003647 }
3648 if (URI) {
3649 xmlURIPtr uri;
3650
3651 uri = xmlParseURI((const char *) URI);
3652 if (uri == NULL) {
3653 ctxt->errNo = XML_ERR_INVALID_URI;
3654 if ((ctxt->sax != NULL) &&
3655 (!ctxt->disableSAX) &&
3656 (ctxt->sax->error != NULL))
3657 ctxt->sax->error(ctxt->userData,
3658 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003659 /*
3660 * This really ought to be a well formedness error
3661 * but the XML Core WG decided otherwise c.f. issue
3662 * E26 of the XML erratas.
3663 */
Owen Taylor3473f882001-02-23 17:55:21 +00003664 } else {
3665 if (uri->fragment != NULL) {
3666 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3667 if ((ctxt->sax != NULL) &&
3668 (!ctxt->disableSAX) &&
3669 (ctxt->sax->error != NULL))
3670 ctxt->sax->error(ctxt->userData,
3671 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003672 /*
3673 * Okay this is foolish to block those but not
3674 * invalid URIs.
3675 */
Owen Taylor3473f882001-02-23 17:55:21 +00003676 ctxt->wellFormed = 0;
3677 } else {
3678 if ((ctxt->sax != NULL) &&
3679 (!ctxt->disableSAX) &&
3680 (ctxt->sax->entityDecl != NULL))
3681 ctxt->sax->entityDecl(ctxt->userData, name,
3682 XML_EXTERNAL_PARAMETER_ENTITY,
3683 literal, URI, NULL);
3684 }
3685 xmlFreeURI(uri);
3686 }
3687 }
3688 }
3689 } else {
3690 if ((RAW == '"') || (RAW == '\'')) {
3691 value = xmlParseEntityValue(ctxt, &orig);
3692 if ((ctxt->sax != NULL) &&
3693 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3694 ctxt->sax->entityDecl(ctxt->userData, name,
3695 XML_INTERNAL_GENERAL_ENTITY,
3696 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003697 /*
3698 * For expat compatibility in SAX mode.
3699 */
3700 if ((ctxt->myDoc == NULL) ||
3701 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3702 if (ctxt->myDoc == NULL) {
3703 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3704 }
3705 if (ctxt->myDoc->intSubset == NULL)
3706 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3707 BAD_CAST "fake", NULL, NULL);
3708
3709 entityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3710 NULL, NULL, value);
3711 }
Owen Taylor3473f882001-02-23 17:55:21 +00003712 } else {
3713 URI = xmlParseExternalID(ctxt, &literal, 1);
3714 if ((URI == NULL) && (literal == NULL)) {
3715 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3716 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3717 ctxt->sax->error(ctxt->userData,
3718 "Entity value required\n");
3719 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003720 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003721 }
3722 if (URI) {
3723 xmlURIPtr uri;
3724
3725 uri = xmlParseURI((const char *)URI);
3726 if (uri == NULL) {
3727 ctxt->errNo = XML_ERR_INVALID_URI;
3728 if ((ctxt->sax != NULL) &&
3729 (!ctxt->disableSAX) &&
3730 (ctxt->sax->error != NULL))
3731 ctxt->sax->error(ctxt->userData,
3732 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003733 /*
3734 * This really ought to be a well formedness error
3735 * but the XML Core WG decided otherwise c.f. issue
3736 * E26 of the XML erratas.
3737 */
Owen Taylor3473f882001-02-23 17:55:21 +00003738 } else {
3739 if (uri->fragment != NULL) {
3740 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3741 if ((ctxt->sax != NULL) &&
3742 (!ctxt->disableSAX) &&
3743 (ctxt->sax->error != NULL))
3744 ctxt->sax->error(ctxt->userData,
3745 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003746 /*
3747 * Okay this is foolish to block those but not
3748 * invalid URIs.
3749 */
Owen Taylor3473f882001-02-23 17:55:21 +00003750 ctxt->wellFormed = 0;
3751 }
3752 xmlFreeURI(uri);
3753 }
3754 }
3755 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3756 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3757 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3758 ctxt->sax->error(ctxt->userData,
3759 "Space required before 'NDATA'\n");
3760 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003761 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003762 }
3763 SKIP_BLANKS;
3764 if ((RAW == 'N') && (NXT(1) == 'D') &&
3765 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3766 (NXT(4) == 'A')) {
3767 SKIP(5);
3768 if (!IS_BLANK(CUR)) {
3769 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3770 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3771 ctxt->sax->error(ctxt->userData,
3772 "Space required after 'NDATA'\n");
3773 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003774 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003775 }
3776 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003777 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003778 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3779 (ctxt->sax->unparsedEntityDecl != NULL))
3780 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3781 literal, URI, ndata);
3782 } else {
3783 if ((ctxt->sax != NULL) &&
3784 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3785 ctxt->sax->entityDecl(ctxt->userData, name,
3786 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3787 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003788 /*
3789 * For expat compatibility in SAX mode.
3790 * assuming the entity replacement was asked for
3791 */
3792 if ((ctxt->replaceEntities != 0) &&
3793 ((ctxt->myDoc == NULL) ||
3794 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
3795 if (ctxt->myDoc == NULL) {
3796 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3797 }
3798
3799 if (ctxt->myDoc->intSubset == NULL)
3800 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3801 BAD_CAST "fake", NULL, NULL);
3802 entityDecl(ctxt, name,
3803 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3804 literal, URI, NULL);
3805 }
Owen Taylor3473f882001-02-23 17:55:21 +00003806 }
3807 }
3808 }
3809 SKIP_BLANKS;
3810 if (RAW != '>') {
3811 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3812 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3813 ctxt->sax->error(ctxt->userData,
3814 "xmlParseEntityDecl: entity %s not terminated\n", name);
3815 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003816 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003817 } else {
3818 if (input != ctxt->input) {
3819 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3820 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3821 ctxt->sax->error(ctxt->userData,
3822"Entity declaration doesn't start and stop in the same entity\n");
3823 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003824 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003825 }
3826 NEXT;
3827 }
3828 if (orig != NULL) {
3829 /*
3830 * Ugly mechanism to save the raw entity value.
3831 */
3832 xmlEntityPtr cur = NULL;
3833
3834 if (isParameter) {
3835 if ((ctxt->sax != NULL) &&
3836 (ctxt->sax->getParameterEntity != NULL))
3837 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3838 } else {
3839 if ((ctxt->sax != NULL) &&
3840 (ctxt->sax->getEntity != NULL))
3841 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003842 if ((cur == NULL) && (ctxt->userData==ctxt)) {
3843 cur = getEntity(ctxt, name);
3844 }
Owen Taylor3473f882001-02-23 17:55:21 +00003845 }
3846 if (cur != NULL) {
3847 if (cur->orig != NULL)
3848 xmlFree(orig);
3849 else
3850 cur->orig = orig;
3851 } else
3852 xmlFree(orig);
3853 }
3854 if (name != NULL) xmlFree(name);
3855 if (value != NULL) xmlFree(value);
3856 if (URI != NULL) xmlFree(URI);
3857 if (literal != NULL) xmlFree(literal);
3858 if (ndata != NULL) xmlFree(ndata);
3859 }
3860}
3861
3862/**
3863 * xmlParseDefaultDecl:
3864 * @ctxt: an XML parser context
3865 * @value: Receive a possible fixed default value for the attribute
3866 *
3867 * Parse an attribute default declaration
3868 *
3869 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3870 *
3871 * [ VC: Required Attribute ]
3872 * if the default declaration is the keyword #REQUIRED, then the
3873 * attribute must be specified for all elements of the type in the
3874 * attribute-list declaration.
3875 *
3876 * [ VC: Attribute Default Legal ]
3877 * The declared default value must meet the lexical constraints of
3878 * the declared attribute type c.f. xmlValidateAttributeDecl()
3879 *
3880 * [ VC: Fixed Attribute Default ]
3881 * if an attribute has a default value declared with the #FIXED
3882 * keyword, instances of that attribute must match the default value.
3883 *
3884 * [ WFC: No < in Attribute Values ]
3885 * handled in xmlParseAttValue()
3886 *
3887 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3888 * or XML_ATTRIBUTE_FIXED.
3889 */
3890
3891int
3892xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3893 int val;
3894 xmlChar *ret;
3895
3896 *value = NULL;
3897 if ((RAW == '#') && (NXT(1) == 'R') &&
3898 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3899 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3900 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3901 (NXT(8) == 'D')) {
3902 SKIP(9);
3903 return(XML_ATTRIBUTE_REQUIRED);
3904 }
3905 if ((RAW == '#') && (NXT(1) == 'I') &&
3906 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3907 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3908 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3909 SKIP(8);
3910 return(XML_ATTRIBUTE_IMPLIED);
3911 }
3912 val = XML_ATTRIBUTE_NONE;
3913 if ((RAW == '#') && (NXT(1) == 'F') &&
3914 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3915 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3916 SKIP(6);
3917 val = XML_ATTRIBUTE_FIXED;
3918 if (!IS_BLANK(CUR)) {
3919 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3920 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3921 ctxt->sax->error(ctxt->userData,
3922 "Space required after '#FIXED'\n");
3923 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003924 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003925 }
3926 SKIP_BLANKS;
3927 }
3928 ret = xmlParseAttValue(ctxt);
3929 ctxt->instate = XML_PARSER_DTD;
3930 if (ret == NULL) {
3931 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3932 ctxt->sax->error(ctxt->userData,
3933 "Attribute default value declaration error\n");
3934 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003935 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003936 } else
3937 *value = ret;
3938 return(val);
3939}
3940
3941/**
3942 * xmlParseNotationType:
3943 * @ctxt: an XML parser context
3944 *
3945 * parse an Notation attribute type.
3946 *
3947 * Note: the leading 'NOTATION' S part has already being parsed...
3948 *
3949 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3950 *
3951 * [ VC: Notation Attributes ]
3952 * Values of this type must match one of the notation names included
3953 * in the declaration; all notation names in the declaration must be declared.
3954 *
3955 * Returns: the notation attribute tree built while parsing
3956 */
3957
3958xmlEnumerationPtr
3959xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3960 xmlChar *name;
3961 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3962
3963 if (RAW != '(') {
3964 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3965 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3966 ctxt->sax->error(ctxt->userData,
3967 "'(' required to start 'NOTATION'\n");
3968 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003969 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003970 return(NULL);
3971 }
3972 SHRINK;
3973 do {
3974 NEXT;
3975 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003976 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003977 if (name == NULL) {
3978 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3979 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3980 ctxt->sax->error(ctxt->userData,
3981 "Name expected in NOTATION declaration\n");
3982 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003983 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003984 return(ret);
3985 }
3986 cur = xmlCreateEnumeration(name);
3987 xmlFree(name);
3988 if (cur == NULL) return(ret);
3989 if (last == NULL) ret = last = cur;
3990 else {
3991 last->next = cur;
3992 last = cur;
3993 }
3994 SKIP_BLANKS;
3995 } while (RAW == '|');
3996 if (RAW != ')') {
3997 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3998 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3999 ctxt->sax->error(ctxt->userData,
4000 "')' required to finish NOTATION declaration\n");
4001 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004002 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004003 if ((last != NULL) && (last != ret))
4004 xmlFreeEnumeration(last);
4005 return(ret);
4006 }
4007 NEXT;
4008 return(ret);
4009}
4010
4011/**
4012 * xmlParseEnumerationType:
4013 * @ctxt: an XML parser context
4014 *
4015 * parse an Enumeration attribute type.
4016 *
4017 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4018 *
4019 * [ VC: Enumeration ]
4020 * Values of this type must match one of the Nmtoken tokens in
4021 * the declaration
4022 *
4023 * Returns: the enumeration attribute tree built while parsing
4024 */
4025
4026xmlEnumerationPtr
4027xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4028 xmlChar *name;
4029 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4030
4031 if (RAW != '(') {
4032 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
4033 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4034 ctxt->sax->error(ctxt->userData,
4035 "'(' required to start ATTLIST enumeration\n");
4036 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004037 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004038 return(NULL);
4039 }
4040 SHRINK;
4041 do {
4042 NEXT;
4043 SKIP_BLANKS;
4044 name = xmlParseNmtoken(ctxt);
4045 if (name == NULL) {
4046 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
4047 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4048 ctxt->sax->error(ctxt->userData,
4049 "NmToken expected in ATTLIST enumeration\n");
4050 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004051 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004052 return(ret);
4053 }
4054 cur = xmlCreateEnumeration(name);
4055 xmlFree(name);
4056 if (cur == NULL) return(ret);
4057 if (last == NULL) ret = last = cur;
4058 else {
4059 last->next = cur;
4060 last = cur;
4061 }
4062 SKIP_BLANKS;
4063 } while (RAW == '|');
4064 if (RAW != ')') {
4065 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
4066 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4067 ctxt->sax->error(ctxt->userData,
4068 "')' required to finish ATTLIST enumeration\n");
4069 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004070 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004071 return(ret);
4072 }
4073 NEXT;
4074 return(ret);
4075}
4076
4077/**
4078 * xmlParseEnumeratedType:
4079 * @ctxt: an XML parser context
4080 * @tree: the enumeration tree built while parsing
4081 *
4082 * parse an Enumerated attribute type.
4083 *
4084 * [57] EnumeratedType ::= NotationType | Enumeration
4085 *
4086 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4087 *
4088 *
4089 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4090 */
4091
4092int
4093xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4094 if ((RAW == 'N') && (NXT(1) == 'O') &&
4095 (NXT(2) == 'T') && (NXT(3) == 'A') &&
4096 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4097 (NXT(6) == 'O') && (NXT(7) == 'N')) {
4098 SKIP(8);
4099 if (!IS_BLANK(CUR)) {
4100 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4101 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4102 ctxt->sax->error(ctxt->userData,
4103 "Space required after 'NOTATION'\n");
4104 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004105 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004106 return(0);
4107 }
4108 SKIP_BLANKS;
4109 *tree = xmlParseNotationType(ctxt);
4110 if (*tree == NULL) return(0);
4111 return(XML_ATTRIBUTE_NOTATION);
4112 }
4113 *tree = xmlParseEnumerationType(ctxt);
4114 if (*tree == NULL) return(0);
4115 return(XML_ATTRIBUTE_ENUMERATION);
4116}
4117
4118/**
4119 * xmlParseAttributeType:
4120 * @ctxt: an XML parser context
4121 * @tree: the enumeration tree built while parsing
4122 *
4123 * parse the Attribute list def for an element
4124 *
4125 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4126 *
4127 * [55] StringType ::= 'CDATA'
4128 *
4129 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4130 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4131 *
4132 * Validity constraints for attribute values syntax are checked in
4133 * xmlValidateAttributeValue()
4134 *
4135 * [ VC: ID ]
4136 * Values of type ID must match the Name production. A name must not
4137 * appear more than once in an XML document as a value of this type;
4138 * i.e., ID values must uniquely identify the elements which bear them.
4139 *
4140 * [ VC: One ID per Element Type ]
4141 * No element type may have more than one ID attribute specified.
4142 *
4143 * [ VC: ID Attribute Default ]
4144 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4145 *
4146 * [ VC: IDREF ]
4147 * Values of type IDREF must match the Name production, and values
4148 * of type IDREFS must match Names; each IDREF Name must match the value
4149 * of an ID attribute on some element in the XML document; i.e. IDREF
4150 * values must match the value of some ID attribute.
4151 *
4152 * [ VC: Entity Name ]
4153 * Values of type ENTITY must match the Name production, values
4154 * of type ENTITIES must match Names; each Entity Name must match the
4155 * name of an unparsed entity declared in the DTD.
4156 *
4157 * [ VC: Name Token ]
4158 * Values of type NMTOKEN must match the Nmtoken production; values
4159 * of type NMTOKENS must match Nmtokens.
4160 *
4161 * Returns the attribute type
4162 */
4163int
4164xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4165 SHRINK;
4166 if ((RAW == 'C') && (NXT(1) == 'D') &&
4167 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4168 (NXT(4) == 'A')) {
4169 SKIP(5);
4170 return(XML_ATTRIBUTE_CDATA);
4171 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4172 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4173 (NXT(4) == 'F') && (NXT(5) == 'S')) {
4174 SKIP(6);
4175 return(XML_ATTRIBUTE_IDREFS);
4176 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4177 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4178 (NXT(4) == 'F')) {
4179 SKIP(5);
4180 return(XML_ATTRIBUTE_IDREF);
4181 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4182 SKIP(2);
4183 return(XML_ATTRIBUTE_ID);
4184 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4185 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4186 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4187 SKIP(6);
4188 return(XML_ATTRIBUTE_ENTITY);
4189 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4190 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4191 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4192 (NXT(6) == 'E') && (NXT(7) == 'S')) {
4193 SKIP(8);
4194 return(XML_ATTRIBUTE_ENTITIES);
4195 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4196 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4197 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4198 (NXT(6) == 'N') && (NXT(7) == 'S')) {
4199 SKIP(8);
4200 return(XML_ATTRIBUTE_NMTOKENS);
4201 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4202 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4203 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4204 (NXT(6) == 'N')) {
4205 SKIP(7);
4206 return(XML_ATTRIBUTE_NMTOKEN);
4207 }
4208 return(xmlParseEnumeratedType(ctxt, tree));
4209}
4210
4211/**
4212 * xmlParseAttributeListDecl:
4213 * @ctxt: an XML parser context
4214 *
4215 * : parse the Attribute list def for an element
4216 *
4217 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4218 *
4219 * [53] AttDef ::= S Name S AttType S DefaultDecl
4220 *
4221 */
4222void
4223xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
4224 xmlChar *elemName;
4225 xmlChar *attrName;
4226 xmlEnumerationPtr tree;
4227
4228 if ((RAW == '<') && (NXT(1) == '!') &&
4229 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4230 (NXT(4) == 'T') && (NXT(5) == 'L') &&
4231 (NXT(6) == 'I') && (NXT(7) == 'S') &&
4232 (NXT(8) == 'T')) {
4233 xmlParserInputPtr input = ctxt->input;
4234
4235 SKIP(9);
4236 if (!IS_BLANK(CUR)) {
4237 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4238 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4239 ctxt->sax->error(ctxt->userData,
4240 "Space required after '<!ATTLIST'\n");
4241 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004242 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004243 }
4244 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004245 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004246 if (elemName == NULL) {
4247 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4248 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4249 ctxt->sax->error(ctxt->userData,
4250 "ATTLIST: no name for Element\n");
4251 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004252 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004253 return;
4254 }
4255 SKIP_BLANKS;
4256 GROW;
4257 while (RAW != '>') {
4258 const xmlChar *check = CUR_PTR;
4259 int type;
4260 int def;
4261 xmlChar *defaultValue = NULL;
4262
4263 GROW;
4264 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004265 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004266 if (attrName == NULL) {
4267 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4268 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4269 ctxt->sax->error(ctxt->userData,
4270 "ATTLIST: no name for Attribute\n");
4271 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004272 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004273 break;
4274 }
4275 GROW;
4276 if (!IS_BLANK(CUR)) {
4277 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4278 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4279 ctxt->sax->error(ctxt->userData,
4280 "Space required after the attribute name\n");
4281 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004282 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004283 if (attrName != NULL)
4284 xmlFree(attrName);
4285 if (defaultValue != NULL)
4286 xmlFree(defaultValue);
4287 break;
4288 }
4289 SKIP_BLANKS;
4290
4291 type = xmlParseAttributeType(ctxt, &tree);
4292 if (type <= 0) {
4293 if (attrName != NULL)
4294 xmlFree(attrName);
4295 if (defaultValue != NULL)
4296 xmlFree(defaultValue);
4297 break;
4298 }
4299
4300 GROW;
4301 if (!IS_BLANK(CUR)) {
4302 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4303 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4304 ctxt->sax->error(ctxt->userData,
4305 "Space required after the attribute type\n");
4306 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004307 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004308 if (attrName != NULL)
4309 xmlFree(attrName);
4310 if (defaultValue != NULL)
4311 xmlFree(defaultValue);
4312 if (tree != NULL)
4313 xmlFreeEnumeration(tree);
4314 break;
4315 }
4316 SKIP_BLANKS;
4317
4318 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4319 if (def <= 0) {
4320 if (attrName != NULL)
4321 xmlFree(attrName);
4322 if (defaultValue != NULL)
4323 xmlFree(defaultValue);
4324 if (tree != NULL)
4325 xmlFreeEnumeration(tree);
4326 break;
4327 }
4328
4329 GROW;
4330 if (RAW != '>') {
4331 if (!IS_BLANK(CUR)) {
4332 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4333 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4334 ctxt->sax->error(ctxt->userData,
4335 "Space required after the attribute default value\n");
4336 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004337 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004338 if (attrName != NULL)
4339 xmlFree(attrName);
4340 if (defaultValue != NULL)
4341 xmlFree(defaultValue);
4342 if (tree != NULL)
4343 xmlFreeEnumeration(tree);
4344 break;
4345 }
4346 SKIP_BLANKS;
4347 }
4348 if (check == CUR_PTR) {
4349 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4350 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4351 ctxt->sax->error(ctxt->userData,
4352 "xmlParseAttributeListDecl: detected internal error\n");
4353 if (attrName != NULL)
4354 xmlFree(attrName);
4355 if (defaultValue != NULL)
4356 xmlFree(defaultValue);
4357 if (tree != NULL)
4358 xmlFreeEnumeration(tree);
4359 break;
4360 }
4361 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4362 (ctxt->sax->attributeDecl != NULL))
4363 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4364 type, def, defaultValue, tree);
4365 if (attrName != NULL)
4366 xmlFree(attrName);
4367 if (defaultValue != NULL)
4368 xmlFree(defaultValue);
4369 GROW;
4370 }
4371 if (RAW == '>') {
4372 if (input != ctxt->input) {
4373 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4374 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4375 ctxt->sax->error(ctxt->userData,
4376"Attribute list declaration doesn't start and stop in the same entity\n");
4377 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004378 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004379 }
4380 NEXT;
4381 }
4382
4383 xmlFree(elemName);
4384 }
4385}
4386
4387/**
4388 * xmlParseElementMixedContentDecl:
4389 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004390 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004391 *
4392 * parse the declaration for a Mixed Element content
4393 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4394 *
4395 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4396 * '(' S? '#PCDATA' S? ')'
4397 *
4398 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4399 *
4400 * [ VC: No Duplicate Types ]
4401 * The same name must not appear more than once in a single
4402 * mixed-content declaration.
4403 *
4404 * returns: the list of the xmlElementContentPtr describing the element choices
4405 */
4406xmlElementContentPtr
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004407xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004408 xmlElementContentPtr ret = NULL, cur = NULL, n;
4409 xmlChar *elem = NULL;
4410
4411 GROW;
4412 if ((RAW == '#') && (NXT(1) == 'P') &&
4413 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4414 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4415 (NXT(6) == 'A')) {
4416 SKIP(7);
4417 SKIP_BLANKS;
4418 SHRINK;
4419 if (RAW == ')') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004420 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4421 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4422 if (ctxt->vctxt.error != NULL)
4423 ctxt->vctxt.error(ctxt->vctxt.userData,
4424"Element content declaration doesn't start and stop in the same entity\n");
4425 ctxt->valid = 0;
4426 }
Owen Taylor3473f882001-02-23 17:55:21 +00004427 NEXT;
4428 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4429 if (RAW == '*') {
4430 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4431 NEXT;
4432 }
4433 return(ret);
4434 }
4435 if ((RAW == '(') || (RAW == '|')) {
4436 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4437 if (ret == NULL) return(NULL);
4438 }
4439 while (RAW == '|') {
4440 NEXT;
4441 if (elem == NULL) {
4442 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4443 if (ret == NULL) return(NULL);
4444 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004445 if (cur != NULL)
4446 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004447 cur = ret;
4448 } else {
4449 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4450 if (n == NULL) return(NULL);
4451 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004452 if (n->c1 != NULL)
4453 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004454 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004455 if (n != NULL)
4456 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004457 cur = n;
4458 xmlFree(elem);
4459 }
4460 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004461 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004462 if (elem == NULL) {
4463 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4464 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4465 ctxt->sax->error(ctxt->userData,
4466 "xmlParseElementMixedContentDecl : Name expected\n");
4467 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004468 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004469 xmlFreeElementContent(cur);
4470 return(NULL);
4471 }
4472 SKIP_BLANKS;
4473 GROW;
4474 }
4475 if ((RAW == ')') && (NXT(1) == '*')) {
4476 if (elem != NULL) {
4477 cur->c2 = xmlNewElementContent(elem,
4478 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004479 if (cur->c2 != NULL)
4480 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004481 xmlFree(elem);
4482 }
4483 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004484 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4485 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4486 if (ctxt->vctxt.error != NULL)
4487 ctxt->vctxt.error(ctxt->vctxt.userData,
4488"Element content declaration doesn't start and stop in the same entity\n");
4489 ctxt->valid = 0;
4490 }
Owen Taylor3473f882001-02-23 17:55:21 +00004491 SKIP(2);
4492 } else {
4493 if (elem != NULL) xmlFree(elem);
4494 xmlFreeElementContent(ret);
4495 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4496 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4497 ctxt->sax->error(ctxt->userData,
4498 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4499 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004500 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004501 return(NULL);
4502 }
4503
4504 } else {
4505 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4506 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4507 ctxt->sax->error(ctxt->userData,
4508 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4509 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004510 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004511 }
4512 return(ret);
4513}
4514
4515/**
4516 * xmlParseElementChildrenContentDecl:
4517 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004518 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004519 *
4520 * parse the declaration for a children Element content
4521 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4522 *
4523 *
4524 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4525 *
4526 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4527 *
4528 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4529 *
4530 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4531 *
4532 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4533 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004534 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004535 * opening or closing parentheses in a choice, seq, or Mixed
4536 * construct is contained in the replacement text for a parameter
4537 * entity, both must be contained in the same replacement text. For
4538 * interoperability, if a parameter-entity reference appears in a
4539 * choice, seq, or Mixed construct, its replacement text should not
4540 * be empty, and neither the first nor last non-blank character of
4541 * the replacement text should be a connector (| or ,).
4542 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004543 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004544 * hierarchy.
4545 */
4546xmlElementContentPtr
Owen Taylor3473f882001-02-23 17:55:21 +00004547xmlParseElementChildrenContentDecl
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004548(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004549 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4550 xmlChar *elem;
4551 xmlChar type = 0;
4552
4553 SKIP_BLANKS;
4554 GROW;
4555 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004556 xmlParserInputPtr input = ctxt->input;
4557
Owen Taylor3473f882001-02-23 17:55:21 +00004558 /* Recurse on first child */
4559 NEXT;
4560 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004561 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004562 SKIP_BLANKS;
4563 GROW;
4564 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004565 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004566 if (elem == NULL) {
4567 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4568 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4569 ctxt->sax->error(ctxt->userData,
4570 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4571 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004572 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004573 return(NULL);
4574 }
4575 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4576 GROW;
4577 if (RAW == '?') {
4578 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4579 NEXT;
4580 } else if (RAW == '*') {
4581 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4582 NEXT;
4583 } else if (RAW == '+') {
4584 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4585 NEXT;
4586 } else {
4587 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4588 }
4589 xmlFree(elem);
4590 GROW;
4591 }
4592 SKIP_BLANKS;
4593 SHRINK;
4594 while (RAW != ')') {
4595 /*
4596 * Each loop we parse one separator and one element.
4597 */
4598 if (RAW == ',') {
4599 if (type == 0) type = CUR;
4600
4601 /*
4602 * Detect "Name | Name , Name" error
4603 */
4604 else if (type != CUR) {
4605 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4606 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4607 ctxt->sax->error(ctxt->userData,
4608 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4609 type);
4610 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004611 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004612 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004613 xmlFreeElementContent(last);
4614 if (ret != NULL)
4615 xmlFreeElementContent(ret);
4616 return(NULL);
4617 }
4618 NEXT;
4619
4620 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4621 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004622 if ((last != NULL) && (last != ret))
4623 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004624 xmlFreeElementContent(ret);
4625 return(NULL);
4626 }
4627 if (last == NULL) {
4628 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004629 if (ret != NULL)
4630 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004631 ret = cur = op;
4632 } else {
4633 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004634 if (op != NULL)
4635 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004636 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004637 if (last != NULL)
4638 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004639 cur =op;
4640 last = NULL;
4641 }
4642 } else if (RAW == '|') {
4643 if (type == 0) type = CUR;
4644
4645 /*
4646 * Detect "Name , Name | Name" error
4647 */
4648 else if (type != CUR) {
4649 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4650 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4651 ctxt->sax->error(ctxt->userData,
4652 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4653 type);
4654 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004655 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004656 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004657 xmlFreeElementContent(last);
4658 if (ret != NULL)
4659 xmlFreeElementContent(ret);
4660 return(NULL);
4661 }
4662 NEXT;
4663
4664 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4665 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004666 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004667 xmlFreeElementContent(last);
4668 if (ret != NULL)
4669 xmlFreeElementContent(ret);
4670 return(NULL);
4671 }
4672 if (last == NULL) {
4673 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004674 if (ret != NULL)
4675 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004676 ret = cur = op;
4677 } else {
4678 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004679 if (op != NULL)
4680 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004681 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004682 if (last != NULL)
4683 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004684 cur =op;
4685 last = NULL;
4686 }
4687 } else {
4688 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4689 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4690 ctxt->sax->error(ctxt->userData,
4691 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4692 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004693 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004694 if (ret != NULL)
4695 xmlFreeElementContent(ret);
4696 return(NULL);
4697 }
4698 GROW;
4699 SKIP_BLANKS;
4700 GROW;
4701 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004702 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004703 /* Recurse on second child */
4704 NEXT;
4705 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004706 last = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004707 SKIP_BLANKS;
4708 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004709 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004710 if (elem == NULL) {
4711 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4712 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4713 ctxt->sax->error(ctxt->userData,
4714 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4715 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004716 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004717 if (ret != NULL)
4718 xmlFreeElementContent(ret);
4719 return(NULL);
4720 }
4721 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4722 xmlFree(elem);
4723 if (RAW == '?') {
4724 last->ocur = XML_ELEMENT_CONTENT_OPT;
4725 NEXT;
4726 } else if (RAW == '*') {
4727 last->ocur = XML_ELEMENT_CONTENT_MULT;
4728 NEXT;
4729 } else if (RAW == '+') {
4730 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4731 NEXT;
4732 } else {
4733 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4734 }
4735 }
4736 SKIP_BLANKS;
4737 GROW;
4738 }
4739 if ((cur != NULL) && (last != NULL)) {
4740 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004741 if (last != NULL)
4742 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004743 }
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004744 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4745 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4746 if (ctxt->vctxt.error != NULL)
4747 ctxt->vctxt.error(ctxt->vctxt.userData,
4748"Element content declaration doesn't start and stop in the same entity\n");
4749 ctxt->valid = 0;
4750 }
Owen Taylor3473f882001-02-23 17:55:21 +00004751 NEXT;
4752 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004753 if (ret != NULL)
4754 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004755 NEXT;
4756 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004757 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004758 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004759 cur = ret;
4760 /*
4761 * Some normalization:
4762 * (a | b* | c?)* == (a | b | c)*
4763 */
4764 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4765 if ((cur->c1 != NULL) &&
4766 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4767 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4768 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4769 if ((cur->c2 != NULL) &&
4770 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4771 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4772 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4773 cur = cur->c2;
4774 }
4775 }
Owen Taylor3473f882001-02-23 17:55:21 +00004776 NEXT;
4777 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004778 if (ret != NULL) {
4779 int found = 0;
4780
Daniel Veillarde470df72001-04-18 21:41:07 +00004781 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004782 /*
4783 * Some normalization:
4784 * (a | b*)+ == (a | b)*
4785 * (a | b?)+ == (a | b)*
4786 */
4787 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4788 if ((cur->c1 != NULL) &&
4789 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4790 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4791 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4792 found = 1;
4793 }
4794 if ((cur->c2 != NULL) &&
4795 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4796 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4797 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4798 found = 1;
4799 }
4800 cur = cur->c2;
4801 }
4802 if (found)
4803 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4804 }
Owen Taylor3473f882001-02-23 17:55:21 +00004805 NEXT;
4806 }
4807 return(ret);
4808}
4809
4810/**
4811 * xmlParseElementContentDecl:
4812 * @ctxt: an XML parser context
4813 * @name: the name of the element being defined.
4814 * @result: the Element Content pointer will be stored here if any
4815 *
4816 * parse the declaration for an Element content either Mixed or Children,
4817 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4818 *
4819 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4820 *
4821 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4822 */
4823
4824int
4825xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4826 xmlElementContentPtr *result) {
4827
4828 xmlElementContentPtr tree = NULL;
4829 xmlParserInputPtr input = ctxt->input;
4830 int res;
4831
4832 *result = NULL;
4833
4834 if (RAW != '(') {
4835 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4836 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4837 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004838 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004839 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004840 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004841 return(-1);
4842 }
4843 NEXT;
4844 GROW;
4845 SKIP_BLANKS;
4846 if ((RAW == '#') && (NXT(1) == 'P') &&
4847 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4848 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4849 (NXT(6) == 'A')) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004850 tree = xmlParseElementMixedContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004851 res = XML_ELEMENT_TYPE_MIXED;
4852 } else {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004853 tree = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004854 res = XML_ELEMENT_TYPE_ELEMENT;
4855 }
Owen Taylor3473f882001-02-23 17:55:21 +00004856 SKIP_BLANKS;
4857 *result = tree;
4858 return(res);
4859}
4860
4861/**
4862 * xmlParseElementDecl:
4863 * @ctxt: an XML parser context
4864 *
4865 * parse an Element declaration.
4866 *
4867 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4868 *
4869 * [ VC: Unique Element Type Declaration ]
4870 * No element type may be declared more than once
4871 *
4872 * Returns the type of the element, or -1 in case of error
4873 */
4874int
4875xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4876 xmlChar *name;
4877 int ret = -1;
4878 xmlElementContentPtr content = NULL;
4879
4880 GROW;
4881 if ((RAW == '<') && (NXT(1) == '!') &&
4882 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4883 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4884 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4885 (NXT(8) == 'T')) {
4886 xmlParserInputPtr input = ctxt->input;
4887
4888 SKIP(9);
4889 if (!IS_BLANK(CUR)) {
4890 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4891 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4892 ctxt->sax->error(ctxt->userData,
4893 "Space required after 'ELEMENT'\n");
4894 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004895 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004896 }
4897 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004898 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004899 if (name == NULL) {
4900 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4901 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4902 ctxt->sax->error(ctxt->userData,
4903 "xmlParseElementDecl: no name for Element\n");
4904 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004905 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004906 return(-1);
4907 }
4908 while ((RAW == 0) && (ctxt->inputNr > 1))
4909 xmlPopInput(ctxt);
4910 if (!IS_BLANK(CUR)) {
4911 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4912 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4913 ctxt->sax->error(ctxt->userData,
4914 "Space required after the element name\n");
4915 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004916 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004917 }
4918 SKIP_BLANKS;
4919 if ((RAW == 'E') && (NXT(1) == 'M') &&
4920 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4921 (NXT(4) == 'Y')) {
4922 SKIP(5);
4923 /*
4924 * Element must always be empty.
4925 */
4926 ret = XML_ELEMENT_TYPE_EMPTY;
4927 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4928 (NXT(2) == 'Y')) {
4929 SKIP(3);
4930 /*
4931 * Element is a generic container.
4932 */
4933 ret = XML_ELEMENT_TYPE_ANY;
4934 } else if (RAW == '(') {
4935 ret = xmlParseElementContentDecl(ctxt, name, &content);
4936 } else {
4937 /*
4938 * [ WFC: PEs in Internal Subset ] error handling.
4939 */
4940 if ((RAW == '%') && (ctxt->external == 0) &&
4941 (ctxt->inputNr == 1)) {
4942 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4943 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4944 ctxt->sax->error(ctxt->userData,
4945 "PEReference: forbidden within markup decl in internal subset\n");
4946 } else {
4947 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4948 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4949 ctxt->sax->error(ctxt->userData,
4950 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4951 }
4952 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004953 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004954 if (name != NULL) xmlFree(name);
4955 return(-1);
4956 }
4957
4958 SKIP_BLANKS;
4959 /*
4960 * Pop-up of finished entities.
4961 */
4962 while ((RAW == 0) && (ctxt->inputNr > 1))
4963 xmlPopInput(ctxt);
4964 SKIP_BLANKS;
4965
4966 if (RAW != '>') {
4967 ctxt->errNo = XML_ERR_GT_REQUIRED;
4968 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4969 ctxt->sax->error(ctxt->userData,
4970 "xmlParseElementDecl: expected '>' at the end\n");
4971 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004972 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004973 } else {
4974 if (input != ctxt->input) {
4975 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4976 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4977 ctxt->sax->error(ctxt->userData,
4978"Element declaration doesn't start and stop in the same entity\n");
4979 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004980 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004981 }
4982
4983 NEXT;
4984 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4985 (ctxt->sax->elementDecl != NULL))
4986 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4987 content);
4988 }
4989 if (content != NULL) {
4990 xmlFreeElementContent(content);
4991 }
4992 if (name != NULL) {
4993 xmlFree(name);
4994 }
4995 }
4996 return(ret);
4997}
4998
4999/**
Owen Taylor3473f882001-02-23 17:55:21 +00005000 * xmlParseConditionalSections
5001 * @ctxt: an XML parser context
5002 *
5003 * [61] conditionalSect ::= includeSect | ignoreSect
5004 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5005 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5006 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5007 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5008 */
5009
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005010static void
Owen Taylor3473f882001-02-23 17:55:21 +00005011xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5012 SKIP(3);
5013 SKIP_BLANKS;
5014 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
5015 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
5016 (NXT(6) == 'E')) {
5017 SKIP(7);
5018 SKIP_BLANKS;
5019 if (RAW != '[') {
5020 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5021 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5022 ctxt->sax->error(ctxt->userData,
5023 "XML conditional section '[' expected\n");
5024 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005025 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005026 } else {
5027 NEXT;
5028 }
5029 if (xmlParserDebugEntities) {
5030 if ((ctxt->input != NULL) && (ctxt->input->filename))
5031 xmlGenericError(xmlGenericErrorContext,
5032 "%s(%d): ", ctxt->input->filename,
5033 ctxt->input->line);
5034 xmlGenericError(xmlGenericErrorContext,
5035 "Entering INCLUDE Conditional Section\n");
5036 }
5037
5038 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5039 (NXT(2) != '>'))) {
5040 const xmlChar *check = CUR_PTR;
5041 int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005042
5043 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5044 xmlParseConditionalSections(ctxt);
5045 } else if (IS_BLANK(CUR)) {
5046 NEXT;
5047 } else if (RAW == '%') {
5048 xmlParsePEReference(ctxt);
5049 } else
5050 xmlParseMarkupDecl(ctxt);
5051
5052 /*
5053 * Pop-up of finished entities.
5054 */
5055 while ((RAW == 0) && (ctxt->inputNr > 1))
5056 xmlPopInput(ctxt);
5057
Daniel Veillardfdc91562002-07-01 21:52:03 +00005058 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005059 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5060 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5061 ctxt->sax->error(ctxt->userData,
5062 "Content error in the external subset\n");
5063 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005064 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005065 break;
5066 }
5067 }
5068 if (xmlParserDebugEntities) {
5069 if ((ctxt->input != NULL) && (ctxt->input->filename))
5070 xmlGenericError(xmlGenericErrorContext,
5071 "%s(%d): ", ctxt->input->filename,
5072 ctxt->input->line);
5073 xmlGenericError(xmlGenericErrorContext,
5074 "Leaving INCLUDE Conditional Section\n");
5075 }
5076
5077 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
5078 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
5079 int state;
5080 int instate;
5081 int depth = 0;
5082
5083 SKIP(6);
5084 SKIP_BLANKS;
5085 if (RAW != '[') {
5086 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5087 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5088 ctxt->sax->error(ctxt->userData,
5089 "XML conditional section '[' expected\n");
5090 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005091 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005092 } else {
5093 NEXT;
5094 }
5095 if (xmlParserDebugEntities) {
5096 if ((ctxt->input != NULL) && (ctxt->input->filename))
5097 xmlGenericError(xmlGenericErrorContext,
5098 "%s(%d): ", ctxt->input->filename,
5099 ctxt->input->line);
5100 xmlGenericError(xmlGenericErrorContext,
5101 "Entering IGNORE Conditional Section\n");
5102 }
5103
5104 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005105 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005106 * But disable SAX event generating DTD building in the meantime
5107 */
5108 state = ctxt->disableSAX;
5109 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005110 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005111 ctxt->instate = XML_PARSER_IGNORE;
5112
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005113 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005114 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5115 depth++;
5116 SKIP(3);
5117 continue;
5118 }
5119 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5120 if (--depth >= 0) SKIP(3);
5121 continue;
5122 }
5123 NEXT;
5124 continue;
5125 }
5126
5127 ctxt->disableSAX = state;
5128 ctxt->instate = instate;
5129
5130 if (xmlParserDebugEntities) {
5131 if ((ctxt->input != NULL) && (ctxt->input->filename))
5132 xmlGenericError(xmlGenericErrorContext,
5133 "%s(%d): ", ctxt->input->filename,
5134 ctxt->input->line);
5135 xmlGenericError(xmlGenericErrorContext,
5136 "Leaving IGNORE Conditional Section\n");
5137 }
5138
5139 } else {
5140 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5141 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5142 ctxt->sax->error(ctxt->userData,
5143 "XML conditional section INCLUDE or IGNORE keyword expected\n");
5144 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005145 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005146 }
5147
5148 if (RAW == 0)
5149 SHRINK;
5150
5151 if (RAW == 0) {
5152 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
5153 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5154 ctxt->sax->error(ctxt->userData,
5155 "XML conditional section not closed\n");
5156 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005157 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005158 } else {
5159 SKIP(3);
5160 }
5161}
5162
5163/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005164 * xmlParseMarkupDecl:
5165 * @ctxt: an XML parser context
5166 *
5167 * parse Markup declarations
5168 *
5169 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5170 * NotationDecl | PI | Comment
5171 *
5172 * [ VC: Proper Declaration/PE Nesting ]
5173 * Parameter-entity replacement text must be properly nested with
5174 * markup declarations. That is to say, if either the first character
5175 * or the last character of a markup declaration (markupdecl above) is
5176 * contained in the replacement text for a parameter-entity reference,
5177 * both must be contained in the same replacement text.
5178 *
5179 * [ WFC: PEs in Internal Subset ]
5180 * In the internal DTD subset, parameter-entity references can occur
5181 * only where markup declarations can occur, not within markup declarations.
5182 * (This does not apply to references that occur in external parameter
5183 * entities or to the external subset.)
5184 */
5185void
5186xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5187 GROW;
5188 xmlParseElementDecl(ctxt);
5189 xmlParseAttributeListDecl(ctxt);
5190 xmlParseEntityDecl(ctxt);
5191 xmlParseNotationDecl(ctxt);
5192 xmlParsePI(ctxt);
5193 xmlParseComment(ctxt);
5194 /*
5195 * This is only for internal subset. On external entities,
5196 * the replacement is done before parsing stage
5197 */
5198 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5199 xmlParsePEReference(ctxt);
5200
5201 /*
5202 * Conditional sections are allowed from entities included
5203 * by PE References in the internal subset.
5204 */
5205 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5206 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5207 xmlParseConditionalSections(ctxt);
5208 }
5209 }
5210
5211 ctxt->instate = XML_PARSER_DTD;
5212}
5213
5214/**
5215 * xmlParseTextDecl:
5216 * @ctxt: an XML parser context
5217 *
5218 * parse an XML declaration header for external entities
5219 *
5220 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5221 *
5222 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5223 */
5224
5225void
5226xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5227 xmlChar *version;
5228
5229 /*
5230 * We know that '<?xml' is here.
5231 */
5232 if ((RAW == '<') && (NXT(1) == '?') &&
5233 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5234 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5235 SKIP(5);
5236 } else {
5237 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
5238 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5239 ctxt->sax->error(ctxt->userData,
5240 "Text declaration '<?xml' required\n");
5241 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005242 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005243
5244 return;
5245 }
5246
5247 if (!IS_BLANK(CUR)) {
5248 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5249 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5250 ctxt->sax->error(ctxt->userData,
5251 "Space needed after '<?xml'\n");
5252 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005253 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005254 }
5255 SKIP_BLANKS;
5256
5257 /*
5258 * We may have the VersionInfo here.
5259 */
5260 version = xmlParseVersionInfo(ctxt);
5261 if (version == NULL)
5262 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005263 else {
5264 if (!IS_BLANK(CUR)) {
5265 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5266 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5267 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5268 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005269 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard401c2112002-01-07 16:54:10 +00005270 }
5271 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005272 ctxt->input->version = version;
5273
5274 /*
5275 * We must have the encoding declaration
5276 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005277 xmlParseEncodingDecl(ctxt);
5278 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5279 /*
5280 * The XML REC instructs us to stop parsing right here
5281 */
5282 return;
5283 }
5284
5285 SKIP_BLANKS;
5286 if ((RAW == '?') && (NXT(1) == '>')) {
5287 SKIP(2);
5288 } else if (RAW == '>') {
5289 /* Deprecated old WD ... */
5290 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5291 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5292 ctxt->sax->error(ctxt->userData,
5293 "XML declaration must end-up with '?>'\n");
5294 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005295 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005296 NEXT;
5297 } else {
5298 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5299 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5300 ctxt->sax->error(ctxt->userData,
5301 "parsing XML declaration: '?>' expected\n");
5302 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005303 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005304 MOVETO_ENDTAG(CUR_PTR);
5305 NEXT;
5306 }
5307}
5308
5309/**
Owen Taylor3473f882001-02-23 17:55:21 +00005310 * xmlParseExternalSubset:
5311 * @ctxt: an XML parser context
5312 * @ExternalID: the external identifier
5313 * @SystemID: the system identifier (or URL)
5314 *
5315 * parse Markup declarations from an external subset
5316 *
5317 * [30] extSubset ::= textDecl? extSubsetDecl
5318 *
5319 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5320 */
5321void
5322xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5323 const xmlChar *SystemID) {
5324 GROW;
5325 if ((RAW == '<') && (NXT(1) == '?') &&
5326 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5327 (NXT(4) == 'l')) {
5328 xmlParseTextDecl(ctxt);
5329 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5330 /*
5331 * The XML REC instructs us to stop parsing right here
5332 */
5333 ctxt->instate = XML_PARSER_EOF;
5334 return;
5335 }
5336 }
5337 if (ctxt->myDoc == NULL) {
5338 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5339 }
5340 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5341 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5342
5343 ctxt->instate = XML_PARSER_DTD;
5344 ctxt->external = 1;
5345 while (((RAW == '<') && (NXT(1) == '?')) ||
5346 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005347 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005348 const xmlChar *check = CUR_PTR;
5349 int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005350
5351 GROW;
5352 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5353 xmlParseConditionalSections(ctxt);
5354 } else if (IS_BLANK(CUR)) {
5355 NEXT;
5356 } else if (RAW == '%') {
5357 xmlParsePEReference(ctxt);
5358 } else
5359 xmlParseMarkupDecl(ctxt);
5360
5361 /*
5362 * Pop-up of finished entities.
5363 */
5364 while ((RAW == 0) && (ctxt->inputNr > 1))
5365 xmlPopInput(ctxt);
5366
Daniel Veillardfdc91562002-07-01 21:52:03 +00005367 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005368 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5369 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5370 ctxt->sax->error(ctxt->userData,
5371 "Content error in the external subset\n");
5372 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005373 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005374 break;
5375 }
5376 }
5377
5378 if (RAW != 0) {
5379 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5380 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5381 ctxt->sax->error(ctxt->userData,
5382 "Extra content at the end of the document\n");
5383 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005384 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005385 }
5386
5387}
5388
5389/**
5390 * xmlParseReference:
5391 * @ctxt: an XML parser context
5392 *
5393 * parse and handle entity references in content, depending on the SAX
5394 * interface, this may end-up in a call to character() if this is a
5395 * CharRef, a predefined entity, if there is no reference() callback.
5396 * or if the parser was asked to switch to that mode.
5397 *
5398 * [67] Reference ::= EntityRef | CharRef
5399 */
5400void
5401xmlParseReference(xmlParserCtxtPtr ctxt) {
5402 xmlEntityPtr ent;
5403 xmlChar *val;
5404 if (RAW != '&') return;
5405
5406 if (NXT(1) == '#') {
5407 int i = 0;
5408 xmlChar out[10];
5409 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005410 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005411
5412 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5413 /*
5414 * So we are using non-UTF-8 buffers
5415 * Check that the char fit on 8bits, if not
5416 * generate a CharRef.
5417 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005418 if (value <= 0xFF) {
5419 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005420 out[1] = 0;
5421 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5422 (!ctxt->disableSAX))
5423 ctxt->sax->characters(ctxt->userData, out, 1);
5424 } else {
5425 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005426 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005427 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005428 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005429 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5430 (!ctxt->disableSAX))
5431 ctxt->sax->reference(ctxt->userData, out);
5432 }
5433 } else {
5434 /*
5435 * Just encode the value in UTF-8
5436 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005437 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005438 out[i] = 0;
5439 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5440 (!ctxt->disableSAX))
5441 ctxt->sax->characters(ctxt->userData, out, i);
5442 }
5443 } else {
5444 ent = xmlParseEntityRef(ctxt);
5445 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005446 if (!ctxt->wellFormed)
5447 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005448 if ((ent->name != NULL) &&
5449 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5450 xmlNodePtr list = NULL;
5451 int ret;
5452
5453
5454 /*
5455 * The first reference to the entity trigger a parsing phase
5456 * where the ent->children is filled with the result from
5457 * the parsing.
5458 */
5459 if (ent->children == NULL) {
5460 xmlChar *value;
5461 value = ent->content;
5462
5463 /*
5464 * Check that this entity is well formed
5465 */
5466 if ((value != NULL) &&
5467 (value[1] == 0) && (value[0] == '<') &&
5468 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5469 /*
5470 * DONE: get definite answer on this !!!
5471 * Lots of entity decls are used to declare a single
5472 * char
5473 * <!ENTITY lt "<">
5474 * Which seems to be valid since
5475 * 2.4: The ampersand character (&) and the left angle
5476 * bracket (<) may appear in their literal form only
5477 * when used ... They are also legal within the literal
5478 * entity value of an internal entity declaration;i
5479 * see "4.3.2 Well-Formed Parsed Entities".
5480 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5481 * Looking at the OASIS test suite and James Clark
5482 * tests, this is broken. However the XML REC uses
5483 * it. Is the XML REC not well-formed ????
5484 * This is a hack to avoid this problem
5485 *
5486 * ANSWER: since lt gt amp .. are already defined,
5487 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005488 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005489 * is lousy but acceptable.
5490 */
5491 list = xmlNewDocText(ctxt->myDoc, value);
5492 if (list != NULL) {
5493 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5494 (ent->children == NULL)) {
5495 ent->children = list;
5496 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005497 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005498 list->parent = (xmlNodePtr) ent;
5499 } else {
5500 xmlFreeNodeList(list);
5501 }
5502 } else if (list != NULL) {
5503 xmlFreeNodeList(list);
5504 }
5505 } else {
5506 /*
5507 * 4.3.2: An internal general parsed entity is well-formed
5508 * if its replacement text matches the production labeled
5509 * content.
5510 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005511
5512 void *user_data;
5513 /*
5514 * This is a bit hackish but this seems the best
5515 * way to make sure both SAX and DOM entity support
5516 * behaves okay.
5517 */
5518 if (ctxt->userData == ctxt)
5519 user_data = NULL;
5520 else
5521 user_data = ctxt->userData;
5522
Owen Taylor3473f882001-02-23 17:55:21 +00005523 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5524 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005525 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5526 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005527 ctxt->depth--;
5528 } else if (ent->etype ==
5529 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5530 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005531 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005532 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005533 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005534 ctxt->depth--;
5535 } else {
5536 ret = -1;
5537 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5538 ctxt->sax->error(ctxt->userData,
5539 "Internal: invalid entity type\n");
5540 }
5541 if (ret == XML_ERR_ENTITY_LOOP) {
5542 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5543 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5544 ctxt->sax->error(ctxt->userData,
5545 "Detected entity reference loop\n");
5546 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005547 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005548 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005549 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005550 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5551 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005552 (ent->children == NULL)) {
5553 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005554 if (ctxt->replaceEntities) {
5555 /*
5556 * Prune it directly in the generated document
5557 * except for single text nodes.
5558 */
5559 if ((list->type == XML_TEXT_NODE) &&
5560 (list->next == NULL)) {
5561 list->parent = (xmlNodePtr) ent;
5562 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005563 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005564 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005565 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005566 while (list != NULL) {
5567 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005568 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005569 if (list->next == NULL)
5570 ent->last = list;
5571 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005572 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005573 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005574 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5575 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005576 }
5577 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005578 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005579 while (list != NULL) {
5580 list->parent = (xmlNodePtr) ent;
5581 if (list->next == NULL)
5582 ent->last = list;
5583 list = list->next;
5584 }
Owen Taylor3473f882001-02-23 17:55:21 +00005585 }
5586 } else {
5587 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005588 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005589 }
5590 } else if (ret > 0) {
5591 ctxt->errNo = ret;
5592 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5593 ctxt->sax->error(ctxt->userData,
5594 "Entity value required\n");
5595 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005596 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005597 } else if (list != NULL) {
5598 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005599 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005600 }
5601 }
5602 }
5603 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5604 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5605 /*
5606 * Create a node.
5607 */
5608 ctxt->sax->reference(ctxt->userData, ent->name);
5609 return;
5610 } else if (ctxt->replaceEntities) {
5611 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5612 /*
5613 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005614 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005615 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005616 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005617 if (list == NULL) {
Daniel Veillard8107a222002-01-13 14:10:10 +00005618 xmlNodePtr new = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005619 cur = ent->children;
5620 while (cur != NULL) {
5621 new = xmlCopyNode(cur, 1);
Daniel Veillard8f872442003-01-09 23:19:02 +00005622 if (new != NULL) {
5623 new->_private = cur->_private;
5624 if (firstChild == NULL){
5625 firstChild = new;
5626 }
5627 xmlAddChild(ctxt->node, new);
Daniel Veillard8107a222002-01-13 14:10:10 +00005628 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005629 if (cur == ent->last)
5630 break;
5631 cur = cur->next;
5632 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005633 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5634 xmlAddEntityReference(ent, firstChild, new);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005635 } else {
5636 /*
5637 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005638 * node with a possible previous text one which
5639 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005640 */
5641 if (ent->children->type == XML_TEXT_NODE)
5642 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5643 if ((ent->last != ent->children) &&
5644 (ent->last->type == XML_TEXT_NODE))
5645 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5646 xmlAddChildList(ctxt->node, ent->children);
5647 }
5648
Owen Taylor3473f882001-02-23 17:55:21 +00005649 /*
5650 * This is to avoid a nasty side effect, see
5651 * characters() in SAX.c
5652 */
5653 ctxt->nodemem = 0;
5654 ctxt->nodelen = 0;
5655 return;
5656 } else {
5657 /*
5658 * Probably running in SAX mode
5659 */
5660 xmlParserInputPtr input;
5661
5662 input = xmlNewEntityInputStream(ctxt, ent);
5663 xmlPushInput(ctxt, input);
5664 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5665 (RAW == '<') && (NXT(1) == '?') &&
5666 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5667 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5668 xmlParseTextDecl(ctxt);
5669 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5670 /*
5671 * The XML REC instructs us to stop parsing right here
5672 */
5673 ctxt->instate = XML_PARSER_EOF;
5674 return;
5675 }
5676 if (input->standalone == 1) {
5677 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5678 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5679 ctxt->sax->error(ctxt->userData,
5680 "external parsed entities cannot be standalone\n");
5681 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005682 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005683 }
5684 }
5685 return;
5686 }
5687 }
5688 } else {
5689 val = ent->content;
5690 if (val == NULL) return;
5691 /*
5692 * inline the entity.
5693 */
5694 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5695 (!ctxt->disableSAX))
5696 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5697 }
5698 }
5699}
5700
5701/**
5702 * xmlParseEntityRef:
5703 * @ctxt: an XML parser context
5704 *
5705 * parse ENTITY references declarations
5706 *
5707 * [68] EntityRef ::= '&' Name ';'
5708 *
5709 * [ WFC: Entity Declared ]
5710 * In a document without any DTD, a document with only an internal DTD
5711 * subset which contains no parameter entity references, or a document
5712 * with "standalone='yes'", the Name given in the entity reference
5713 * must match that in an entity declaration, except that well-formed
5714 * documents need not declare any of the following entities: amp, lt,
5715 * gt, apos, quot. The declaration of a parameter entity must precede
5716 * any reference to it. Similarly, the declaration of a general entity
5717 * must precede any reference to it which appears in a default value in an
5718 * attribute-list declaration. Note that if entities are declared in the
5719 * external subset or in external parameter entities, a non-validating
5720 * processor is not obligated to read and process their declarations;
5721 * for such documents, the rule that an entity must be declared is a
5722 * well-formedness constraint only if standalone='yes'.
5723 *
5724 * [ WFC: Parsed Entity ]
5725 * An entity reference must not contain the name of an unparsed entity
5726 *
5727 * Returns the xmlEntityPtr if found, or NULL otherwise.
5728 */
5729xmlEntityPtr
5730xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5731 xmlChar *name;
5732 xmlEntityPtr ent = NULL;
5733
5734 GROW;
5735
5736 if (RAW == '&') {
5737 NEXT;
5738 name = xmlParseName(ctxt);
5739 if (name == NULL) {
5740 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5741 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5742 ctxt->sax->error(ctxt->userData,
5743 "xmlParseEntityRef: no name\n");
5744 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005745 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005746 } else {
5747 if (RAW == ';') {
5748 NEXT;
5749 /*
5750 * Ask first SAX for entity resolution, otherwise try the
5751 * predefined set.
5752 */
5753 if (ctxt->sax != NULL) {
5754 if (ctxt->sax->getEntity != NULL)
5755 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005756 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00005757 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005758 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
5759 (ctxt->userData==ctxt)) {
Daniel Veillard5997aca2002-03-18 18:36:20 +00005760 ent = getEntity(ctxt, name);
5761 }
Owen Taylor3473f882001-02-23 17:55:21 +00005762 }
5763 /*
5764 * [ WFC: Entity Declared ]
5765 * In a document without any DTD, a document with only an
5766 * internal DTD subset which contains no parameter entity
5767 * references, or a document with "standalone='yes'", the
5768 * Name given in the entity reference must match that in an
5769 * entity declaration, except that well-formed documents
5770 * need not declare any of the following entities: amp, lt,
5771 * gt, apos, quot.
5772 * The declaration of a parameter entity must precede any
5773 * reference to it.
5774 * Similarly, the declaration of a general entity must
5775 * precede any reference to it which appears in a default
5776 * value in an attribute-list declaration. Note that if
5777 * entities are declared in the external subset or in
5778 * external parameter entities, a non-validating processor
5779 * is not obligated to read and process their declarations;
5780 * for such documents, the rule that an entity must be
5781 * declared is a well-formedness constraint only if
5782 * standalone='yes'.
5783 */
5784 if (ent == NULL) {
5785 if ((ctxt->standalone == 1) ||
5786 ((ctxt->hasExternalSubset == 0) &&
5787 (ctxt->hasPErefs == 0))) {
5788 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5789 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5790 ctxt->sax->error(ctxt->userData,
5791 "Entity '%s' not defined\n", name);
5792 ctxt->wellFormed = 0;
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005793 ctxt->valid = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005794 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005795 } else {
5796 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005797 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005798 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005799 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005800 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005801 }
5802 }
5803
5804 /*
5805 * [ WFC: Parsed Entity ]
5806 * An entity reference must not contain the name of an
5807 * unparsed entity
5808 */
5809 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5810 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5811 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5812 ctxt->sax->error(ctxt->userData,
5813 "Entity reference to unparsed entity %s\n", name);
5814 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005815 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005816 }
5817
5818 /*
5819 * [ WFC: No External Entity References ]
5820 * Attribute values cannot contain direct or indirect
5821 * entity references to external entities.
5822 */
5823 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5824 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5825 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5826 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5827 ctxt->sax->error(ctxt->userData,
5828 "Attribute references external entity '%s'\n", name);
5829 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005830 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005831 }
5832 /*
5833 * [ WFC: No < in Attribute Values ]
5834 * The replacement text of any entity referred to directly or
5835 * indirectly in an attribute value (other than "&lt;") must
5836 * not contain a <.
5837 */
5838 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5839 (ent != NULL) &&
5840 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5841 (ent->content != NULL) &&
5842 (xmlStrchr(ent->content, '<'))) {
5843 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5844 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5845 ctxt->sax->error(ctxt->userData,
5846 "'<' in entity '%s' is not allowed in attributes values\n", name);
5847 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005848 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005849 }
5850
5851 /*
5852 * Internal check, no parameter entities here ...
5853 */
5854 else {
5855 switch (ent->etype) {
5856 case XML_INTERNAL_PARAMETER_ENTITY:
5857 case XML_EXTERNAL_PARAMETER_ENTITY:
5858 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5859 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5860 ctxt->sax->error(ctxt->userData,
5861 "Attempt to reference the parameter entity '%s'\n", name);
5862 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005863 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005864 break;
5865 default:
5866 break;
5867 }
5868 }
5869
5870 /*
5871 * [ WFC: No Recursion ]
5872 * A parsed entity must not contain a recursive reference
5873 * to itself, either directly or indirectly.
5874 * Done somewhere else
5875 */
5876
5877 } else {
5878 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5879 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5880 ctxt->sax->error(ctxt->userData,
5881 "xmlParseEntityRef: expecting ';'\n");
5882 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005883 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005884 }
5885 xmlFree(name);
5886 }
5887 }
5888 return(ent);
5889}
5890
5891/**
5892 * xmlParseStringEntityRef:
5893 * @ctxt: an XML parser context
5894 * @str: a pointer to an index in the string
5895 *
5896 * parse ENTITY references declarations, but this version parses it from
5897 * a string value.
5898 *
5899 * [68] EntityRef ::= '&' Name ';'
5900 *
5901 * [ WFC: Entity Declared ]
5902 * In a document without any DTD, a document with only an internal DTD
5903 * subset which contains no parameter entity references, or a document
5904 * with "standalone='yes'", the Name given in the entity reference
5905 * must match that in an entity declaration, except that well-formed
5906 * documents need not declare any of the following entities: amp, lt,
5907 * gt, apos, quot. The declaration of a parameter entity must precede
5908 * any reference to it. Similarly, the declaration of a general entity
5909 * must precede any reference to it which appears in a default value in an
5910 * attribute-list declaration. Note that if entities are declared in the
5911 * external subset or in external parameter entities, a non-validating
5912 * processor is not obligated to read and process their declarations;
5913 * for such documents, the rule that an entity must be declared is a
5914 * well-formedness constraint only if standalone='yes'.
5915 *
5916 * [ WFC: Parsed Entity ]
5917 * An entity reference must not contain the name of an unparsed entity
5918 *
5919 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5920 * is updated to the current location in the string.
5921 */
5922xmlEntityPtr
5923xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5924 xmlChar *name;
5925 const xmlChar *ptr;
5926 xmlChar cur;
5927 xmlEntityPtr ent = NULL;
5928
5929 if ((str == NULL) || (*str == NULL))
5930 return(NULL);
5931 ptr = *str;
5932 cur = *ptr;
5933 if (cur == '&') {
5934 ptr++;
5935 cur = *ptr;
5936 name = xmlParseStringName(ctxt, &ptr);
5937 if (name == NULL) {
5938 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5939 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5940 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005941 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005942 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005943 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005944 } else {
5945 if (*ptr == ';') {
5946 ptr++;
5947 /*
5948 * Ask first SAX for entity resolution, otherwise try the
5949 * predefined set.
5950 */
5951 if (ctxt->sax != NULL) {
5952 if (ctxt->sax->getEntity != NULL)
5953 ent = ctxt->sax->getEntity(ctxt->userData, name);
5954 if (ent == NULL)
5955 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005956 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5957 ent = getEntity(ctxt, name);
5958 }
Owen Taylor3473f882001-02-23 17:55:21 +00005959 }
5960 /*
5961 * [ WFC: Entity Declared ]
5962 * In a document without any DTD, a document with only an
5963 * internal DTD subset which contains no parameter entity
5964 * references, or a document with "standalone='yes'", the
5965 * Name given in the entity reference must match that in an
5966 * entity declaration, except that well-formed documents
5967 * need not declare any of the following entities: amp, lt,
5968 * gt, apos, quot.
5969 * The declaration of a parameter entity must precede any
5970 * reference to it.
5971 * Similarly, the declaration of a general entity must
5972 * precede any reference to it which appears in a default
5973 * value in an attribute-list declaration. Note that if
5974 * entities are declared in the external subset or in
5975 * external parameter entities, a non-validating processor
5976 * is not obligated to read and process their declarations;
5977 * for such documents, the rule that an entity must be
5978 * declared is a well-formedness constraint only if
5979 * standalone='yes'.
5980 */
5981 if (ent == NULL) {
5982 if ((ctxt->standalone == 1) ||
5983 ((ctxt->hasExternalSubset == 0) &&
5984 (ctxt->hasPErefs == 0))) {
5985 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5987 ctxt->sax->error(ctxt->userData,
5988 "Entity '%s' not defined\n", name);
5989 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005990 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005991 } else {
5992 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5993 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5994 ctxt->sax->warning(ctxt->userData,
5995 "Entity '%s' not defined\n", name);
5996 }
5997 }
5998
5999 /*
6000 * [ WFC: Parsed Entity ]
6001 * An entity reference must not contain the name of an
6002 * unparsed entity
6003 */
6004 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
6005 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
6006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6007 ctxt->sax->error(ctxt->userData,
6008 "Entity reference to unparsed entity %s\n", name);
6009 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006010 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006011 }
6012
6013 /*
6014 * [ WFC: No External Entity References ]
6015 * Attribute values cannot contain direct or indirect
6016 * entity references to external entities.
6017 */
6018 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6019 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
6020 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
6021 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6022 ctxt->sax->error(ctxt->userData,
6023 "Attribute references external entity '%s'\n", name);
6024 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006025 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006026 }
6027 /*
6028 * [ WFC: No < in Attribute Values ]
6029 * The replacement text of any entity referred to directly or
6030 * indirectly in an attribute value (other than "&lt;") must
6031 * not contain a <.
6032 */
6033 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6034 (ent != NULL) &&
6035 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6036 (ent->content != NULL) &&
6037 (xmlStrchr(ent->content, '<'))) {
6038 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6039 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6040 ctxt->sax->error(ctxt->userData,
6041 "'<' in entity '%s' is not allowed in attributes values\n", name);
6042 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006043 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006044 }
6045
6046 /*
6047 * Internal check, no parameter entities here ...
6048 */
6049 else {
6050 switch (ent->etype) {
6051 case XML_INTERNAL_PARAMETER_ENTITY:
6052 case XML_EXTERNAL_PARAMETER_ENTITY:
6053 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6054 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6055 ctxt->sax->error(ctxt->userData,
6056 "Attempt to reference the parameter entity '%s'\n", name);
6057 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006058 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006059 break;
6060 default:
6061 break;
6062 }
6063 }
6064
6065 /*
6066 * [ WFC: No Recursion ]
6067 * A parsed entity must not contain a recursive reference
6068 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006069 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006070 */
6071
6072 } else {
6073 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6074 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6075 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00006076 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006077 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006078 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006079 }
6080 xmlFree(name);
6081 }
6082 }
6083 *str = ptr;
6084 return(ent);
6085}
6086
6087/**
6088 * xmlParsePEReference:
6089 * @ctxt: an XML parser context
6090 *
6091 * parse PEReference declarations
6092 * The entity content is handled directly by pushing it's content as
6093 * a new input stream.
6094 *
6095 * [69] PEReference ::= '%' Name ';'
6096 *
6097 * [ WFC: No Recursion ]
6098 * A parsed entity must not contain a recursive
6099 * reference to itself, either directly or indirectly.
6100 *
6101 * [ WFC: Entity Declared ]
6102 * In a document without any DTD, a document with only an internal DTD
6103 * subset which contains no parameter entity references, or a document
6104 * with "standalone='yes'", ... ... The declaration of a parameter
6105 * entity must precede any reference to it...
6106 *
6107 * [ VC: Entity Declared ]
6108 * In a document with an external subset or external parameter entities
6109 * with "standalone='no'", ... ... The declaration of a parameter entity
6110 * must precede any reference to it...
6111 *
6112 * [ WFC: In DTD ]
6113 * Parameter-entity references may only appear in the DTD.
6114 * NOTE: misleading but this is handled.
6115 */
6116void
6117xmlParsePEReference(xmlParserCtxtPtr ctxt) {
6118 xmlChar *name;
6119 xmlEntityPtr entity = NULL;
6120 xmlParserInputPtr input;
6121
6122 if (RAW == '%') {
6123 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006124 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006125 if (name == NULL) {
6126 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6127 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6128 ctxt->sax->error(ctxt->userData,
6129 "xmlParsePEReference: no name\n");
6130 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006131 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006132 } else {
6133 if (RAW == ';') {
6134 NEXT;
6135 if ((ctxt->sax != NULL) &&
6136 (ctxt->sax->getParameterEntity != NULL))
6137 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6138 name);
6139 if (entity == NULL) {
6140 /*
6141 * [ WFC: Entity Declared ]
6142 * In a document without any DTD, a document with only an
6143 * internal DTD subset which contains no parameter entity
6144 * references, or a document with "standalone='yes'", ...
6145 * ... The declaration of a parameter entity must precede
6146 * any reference to it...
6147 */
6148 if ((ctxt->standalone == 1) ||
6149 ((ctxt->hasExternalSubset == 0) &&
6150 (ctxt->hasPErefs == 0))) {
6151 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6152 if ((!ctxt->disableSAX) &&
6153 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6154 ctxt->sax->error(ctxt->userData,
6155 "PEReference: %%%s; not found\n", name);
6156 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006157 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006158 } else {
6159 /*
6160 * [ VC: Entity Declared ]
6161 * In a document with an external subset or external
6162 * parameter entities with "standalone='no'", ...
6163 * ... The declaration of a parameter entity must precede
6164 * any reference to it...
6165 */
6166 if ((!ctxt->disableSAX) &&
6167 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6168 ctxt->sax->warning(ctxt->userData,
6169 "PEReference: %%%s; not found\n", name);
6170 ctxt->valid = 0;
6171 }
6172 } else {
6173 /*
6174 * Internal checking in case the entity quest barfed
6175 */
6176 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6177 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6178 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6179 ctxt->sax->warning(ctxt->userData,
6180 "Internal: %%%s; is not a parameter entity\n", name);
Daniel Veillardf5582f12002-06-11 10:08:16 +00006181 } else if (ctxt->input->free != deallocblankswrapper) {
6182 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
6183 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00006184 } else {
6185 /*
6186 * TODO !!!
6187 * handle the extra spaces added before and after
6188 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6189 */
6190 input = xmlNewEntityInputStream(ctxt, entity);
6191 xmlPushInput(ctxt, input);
6192 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6193 (RAW == '<') && (NXT(1) == '?') &&
6194 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6195 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6196 xmlParseTextDecl(ctxt);
6197 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6198 /*
6199 * The XML REC instructs us to stop parsing
6200 * right here
6201 */
6202 ctxt->instate = XML_PARSER_EOF;
6203 xmlFree(name);
6204 return;
6205 }
6206 }
Owen Taylor3473f882001-02-23 17:55:21 +00006207 }
6208 }
6209 ctxt->hasPErefs = 1;
6210 } else {
6211 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6212 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6213 ctxt->sax->error(ctxt->userData,
6214 "xmlParsePEReference: expecting ';'\n");
6215 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006216 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006217 }
6218 xmlFree(name);
6219 }
6220 }
6221}
6222
6223/**
6224 * xmlParseStringPEReference:
6225 * @ctxt: an XML parser context
6226 * @str: a pointer to an index in the string
6227 *
6228 * parse PEReference declarations
6229 *
6230 * [69] PEReference ::= '%' Name ';'
6231 *
6232 * [ WFC: No Recursion ]
6233 * A parsed entity must not contain a recursive
6234 * reference to itself, either directly or indirectly.
6235 *
6236 * [ WFC: Entity Declared ]
6237 * In a document without any DTD, a document with only an internal DTD
6238 * subset which contains no parameter entity references, or a document
6239 * with "standalone='yes'", ... ... The declaration of a parameter
6240 * entity must precede any reference to it...
6241 *
6242 * [ VC: Entity Declared ]
6243 * In a document with an external subset or external parameter entities
6244 * with "standalone='no'", ... ... The declaration of a parameter entity
6245 * must precede any reference to it...
6246 *
6247 * [ WFC: In DTD ]
6248 * Parameter-entity references may only appear in the DTD.
6249 * NOTE: misleading but this is handled.
6250 *
6251 * Returns the string of the entity content.
6252 * str is updated to the current value of the index
6253 */
6254xmlEntityPtr
6255xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6256 const xmlChar *ptr;
6257 xmlChar cur;
6258 xmlChar *name;
6259 xmlEntityPtr entity = NULL;
6260
6261 if ((str == NULL) || (*str == NULL)) return(NULL);
6262 ptr = *str;
6263 cur = *ptr;
6264 if (cur == '%') {
6265 ptr++;
6266 cur = *ptr;
6267 name = xmlParseStringName(ctxt, &ptr);
6268 if (name == NULL) {
6269 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6270 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6271 ctxt->sax->error(ctxt->userData,
6272 "xmlParseStringPEReference: no name\n");
6273 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006274 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006275 } else {
6276 cur = *ptr;
6277 if (cur == ';') {
6278 ptr++;
6279 cur = *ptr;
6280 if ((ctxt->sax != NULL) &&
6281 (ctxt->sax->getParameterEntity != NULL))
6282 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6283 name);
6284 if (entity == NULL) {
6285 /*
6286 * [ WFC: Entity Declared ]
6287 * In a document without any DTD, a document with only an
6288 * internal DTD subset which contains no parameter entity
6289 * references, or a document with "standalone='yes'", ...
6290 * ... The declaration of a parameter entity must precede
6291 * any reference to it...
6292 */
6293 if ((ctxt->standalone == 1) ||
6294 ((ctxt->hasExternalSubset == 0) &&
6295 (ctxt->hasPErefs == 0))) {
6296 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6297 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6298 ctxt->sax->error(ctxt->userData,
6299 "PEReference: %%%s; not found\n", name);
6300 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006301 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006302 } else {
6303 /*
6304 * [ VC: Entity Declared ]
6305 * In a document with an external subset or external
6306 * parameter entities with "standalone='no'", ...
6307 * ... The declaration of a parameter entity must
6308 * precede any reference to it...
6309 */
6310 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6311 ctxt->sax->warning(ctxt->userData,
6312 "PEReference: %%%s; not found\n", name);
6313 ctxt->valid = 0;
6314 }
6315 } else {
6316 /*
6317 * Internal checking in case the entity quest barfed
6318 */
6319 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6320 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6321 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6322 ctxt->sax->warning(ctxt->userData,
6323 "Internal: %%%s; is not a parameter entity\n", name);
6324 }
6325 }
6326 ctxt->hasPErefs = 1;
6327 } else {
6328 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6329 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6330 ctxt->sax->error(ctxt->userData,
6331 "xmlParseStringPEReference: expecting ';'\n");
6332 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006333 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006334 }
6335 xmlFree(name);
6336 }
6337 }
6338 *str = ptr;
6339 return(entity);
6340}
6341
6342/**
6343 * xmlParseDocTypeDecl:
6344 * @ctxt: an XML parser context
6345 *
6346 * parse a DOCTYPE declaration
6347 *
6348 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6349 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6350 *
6351 * [ VC: Root Element Type ]
6352 * The Name in the document type declaration must match the element
6353 * type of the root element.
6354 */
6355
6356void
6357xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6358 xmlChar *name = NULL;
6359 xmlChar *ExternalID = NULL;
6360 xmlChar *URI = NULL;
6361
6362 /*
6363 * We know that '<!DOCTYPE' has been detected.
6364 */
6365 SKIP(9);
6366
6367 SKIP_BLANKS;
6368
6369 /*
6370 * Parse the DOCTYPE name.
6371 */
6372 name = xmlParseName(ctxt);
6373 if (name == NULL) {
6374 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6375 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6376 ctxt->sax->error(ctxt->userData,
6377 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6378 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006379 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006380 }
6381 ctxt->intSubName = name;
6382
6383 SKIP_BLANKS;
6384
6385 /*
6386 * Check for SystemID and ExternalID
6387 */
6388 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6389
6390 if ((URI != NULL) || (ExternalID != NULL)) {
6391 ctxt->hasExternalSubset = 1;
6392 }
6393 ctxt->extSubURI = URI;
6394 ctxt->extSubSystem = ExternalID;
6395
6396 SKIP_BLANKS;
6397
6398 /*
6399 * Create and update the internal subset.
6400 */
6401 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6402 (!ctxt->disableSAX))
6403 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6404
6405 /*
6406 * Is there any internal subset declarations ?
6407 * they are handled separately in xmlParseInternalSubset()
6408 */
6409 if (RAW == '[')
6410 return;
6411
6412 /*
6413 * We should be at the end of the DOCTYPE declaration.
6414 */
6415 if (RAW != '>') {
6416 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6417 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006418 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006419 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006420 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006421 }
6422 NEXT;
6423}
6424
6425/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006426 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006427 * @ctxt: an XML parser context
6428 *
6429 * parse the internal subset declaration
6430 *
6431 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6432 */
6433
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006434static void
Owen Taylor3473f882001-02-23 17:55:21 +00006435xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6436 /*
6437 * Is there any DTD definition ?
6438 */
6439 if (RAW == '[') {
6440 ctxt->instate = XML_PARSER_DTD;
6441 NEXT;
6442 /*
6443 * Parse the succession of Markup declarations and
6444 * PEReferences.
6445 * Subsequence (markupdecl | PEReference | S)*
6446 */
6447 while (RAW != ']') {
6448 const xmlChar *check = CUR_PTR;
6449 int cons = ctxt->input->consumed;
6450
6451 SKIP_BLANKS;
6452 xmlParseMarkupDecl(ctxt);
6453 xmlParsePEReference(ctxt);
6454
6455 /*
6456 * Pop-up of finished entities.
6457 */
6458 while ((RAW == 0) && (ctxt->inputNr > 1))
6459 xmlPopInput(ctxt);
6460
6461 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6462 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6463 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6464 ctxt->sax->error(ctxt->userData,
6465 "xmlParseInternalSubset: error detected in Markup declaration\n");
6466 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006467 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006468 break;
6469 }
6470 }
6471 if (RAW == ']') {
6472 NEXT;
6473 SKIP_BLANKS;
6474 }
6475 }
6476
6477 /*
6478 * We should be at the end of the DOCTYPE declaration.
6479 */
6480 if (RAW != '>') {
6481 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6482 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006483 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006484 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006485 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006486 }
6487 NEXT;
6488}
6489
6490/**
6491 * xmlParseAttribute:
6492 * @ctxt: an XML parser context
6493 * @value: a xmlChar ** used to store the value of the attribute
6494 *
6495 * parse an attribute
6496 *
6497 * [41] Attribute ::= Name Eq AttValue
6498 *
6499 * [ WFC: No External Entity References ]
6500 * Attribute values cannot contain direct or indirect entity references
6501 * to external entities.
6502 *
6503 * [ WFC: No < in Attribute Values ]
6504 * The replacement text of any entity referred to directly or indirectly in
6505 * an attribute value (other than "&lt;") must not contain a <.
6506 *
6507 * [ VC: Attribute Value Type ]
6508 * The attribute must have been declared; the value must be of the type
6509 * declared for it.
6510 *
6511 * [25] Eq ::= S? '=' S?
6512 *
6513 * With namespace:
6514 *
6515 * [NS 11] Attribute ::= QName Eq AttValue
6516 *
6517 * Also the case QName == xmlns:??? is handled independently as a namespace
6518 * definition.
6519 *
6520 * Returns the attribute name, and the value in *value.
6521 */
6522
6523xmlChar *
6524xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6525 xmlChar *name, *val;
6526
6527 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006528 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006529 name = xmlParseName(ctxt);
6530 if (name == NULL) {
6531 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6532 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6533 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6534 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006535 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006536 return(NULL);
6537 }
6538
6539 /*
6540 * read the value
6541 */
6542 SKIP_BLANKS;
6543 if (RAW == '=') {
6544 NEXT;
6545 SKIP_BLANKS;
6546 val = xmlParseAttValue(ctxt);
6547 ctxt->instate = XML_PARSER_CONTENT;
6548 } else {
6549 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6550 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6551 ctxt->sax->error(ctxt->userData,
6552 "Specification mandate value for attribute %s\n", name);
6553 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006554 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006555 xmlFree(name);
6556 return(NULL);
6557 }
6558
6559 /*
6560 * Check that xml:lang conforms to the specification
6561 * No more registered as an error, just generate a warning now
6562 * since this was deprecated in XML second edition
6563 */
6564 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6565 if (!xmlCheckLanguageID(val)) {
6566 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6567 ctxt->sax->warning(ctxt->userData,
6568 "Malformed value for xml:lang : %s\n", val);
6569 }
6570 }
6571
6572 /*
6573 * Check that xml:space conforms to the specification
6574 */
6575 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6576 if (xmlStrEqual(val, BAD_CAST "default"))
6577 *(ctxt->space) = 0;
6578 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6579 *(ctxt->space) = 1;
6580 else {
6581 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6582 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6583 ctxt->sax->error(ctxt->userData,
6584"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6585 val);
6586 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006587 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006588 }
6589 }
6590
6591 *value = val;
6592 return(name);
6593}
6594
6595/**
6596 * xmlParseStartTag:
6597 * @ctxt: an XML parser context
6598 *
6599 * parse a start of tag either for rule element or
6600 * EmptyElement. In both case we don't parse the tag closing chars.
6601 *
6602 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6603 *
6604 * [ WFC: Unique Att Spec ]
6605 * No attribute name may appear more than once in the same start-tag or
6606 * empty-element tag.
6607 *
6608 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6609 *
6610 * [ WFC: Unique Att Spec ]
6611 * No attribute name may appear more than once in the same start-tag or
6612 * empty-element tag.
6613 *
6614 * With namespace:
6615 *
6616 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6617 *
6618 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6619 *
6620 * Returns the element name parsed
6621 */
6622
6623xmlChar *
6624xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6625 xmlChar *name;
6626 xmlChar *attname;
6627 xmlChar *attvalue;
6628 const xmlChar **atts = NULL;
6629 int nbatts = 0;
6630 int maxatts = 0;
6631 int i;
6632
6633 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006634 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006635
6636 name = xmlParseName(ctxt);
6637 if (name == NULL) {
6638 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6639 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6640 ctxt->sax->error(ctxt->userData,
6641 "xmlParseStartTag: invalid element name\n");
6642 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006643 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006644 return(NULL);
6645 }
6646
6647 /*
6648 * Now parse the attributes, it ends up with the ending
6649 *
6650 * (S Attribute)* S?
6651 */
6652 SKIP_BLANKS;
6653 GROW;
6654
Daniel Veillard21a0f912001-02-25 19:54:14 +00006655 while ((RAW != '>') &&
6656 ((RAW != '/') || (NXT(1) != '>')) &&
6657 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006658 const xmlChar *q = CUR_PTR;
6659 int cons = ctxt->input->consumed;
6660
6661 attname = xmlParseAttribute(ctxt, &attvalue);
6662 if ((attname != NULL) && (attvalue != NULL)) {
6663 /*
6664 * [ WFC: Unique Att Spec ]
6665 * No attribute name may appear more than once in the same
6666 * start-tag or empty-element tag.
6667 */
6668 for (i = 0; i < nbatts;i += 2) {
6669 if (xmlStrEqual(atts[i], attname)) {
6670 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6671 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6672 ctxt->sax->error(ctxt->userData,
6673 "Attribute %s redefined\n",
6674 attname);
6675 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006676 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006677 xmlFree(attname);
6678 xmlFree(attvalue);
6679 goto failed;
6680 }
6681 }
6682
6683 /*
6684 * Add the pair to atts
6685 */
6686 if (atts == NULL) {
6687 maxatts = 10;
6688 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6689 if (atts == NULL) {
6690 xmlGenericError(xmlGenericErrorContext,
6691 "malloc of %ld byte failed\n",
6692 maxatts * (long)sizeof(xmlChar *));
6693 return(NULL);
6694 }
6695 } else if (nbatts + 4 > maxatts) {
6696 maxatts *= 2;
6697 atts = (const xmlChar **) xmlRealloc((void *) atts,
6698 maxatts * sizeof(xmlChar *));
6699 if (atts == NULL) {
6700 xmlGenericError(xmlGenericErrorContext,
6701 "realloc of %ld byte failed\n",
6702 maxatts * (long)sizeof(xmlChar *));
6703 return(NULL);
6704 }
6705 }
6706 atts[nbatts++] = attname;
6707 atts[nbatts++] = attvalue;
6708 atts[nbatts] = NULL;
6709 atts[nbatts + 1] = NULL;
6710 } else {
6711 if (attname != NULL)
6712 xmlFree(attname);
6713 if (attvalue != NULL)
6714 xmlFree(attvalue);
6715 }
6716
6717failed:
6718
Daniel Veillard3772de32002-12-17 10:31:45 +00006719 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006720 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6721 break;
6722 if (!IS_BLANK(RAW)) {
6723 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6724 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6725 ctxt->sax->error(ctxt->userData,
6726 "attributes construct error\n");
6727 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006728 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006729 }
6730 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006731 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6732 (attname == NULL) && (attvalue == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006733 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6734 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6735 ctxt->sax->error(ctxt->userData,
6736 "xmlParseStartTag: problem parsing attributes\n");
6737 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006738 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006739 break;
6740 }
6741 GROW;
6742 }
6743
6744 /*
6745 * SAX: Start of Element !
6746 */
6747 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6748 (!ctxt->disableSAX))
6749 ctxt->sax->startElement(ctxt->userData, name, atts);
6750
6751 if (atts != NULL) {
6752 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6753 xmlFree((void *) atts);
6754 }
6755 return(name);
6756}
6757
6758/**
6759 * xmlParseEndTag:
6760 * @ctxt: an XML parser context
6761 *
6762 * parse an end of tag
6763 *
6764 * [42] ETag ::= '</' Name S? '>'
6765 *
6766 * With namespace
6767 *
6768 * [NS 9] ETag ::= '</' QName S? '>'
6769 */
6770
6771void
6772xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6773 xmlChar *name;
6774 xmlChar *oldname;
6775
6776 GROW;
6777 if ((RAW != '<') || (NXT(1) != '/')) {
6778 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6779 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6780 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6781 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006782 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006783 return;
6784 }
6785 SKIP(2);
6786
Daniel Veillard46de64e2002-05-29 08:21:33 +00006787 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006788
6789 /*
6790 * We should definitely be at the ending "S? '>'" part
6791 */
6792 GROW;
6793 SKIP_BLANKS;
6794 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6795 ctxt->errNo = XML_ERR_GT_REQUIRED;
6796 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6797 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6798 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006799 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006800 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006801 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006802
6803 /*
6804 * [ WFC: Element Type Match ]
6805 * The Name in an element's end-tag must match the element type in the
6806 * start-tag.
6807 *
6808 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006809 if (name != (xmlChar*)1) {
Owen Taylor3473f882001-02-23 17:55:21 +00006810 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6811 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00006812 if (name != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00006813 ctxt->sax->error(ctxt->userData,
6814 "Opening and ending tag mismatch: %s and %s\n",
6815 ctxt->name, name);
Daniel Veillard46de64e2002-05-29 08:21:33 +00006816 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006817 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006818 "Ending tag error for: %s\n", ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006819 }
6820
6821 }
6822 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006823 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6824#if 0
6825 else {
6826 /*
6827 * Recover in case of one missing close
6828 */
6829 if ((ctxt->nameNr > 2) &&
6830 (xmlStrEqual(ctxt->nameTab[ctxt->nameNr -2], name))) {
6831 namePop(ctxt);
6832 spacePop(ctxt);
6833 }
6834 }
6835#endif
6836 if (name != NULL)
6837 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00006838 }
6839
6840 /*
6841 * SAX: End of Tag
6842 */
6843 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6844 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006845 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006846
Owen Taylor3473f882001-02-23 17:55:21 +00006847 oldname = namePop(ctxt);
6848 spacePop(ctxt);
6849 if (oldname != NULL) {
6850#ifdef DEBUG_STACK
6851 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6852#endif
6853 xmlFree(oldname);
6854 }
6855 return;
6856}
6857
6858/**
6859 * xmlParseCDSect:
6860 * @ctxt: an XML parser context
6861 *
6862 * Parse escaped pure raw content.
6863 *
6864 * [18] CDSect ::= CDStart CData CDEnd
6865 *
6866 * [19] CDStart ::= '<![CDATA['
6867 *
6868 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6869 *
6870 * [21] CDEnd ::= ']]>'
6871 */
6872void
6873xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6874 xmlChar *buf = NULL;
6875 int len = 0;
6876 int size = XML_PARSER_BUFFER_SIZE;
6877 int r, rl;
6878 int s, sl;
6879 int cur, l;
6880 int count = 0;
6881
6882 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6883 (NXT(2) == '[') && (NXT(3) == 'C') &&
6884 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6885 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6886 (NXT(8) == '[')) {
6887 SKIP(9);
6888 } else
6889 return;
6890
6891 ctxt->instate = XML_PARSER_CDATA_SECTION;
6892 r = CUR_CHAR(rl);
6893 if (!IS_CHAR(r)) {
6894 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6895 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6896 ctxt->sax->error(ctxt->userData,
6897 "CData section not finished\n");
6898 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006899 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006900 ctxt->instate = XML_PARSER_CONTENT;
6901 return;
6902 }
6903 NEXTL(rl);
6904 s = CUR_CHAR(sl);
6905 if (!IS_CHAR(s)) {
6906 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6907 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6908 ctxt->sax->error(ctxt->userData,
6909 "CData section not finished\n");
6910 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006911 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006912 ctxt->instate = XML_PARSER_CONTENT;
6913 return;
6914 }
6915 NEXTL(sl);
6916 cur = CUR_CHAR(l);
6917 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6918 if (buf == NULL) {
6919 xmlGenericError(xmlGenericErrorContext,
6920 "malloc of %d byte failed\n", size);
6921 return;
6922 }
6923 while (IS_CHAR(cur) &&
6924 ((r != ']') || (s != ']') || (cur != '>'))) {
6925 if (len + 5 >= size) {
6926 size *= 2;
6927 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6928 if (buf == NULL) {
6929 xmlGenericError(xmlGenericErrorContext,
6930 "realloc of %d byte failed\n", size);
6931 return;
6932 }
6933 }
6934 COPY_BUF(rl,buf,len,r);
6935 r = s;
6936 rl = sl;
6937 s = cur;
6938 sl = l;
6939 count++;
6940 if (count > 50) {
6941 GROW;
6942 count = 0;
6943 }
6944 NEXTL(l);
6945 cur = CUR_CHAR(l);
6946 }
6947 buf[len] = 0;
6948 ctxt->instate = XML_PARSER_CONTENT;
6949 if (cur != '>') {
6950 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6951 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6952 ctxt->sax->error(ctxt->userData,
6953 "CData section not finished\n%.50s\n", buf);
6954 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006955 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006956 xmlFree(buf);
6957 return;
6958 }
6959 NEXTL(l);
6960
6961 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006962 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00006963 */
6964 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6965 if (ctxt->sax->cdataBlock != NULL)
6966 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006967 else if (ctxt->sax->characters != NULL)
6968 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006969 }
6970 xmlFree(buf);
6971}
6972
6973/**
6974 * xmlParseContent:
6975 * @ctxt: an XML parser context
6976 *
6977 * Parse a content:
6978 *
6979 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6980 */
6981
6982void
6983xmlParseContent(xmlParserCtxtPtr ctxt) {
6984 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00006985 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00006986 ((RAW != '<') || (NXT(1) != '/'))) {
6987 const xmlChar *test = CUR_PTR;
6988 int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006989 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006990
6991 /*
Owen Taylor3473f882001-02-23 17:55:21 +00006992 * First case : a Processing Instruction.
6993 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00006994 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006995 xmlParsePI(ctxt);
6996 }
6997
6998 /*
6999 * Second case : a CDSection
7000 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007001 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00007002 (NXT(2) == '[') && (NXT(3) == 'C') &&
7003 (NXT(4) == 'D') && (NXT(5) == 'A') &&
7004 (NXT(6) == 'T') && (NXT(7) == 'A') &&
7005 (NXT(8) == '[')) {
7006 xmlParseCDSect(ctxt);
7007 }
7008
7009 /*
7010 * Third case : a comment
7011 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007012 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00007013 (NXT(2) == '-') && (NXT(3) == '-')) {
7014 xmlParseComment(ctxt);
7015 ctxt->instate = XML_PARSER_CONTENT;
7016 }
7017
7018 /*
7019 * Fourth case : a sub-element.
7020 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007021 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00007022 xmlParseElement(ctxt);
7023 }
7024
7025 /*
7026 * Fifth case : a reference. If if has not been resolved,
7027 * parsing returns it's Name, create the node
7028 */
7029
Daniel Veillard21a0f912001-02-25 19:54:14 +00007030 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00007031 xmlParseReference(ctxt);
7032 }
7033
7034 /*
7035 * Last case, text. Note that References are handled directly.
7036 */
7037 else {
7038 xmlParseCharData(ctxt, 0);
7039 }
7040
7041 GROW;
7042 /*
7043 * Pop-up of finished entities.
7044 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00007045 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00007046 xmlPopInput(ctxt);
7047 SHRINK;
7048
Daniel Veillardfdc91562002-07-01 21:52:03 +00007049 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007050 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
7051 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7052 ctxt->sax->error(ctxt->userData,
7053 "detected an error in element content\n");
7054 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007055 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007056 ctxt->instate = XML_PARSER_EOF;
7057 break;
7058 }
7059 }
7060}
7061
7062/**
7063 * xmlParseElement:
7064 * @ctxt: an XML parser context
7065 *
7066 * parse an XML element, this is highly recursive
7067 *
7068 * [39] element ::= EmptyElemTag | STag content ETag
7069 *
7070 * [ WFC: Element Type Match ]
7071 * The Name in an element's end-tag must match the element type in the
7072 * start-tag.
7073 *
7074 * [ VC: Element Valid ]
7075 * An element is valid if there is a declaration matching elementdecl
7076 * where the Name matches the element type and one of the following holds:
7077 * - The declaration matches EMPTY and the element has no content.
7078 * - The declaration matches children and the sequence of child elements
7079 * belongs to the language generated by the regular expression in the
7080 * content model, with optional white space (characters matching the
7081 * nonterminal S) between each pair of child elements.
7082 * - The declaration matches Mixed and the content consists of character
7083 * data and child elements whose types match names in the content model.
7084 * - The declaration matches ANY, and the types of any child elements have
7085 * been declared.
7086 */
7087
7088void
7089xmlParseElement(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00007090 xmlChar *name;
7091 xmlChar *oldname;
7092 xmlParserNodeInfo node_info;
7093 xmlNodePtr ret;
7094
7095 /* Capture start position */
7096 if (ctxt->record_info) {
7097 node_info.begin_pos = ctxt->input->consumed +
7098 (CUR_PTR - ctxt->input->base);
7099 node_info.begin_line = ctxt->input->line;
7100 }
7101
7102 if (ctxt->spaceNr == 0)
7103 spacePush(ctxt, -1);
7104 else
7105 spacePush(ctxt, *ctxt->space);
7106
7107 name = xmlParseStartTag(ctxt);
7108 if (name == NULL) {
7109 spacePop(ctxt);
7110 return;
7111 }
7112 namePush(ctxt, name);
7113 ret = ctxt->node;
7114
7115 /*
7116 * [ VC: Root Element Type ]
7117 * The Name in the document type declaration must match the element
7118 * type of the root element.
7119 */
7120 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7121 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7122 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7123
7124 /*
7125 * Check for an Empty Element.
7126 */
7127 if ((RAW == '/') && (NXT(1) == '>')) {
7128 SKIP(2);
7129 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7130 (!ctxt->disableSAX))
7131 ctxt->sax->endElement(ctxt->userData, name);
7132 oldname = namePop(ctxt);
7133 spacePop(ctxt);
7134 if (oldname != NULL) {
7135#ifdef DEBUG_STACK
7136 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7137#endif
7138 xmlFree(oldname);
7139 }
7140 if ( ret != NULL && ctxt->record_info ) {
7141 node_info.end_pos = ctxt->input->consumed +
7142 (CUR_PTR - ctxt->input->base);
7143 node_info.end_line = ctxt->input->line;
7144 node_info.node = ret;
7145 xmlParserAddNodeInfo(ctxt, &node_info);
7146 }
7147 return;
7148 }
7149 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00007150 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007151 } else {
7152 ctxt->errNo = XML_ERR_GT_REQUIRED;
7153 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7154 ctxt->sax->error(ctxt->userData,
Daniel Veillard38d80e22002-06-11 07:24:56 +00007155 "Couldn't find end of Start Tag %s\n",
7156 name);
Owen Taylor3473f882001-02-23 17:55:21 +00007157 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007158 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007159
7160 /*
7161 * end of parsing of this node.
7162 */
7163 nodePop(ctxt);
7164 oldname = namePop(ctxt);
7165 spacePop(ctxt);
7166 if (oldname != NULL) {
7167#ifdef DEBUG_STACK
7168 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7169#endif
7170 xmlFree(oldname);
7171 }
7172
7173 /*
7174 * Capture end position and add node
7175 */
7176 if ( ret != NULL && ctxt->record_info ) {
7177 node_info.end_pos = ctxt->input->consumed +
7178 (CUR_PTR - ctxt->input->base);
7179 node_info.end_line = ctxt->input->line;
7180 node_info.node = ret;
7181 xmlParserAddNodeInfo(ctxt, &node_info);
7182 }
7183 return;
7184 }
7185
7186 /*
7187 * Parse the content of the element:
7188 */
7189 xmlParseContent(ctxt);
7190 if (!IS_CHAR(RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00007191 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00007192 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7193 ctxt->sax->error(ctxt->userData,
Daniel Veillard38d80e22002-06-11 07:24:56 +00007194 "Premature end of data in tag %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007195 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007196 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007197
7198 /*
7199 * end of parsing of this node.
7200 */
7201 nodePop(ctxt);
7202 oldname = namePop(ctxt);
7203 spacePop(ctxt);
7204 if (oldname != NULL) {
7205#ifdef DEBUG_STACK
7206 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7207#endif
7208 xmlFree(oldname);
7209 }
7210 return;
7211 }
7212
7213 /*
7214 * parse the end of tag: '</' should be here.
7215 */
7216 xmlParseEndTag(ctxt);
7217
7218 /*
7219 * Capture end position and add node
7220 */
7221 if ( ret != NULL && ctxt->record_info ) {
7222 node_info.end_pos = ctxt->input->consumed +
7223 (CUR_PTR - ctxt->input->base);
7224 node_info.end_line = ctxt->input->line;
7225 node_info.node = ret;
7226 xmlParserAddNodeInfo(ctxt, &node_info);
7227 }
7228}
7229
7230/**
7231 * xmlParseVersionNum:
7232 * @ctxt: an XML parser context
7233 *
7234 * parse the XML version value.
7235 *
7236 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7237 *
7238 * Returns the string giving the XML version number, or NULL
7239 */
7240xmlChar *
7241xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7242 xmlChar *buf = NULL;
7243 int len = 0;
7244 int size = 10;
7245 xmlChar cur;
7246
7247 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7248 if (buf == NULL) {
7249 xmlGenericError(xmlGenericErrorContext,
7250 "malloc of %d byte failed\n", size);
7251 return(NULL);
7252 }
7253 cur = CUR;
7254 while (((cur >= 'a') && (cur <= 'z')) ||
7255 ((cur >= 'A') && (cur <= 'Z')) ||
7256 ((cur >= '0') && (cur <= '9')) ||
7257 (cur == '_') || (cur == '.') ||
7258 (cur == ':') || (cur == '-')) {
7259 if (len + 1 >= size) {
7260 size *= 2;
7261 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7262 if (buf == NULL) {
7263 xmlGenericError(xmlGenericErrorContext,
7264 "realloc of %d byte failed\n", size);
7265 return(NULL);
7266 }
7267 }
7268 buf[len++] = cur;
7269 NEXT;
7270 cur=CUR;
7271 }
7272 buf[len] = 0;
7273 return(buf);
7274}
7275
7276/**
7277 * xmlParseVersionInfo:
7278 * @ctxt: an XML parser context
7279 *
7280 * parse the XML version.
7281 *
7282 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7283 *
7284 * [25] Eq ::= S? '=' S?
7285 *
7286 * Returns the version string, e.g. "1.0"
7287 */
7288
7289xmlChar *
7290xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7291 xmlChar *version = NULL;
7292 const xmlChar *q;
7293
7294 if ((RAW == 'v') && (NXT(1) == 'e') &&
7295 (NXT(2) == 'r') && (NXT(3) == 's') &&
7296 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7297 (NXT(6) == 'n')) {
7298 SKIP(7);
7299 SKIP_BLANKS;
7300 if (RAW != '=') {
7301 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7302 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7303 ctxt->sax->error(ctxt->userData,
7304 "xmlParseVersionInfo : expected '='\n");
7305 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007306 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007307 return(NULL);
7308 }
7309 NEXT;
7310 SKIP_BLANKS;
7311 if (RAW == '"') {
7312 NEXT;
7313 q = CUR_PTR;
7314 version = xmlParseVersionNum(ctxt);
7315 if (RAW != '"') {
7316 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7317 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7318 ctxt->sax->error(ctxt->userData,
7319 "String not closed\n%.50s\n", q);
7320 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007321 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007322 } else
7323 NEXT;
7324 } else if (RAW == '\''){
7325 NEXT;
7326 q = CUR_PTR;
7327 version = xmlParseVersionNum(ctxt);
7328 if (RAW != '\'') {
7329 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7331 ctxt->sax->error(ctxt->userData,
7332 "String not closed\n%.50s\n", q);
7333 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007334 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007335 } else
7336 NEXT;
7337 } else {
7338 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7339 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7340 ctxt->sax->error(ctxt->userData,
7341 "xmlParseVersionInfo : expected ' or \"\n");
7342 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007343 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007344 }
7345 }
7346 return(version);
7347}
7348
7349/**
7350 * xmlParseEncName:
7351 * @ctxt: an XML parser context
7352 *
7353 * parse the XML encoding name
7354 *
7355 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7356 *
7357 * Returns the encoding name value or NULL
7358 */
7359xmlChar *
7360xmlParseEncName(xmlParserCtxtPtr ctxt) {
7361 xmlChar *buf = NULL;
7362 int len = 0;
7363 int size = 10;
7364 xmlChar cur;
7365
7366 cur = CUR;
7367 if (((cur >= 'a') && (cur <= 'z')) ||
7368 ((cur >= 'A') && (cur <= 'Z'))) {
7369 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7370 if (buf == NULL) {
7371 xmlGenericError(xmlGenericErrorContext,
7372 "malloc of %d byte failed\n", size);
7373 return(NULL);
7374 }
7375
7376 buf[len++] = cur;
7377 NEXT;
7378 cur = CUR;
7379 while (((cur >= 'a') && (cur <= 'z')) ||
7380 ((cur >= 'A') && (cur <= 'Z')) ||
7381 ((cur >= '0') && (cur <= '9')) ||
7382 (cur == '.') || (cur == '_') ||
7383 (cur == '-')) {
7384 if (len + 1 >= size) {
7385 size *= 2;
7386 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7387 if (buf == NULL) {
7388 xmlGenericError(xmlGenericErrorContext,
7389 "realloc of %d byte failed\n", size);
7390 return(NULL);
7391 }
7392 }
7393 buf[len++] = cur;
7394 NEXT;
7395 cur = CUR;
7396 if (cur == 0) {
7397 SHRINK;
7398 GROW;
7399 cur = CUR;
7400 }
7401 }
7402 buf[len] = 0;
7403 } else {
7404 ctxt->errNo = XML_ERR_ENCODING_NAME;
7405 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7406 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7407 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007408 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007409 }
7410 return(buf);
7411}
7412
7413/**
7414 * xmlParseEncodingDecl:
7415 * @ctxt: an XML parser context
7416 *
7417 * parse the XML encoding declaration
7418 *
7419 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7420 *
7421 * this setups the conversion filters.
7422 *
7423 * Returns the encoding value or NULL
7424 */
7425
7426xmlChar *
7427xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7428 xmlChar *encoding = NULL;
7429 const xmlChar *q;
7430
7431 SKIP_BLANKS;
7432 if ((RAW == 'e') && (NXT(1) == 'n') &&
7433 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7434 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7435 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7436 SKIP(8);
7437 SKIP_BLANKS;
7438 if (RAW != '=') {
7439 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7440 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7441 ctxt->sax->error(ctxt->userData,
7442 "xmlParseEncodingDecl : expected '='\n");
7443 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007444 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007445 return(NULL);
7446 }
7447 NEXT;
7448 SKIP_BLANKS;
7449 if (RAW == '"') {
7450 NEXT;
7451 q = CUR_PTR;
7452 encoding = xmlParseEncName(ctxt);
7453 if (RAW != '"') {
7454 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7455 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7456 ctxt->sax->error(ctxt->userData,
7457 "String not closed\n%.50s\n", q);
7458 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007459 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007460 } else
7461 NEXT;
7462 } else if (RAW == '\''){
7463 NEXT;
7464 q = CUR_PTR;
7465 encoding = xmlParseEncName(ctxt);
7466 if (RAW != '\'') {
7467 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7468 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7469 ctxt->sax->error(ctxt->userData,
7470 "String not closed\n%.50s\n", q);
7471 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007472 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007473 } else
7474 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00007475 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007476 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7477 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7478 ctxt->sax->error(ctxt->userData,
7479 "xmlParseEncodingDecl : expected ' or \"\n");
7480 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007481 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007482 }
7483 if (encoding != NULL) {
7484 xmlCharEncoding enc;
7485 xmlCharEncodingHandlerPtr handler;
7486
7487 if (ctxt->input->encoding != NULL)
7488 xmlFree((xmlChar *) ctxt->input->encoding);
7489 ctxt->input->encoding = encoding;
7490
7491 enc = xmlParseCharEncoding((const char *) encoding);
7492 /*
7493 * registered set of known encodings
7494 */
7495 if (enc != XML_CHAR_ENCODING_ERROR) {
7496 xmlSwitchEncoding(ctxt, enc);
7497 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
Daniel Veillard46d6c442002-04-09 16:10:39 +00007498 ctxt->input->encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007499 xmlFree(encoding);
7500 return(NULL);
7501 }
7502 } else {
7503 /*
7504 * fallback for unknown encodings
7505 */
7506 handler = xmlFindCharEncodingHandler((const char *) encoding);
7507 if (handler != NULL) {
7508 xmlSwitchToEncoding(ctxt, handler);
7509 } else {
7510 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7511 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7512 ctxt->sax->error(ctxt->userData,
7513 "Unsupported encoding %s\n", encoding);
7514 return(NULL);
7515 }
7516 }
7517 }
7518 }
7519 return(encoding);
7520}
7521
7522/**
7523 * xmlParseSDDecl:
7524 * @ctxt: an XML parser context
7525 *
7526 * parse the XML standalone declaration
7527 *
7528 * [32] SDDecl ::= S 'standalone' Eq
7529 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7530 *
7531 * [ VC: Standalone Document Declaration ]
7532 * TODO The standalone document declaration must have the value "no"
7533 * if any external markup declarations contain declarations of:
7534 * - attributes with default values, if elements to which these
7535 * attributes apply appear in the document without specifications
7536 * of values for these attributes, or
7537 * - entities (other than amp, lt, gt, apos, quot), if references
7538 * to those entities appear in the document, or
7539 * - attributes with values subject to normalization, where the
7540 * attribute appears in the document with a value which will change
7541 * as a result of normalization, or
7542 * - element types with element content, if white space occurs directly
7543 * within any instance of those types.
7544 *
7545 * Returns 1 if standalone, 0 otherwise
7546 */
7547
7548int
7549xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7550 int standalone = -1;
7551
7552 SKIP_BLANKS;
7553 if ((RAW == 's') && (NXT(1) == 't') &&
7554 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7555 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7556 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7557 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7558 SKIP(10);
7559 SKIP_BLANKS;
7560 if (RAW != '=') {
7561 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7562 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7563 ctxt->sax->error(ctxt->userData,
7564 "XML standalone declaration : expected '='\n");
7565 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007566 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007567 return(standalone);
7568 }
7569 NEXT;
7570 SKIP_BLANKS;
7571 if (RAW == '\''){
7572 NEXT;
7573 if ((RAW == 'n') && (NXT(1) == 'o')) {
7574 standalone = 0;
7575 SKIP(2);
7576 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7577 (NXT(2) == 's')) {
7578 standalone = 1;
7579 SKIP(3);
7580 } else {
7581 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7582 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7583 ctxt->sax->error(ctxt->userData,
7584 "standalone accepts only 'yes' or 'no'\n");
7585 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007586 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007587 }
7588 if (RAW != '\'') {
7589 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7590 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7591 ctxt->sax->error(ctxt->userData, "String not closed\n");
7592 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007593 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007594 } else
7595 NEXT;
7596 } else if (RAW == '"'){
7597 NEXT;
7598 if ((RAW == 'n') && (NXT(1) == 'o')) {
7599 standalone = 0;
7600 SKIP(2);
7601 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7602 (NXT(2) == 's')) {
7603 standalone = 1;
7604 SKIP(3);
7605 } else {
7606 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7607 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7608 ctxt->sax->error(ctxt->userData,
7609 "standalone accepts only 'yes' or 'no'\n");
7610 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007611 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007612 }
7613 if (RAW != '"') {
7614 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7615 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7616 ctxt->sax->error(ctxt->userData, "String not closed\n");
7617 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007618 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007619 } else
7620 NEXT;
7621 } else {
7622 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7623 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7624 ctxt->sax->error(ctxt->userData,
7625 "Standalone value not found\n");
7626 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007627 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007628 }
7629 }
7630 return(standalone);
7631}
7632
7633/**
7634 * xmlParseXMLDecl:
7635 * @ctxt: an XML parser context
7636 *
7637 * parse an XML declaration header
7638 *
7639 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7640 */
7641
7642void
7643xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7644 xmlChar *version;
7645
7646 /*
7647 * We know that '<?xml' is here.
7648 */
7649 SKIP(5);
7650
7651 if (!IS_BLANK(RAW)) {
7652 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7653 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7654 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7655 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007656 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007657 }
7658 SKIP_BLANKS;
7659
7660 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007661 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007662 */
7663 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007664 if (version == NULL) {
7665 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7666 ctxt->sax->error(ctxt->userData,
7667 "Malformed declaration expecting version\n");
7668 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007669 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard19840942001-11-29 16:11:38 +00007670 } else {
7671 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7672 /*
7673 * TODO: Blueberry should be detected here
7674 */
7675 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7676 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7677 version);
7678 }
7679 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007680 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007681 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007682 }
Owen Taylor3473f882001-02-23 17:55:21 +00007683
7684 /*
7685 * We may have the encoding declaration
7686 */
7687 if (!IS_BLANK(RAW)) {
7688 if ((RAW == '?') && (NXT(1) == '>')) {
7689 SKIP(2);
7690 return;
7691 }
7692 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7693 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7694 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7695 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007696 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007697 }
7698 xmlParseEncodingDecl(ctxt);
7699 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7700 /*
7701 * The XML REC instructs us to stop parsing right here
7702 */
7703 return;
7704 }
7705
7706 /*
7707 * We may have the standalone status.
7708 */
7709 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7710 if ((RAW == '?') && (NXT(1) == '>')) {
7711 SKIP(2);
7712 return;
7713 }
7714 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7715 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7716 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7717 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007718 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007719 }
7720 SKIP_BLANKS;
7721 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7722
7723 SKIP_BLANKS;
7724 if ((RAW == '?') && (NXT(1) == '>')) {
7725 SKIP(2);
7726 } else if (RAW == '>') {
7727 /* Deprecated old WD ... */
7728 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7729 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7730 ctxt->sax->error(ctxt->userData,
7731 "XML declaration must end-up with '?>'\n");
7732 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007733 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007734 NEXT;
7735 } else {
7736 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7737 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7738 ctxt->sax->error(ctxt->userData,
7739 "parsing XML declaration: '?>' expected\n");
7740 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007741 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007742 MOVETO_ENDTAG(CUR_PTR);
7743 NEXT;
7744 }
7745}
7746
7747/**
7748 * xmlParseMisc:
7749 * @ctxt: an XML parser context
7750 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007751 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007752 *
7753 * [27] Misc ::= Comment | PI | S
7754 */
7755
7756void
7757xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00007758 while (((RAW == '<') && (NXT(1) == '?')) ||
7759 ((RAW == '<') && (NXT(1) == '!') &&
7760 (NXT(2) == '-') && (NXT(3) == '-')) ||
7761 IS_BLANK(CUR)) {
7762 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007763 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00007764 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007765 NEXT;
7766 } else
7767 xmlParseComment(ctxt);
7768 }
7769}
7770
7771/**
7772 * xmlParseDocument:
7773 * @ctxt: an XML parser context
7774 *
7775 * parse an XML document (and build a tree if using the standard SAX
7776 * interface).
7777 *
7778 * [1] document ::= prolog element Misc*
7779 *
7780 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7781 *
7782 * Returns 0, -1 in case of error. the parser context is augmented
7783 * as a result of the parsing.
7784 */
7785
7786int
7787xmlParseDocument(xmlParserCtxtPtr ctxt) {
7788 xmlChar start[4];
7789 xmlCharEncoding enc;
7790
7791 xmlInitParser();
7792
7793 GROW;
7794
7795 /*
7796 * SAX: beginning of the document processing.
7797 */
7798 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7799 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7800
Daniel Veillard50f34372001-08-03 12:06:36 +00007801 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007802 /*
7803 * Get the 4 first bytes and decode the charset
7804 * if enc != XML_CHAR_ENCODING_NONE
7805 * plug some encoding conversion routines.
7806 */
7807 start[0] = RAW;
7808 start[1] = NXT(1);
7809 start[2] = NXT(2);
7810 start[3] = NXT(3);
7811 enc = xmlDetectCharEncoding(start, 4);
7812 if (enc != XML_CHAR_ENCODING_NONE) {
7813 xmlSwitchEncoding(ctxt, enc);
7814 }
Owen Taylor3473f882001-02-23 17:55:21 +00007815 }
7816
7817
7818 if (CUR == 0) {
7819 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7820 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7821 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7822 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007823 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007824 }
7825
7826 /*
7827 * Check for the XMLDecl in the Prolog.
7828 */
7829 GROW;
7830 if ((RAW == '<') && (NXT(1) == '?') &&
7831 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7832 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7833
7834 /*
7835 * Note that we will switch encoding on the fly.
7836 */
7837 xmlParseXMLDecl(ctxt);
7838 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7839 /*
7840 * The XML REC instructs us to stop parsing right here
7841 */
7842 return(-1);
7843 }
7844 ctxt->standalone = ctxt->input->standalone;
7845 SKIP_BLANKS;
7846 } else {
7847 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7848 }
7849 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7850 ctxt->sax->startDocument(ctxt->userData);
7851
7852 /*
7853 * The Misc part of the Prolog
7854 */
7855 GROW;
7856 xmlParseMisc(ctxt);
7857
7858 /*
7859 * Then possibly doc type declaration(s) and more Misc
7860 * (doctypedecl Misc*)?
7861 */
7862 GROW;
7863 if ((RAW == '<') && (NXT(1) == '!') &&
7864 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7865 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7866 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7867 (NXT(8) == 'E')) {
7868
7869 ctxt->inSubset = 1;
7870 xmlParseDocTypeDecl(ctxt);
7871 if (RAW == '[') {
7872 ctxt->instate = XML_PARSER_DTD;
7873 xmlParseInternalSubset(ctxt);
7874 }
7875
7876 /*
7877 * Create and update the external subset.
7878 */
7879 ctxt->inSubset = 2;
7880 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7881 (!ctxt->disableSAX))
7882 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7883 ctxt->extSubSystem, ctxt->extSubURI);
7884 ctxt->inSubset = 0;
7885
7886
7887 ctxt->instate = XML_PARSER_PROLOG;
7888 xmlParseMisc(ctxt);
7889 }
7890
7891 /*
7892 * Time to start parsing the tree itself
7893 */
7894 GROW;
7895 if (RAW != '<') {
7896 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7897 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7898 ctxt->sax->error(ctxt->userData,
7899 "Start tag expected, '<' not found\n");
7900 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007901 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007902 ctxt->instate = XML_PARSER_EOF;
7903 } else {
7904 ctxt->instate = XML_PARSER_CONTENT;
7905 xmlParseElement(ctxt);
7906 ctxt->instate = XML_PARSER_EPILOG;
7907
7908
7909 /*
7910 * The Misc part at the end
7911 */
7912 xmlParseMisc(ctxt);
7913
Daniel Veillard561b7f82002-03-20 21:55:57 +00007914 if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00007915 ctxt->errNo = XML_ERR_DOCUMENT_END;
7916 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7917 ctxt->sax->error(ctxt->userData,
7918 "Extra content at the end of the document\n");
7919 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007920 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007921 }
7922 ctxt->instate = XML_PARSER_EOF;
7923 }
7924
7925 /*
7926 * SAX: end of the document processing.
7927 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007928 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007929 ctxt->sax->endDocument(ctxt->userData);
7930
Daniel Veillard5997aca2002-03-18 18:36:20 +00007931 /*
7932 * Remove locally kept entity definitions if the tree was not built
7933 */
7934 if ((ctxt->myDoc != NULL) &&
7935 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
7936 xmlFreeDoc(ctxt->myDoc);
7937 ctxt->myDoc = NULL;
7938 }
7939
Daniel Veillardc7612992002-02-17 22:47:37 +00007940 if (! ctxt->wellFormed) {
7941 ctxt->valid = 0;
7942 return(-1);
7943 }
Owen Taylor3473f882001-02-23 17:55:21 +00007944 return(0);
7945}
7946
7947/**
7948 * xmlParseExtParsedEnt:
7949 * @ctxt: an XML parser context
7950 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007951 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00007952 * An external general parsed entity is well-formed if it matches the
7953 * production labeled extParsedEnt.
7954 *
7955 * [78] extParsedEnt ::= TextDecl? content
7956 *
7957 * Returns 0, -1 in case of error. the parser context is augmented
7958 * as a result of the parsing.
7959 */
7960
7961int
7962xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7963 xmlChar start[4];
7964 xmlCharEncoding enc;
7965
7966 xmlDefaultSAXHandlerInit();
7967
7968 GROW;
7969
7970 /*
7971 * SAX: beginning of the document processing.
7972 */
7973 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7974 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7975
7976 /*
7977 * Get the 4 first bytes and decode the charset
7978 * if enc != XML_CHAR_ENCODING_NONE
7979 * plug some encoding conversion routines.
7980 */
7981 start[0] = RAW;
7982 start[1] = NXT(1);
7983 start[2] = NXT(2);
7984 start[3] = NXT(3);
7985 enc = xmlDetectCharEncoding(start, 4);
7986 if (enc != XML_CHAR_ENCODING_NONE) {
7987 xmlSwitchEncoding(ctxt, enc);
7988 }
7989
7990
7991 if (CUR == 0) {
7992 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7993 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7994 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7995 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007996 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007997 }
7998
7999 /*
8000 * Check for the XMLDecl in the Prolog.
8001 */
8002 GROW;
8003 if ((RAW == '<') && (NXT(1) == '?') &&
8004 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8005 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8006
8007 /*
8008 * Note that we will switch encoding on the fly.
8009 */
8010 xmlParseXMLDecl(ctxt);
8011 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8012 /*
8013 * The XML REC instructs us to stop parsing right here
8014 */
8015 return(-1);
8016 }
8017 SKIP_BLANKS;
8018 } else {
8019 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8020 }
8021 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8022 ctxt->sax->startDocument(ctxt->userData);
8023
8024 /*
8025 * Doing validity checking on chunk doesn't make sense
8026 */
8027 ctxt->instate = XML_PARSER_CONTENT;
8028 ctxt->validate = 0;
8029 ctxt->loadsubset = 0;
8030 ctxt->depth = 0;
8031
8032 xmlParseContent(ctxt);
8033
8034 if ((RAW == '<') && (NXT(1) == '/')) {
8035 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8036 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8037 ctxt->sax->error(ctxt->userData,
8038 "chunk is not well balanced\n");
8039 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008040 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008041 } else if (RAW != 0) {
8042 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8043 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8044 ctxt->sax->error(ctxt->userData,
8045 "extra content at the end of well balanced chunk\n");
8046 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008047 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008048 }
8049
8050 /*
8051 * SAX: end of the document processing.
8052 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008053 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008054 ctxt->sax->endDocument(ctxt->userData);
8055
8056 if (! ctxt->wellFormed) return(-1);
8057 return(0);
8058}
8059
8060/************************************************************************
8061 * *
8062 * Progressive parsing interfaces *
8063 * *
8064 ************************************************************************/
8065
8066/**
8067 * xmlParseLookupSequence:
8068 * @ctxt: an XML parser context
8069 * @first: the first char to lookup
8070 * @next: the next char to lookup or zero
8071 * @third: the next char to lookup or zero
8072 *
8073 * Try to find if a sequence (first, next, third) or just (first next) or
8074 * (first) is available in the input stream.
8075 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8076 * to avoid rescanning sequences of bytes, it DOES change the state of the
8077 * parser, do not use liberally.
8078 *
8079 * Returns the index to the current parsing point if the full sequence
8080 * is available, -1 otherwise.
8081 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008082static int
Owen Taylor3473f882001-02-23 17:55:21 +00008083xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8084 xmlChar next, xmlChar third) {
8085 int base, len;
8086 xmlParserInputPtr in;
8087 const xmlChar *buf;
8088
8089 in = ctxt->input;
8090 if (in == NULL) return(-1);
8091 base = in->cur - in->base;
8092 if (base < 0) return(-1);
8093 if (ctxt->checkIndex > base)
8094 base = ctxt->checkIndex;
8095 if (in->buf == NULL) {
8096 buf = in->base;
8097 len = in->length;
8098 } else {
8099 buf = in->buf->buffer->content;
8100 len = in->buf->buffer->use;
8101 }
8102 /* take into account the sequence length */
8103 if (third) len -= 2;
8104 else if (next) len --;
8105 for (;base < len;base++) {
8106 if (buf[base] == first) {
8107 if (third != 0) {
8108 if ((buf[base + 1] != next) ||
8109 (buf[base + 2] != third)) continue;
8110 } else if (next != 0) {
8111 if (buf[base + 1] != next) continue;
8112 }
8113 ctxt->checkIndex = 0;
8114#ifdef DEBUG_PUSH
8115 if (next == 0)
8116 xmlGenericError(xmlGenericErrorContext,
8117 "PP: lookup '%c' found at %d\n",
8118 first, base);
8119 else if (third == 0)
8120 xmlGenericError(xmlGenericErrorContext,
8121 "PP: lookup '%c%c' found at %d\n",
8122 first, next, base);
8123 else
8124 xmlGenericError(xmlGenericErrorContext,
8125 "PP: lookup '%c%c%c' found at %d\n",
8126 first, next, third, base);
8127#endif
8128 return(base - (in->cur - in->base));
8129 }
8130 }
8131 ctxt->checkIndex = base;
8132#ifdef DEBUG_PUSH
8133 if (next == 0)
8134 xmlGenericError(xmlGenericErrorContext,
8135 "PP: lookup '%c' failed\n", first);
8136 else if (third == 0)
8137 xmlGenericError(xmlGenericErrorContext,
8138 "PP: lookup '%c%c' failed\n", first, next);
8139 else
8140 xmlGenericError(xmlGenericErrorContext,
8141 "PP: lookup '%c%c%c' failed\n", first, next, third);
8142#endif
8143 return(-1);
8144}
8145
8146/**
8147 * xmlParseTryOrFinish:
8148 * @ctxt: an XML parser context
8149 * @terminate: last chunk indicator
8150 *
8151 * Try to progress on parsing
8152 *
8153 * Returns zero if no parsing was possible
8154 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008155static int
Owen Taylor3473f882001-02-23 17:55:21 +00008156xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8157 int ret = 0;
8158 int avail;
8159 xmlChar cur, next;
8160
8161#ifdef DEBUG_PUSH
8162 switch (ctxt->instate) {
8163 case XML_PARSER_EOF:
8164 xmlGenericError(xmlGenericErrorContext,
8165 "PP: try EOF\n"); break;
8166 case XML_PARSER_START:
8167 xmlGenericError(xmlGenericErrorContext,
8168 "PP: try START\n"); break;
8169 case XML_PARSER_MISC:
8170 xmlGenericError(xmlGenericErrorContext,
8171 "PP: try MISC\n");break;
8172 case XML_PARSER_COMMENT:
8173 xmlGenericError(xmlGenericErrorContext,
8174 "PP: try COMMENT\n");break;
8175 case XML_PARSER_PROLOG:
8176 xmlGenericError(xmlGenericErrorContext,
8177 "PP: try PROLOG\n");break;
8178 case XML_PARSER_START_TAG:
8179 xmlGenericError(xmlGenericErrorContext,
8180 "PP: try START_TAG\n");break;
8181 case XML_PARSER_CONTENT:
8182 xmlGenericError(xmlGenericErrorContext,
8183 "PP: try CONTENT\n");break;
8184 case XML_PARSER_CDATA_SECTION:
8185 xmlGenericError(xmlGenericErrorContext,
8186 "PP: try CDATA_SECTION\n");break;
8187 case XML_PARSER_END_TAG:
8188 xmlGenericError(xmlGenericErrorContext,
8189 "PP: try END_TAG\n");break;
8190 case XML_PARSER_ENTITY_DECL:
8191 xmlGenericError(xmlGenericErrorContext,
8192 "PP: try ENTITY_DECL\n");break;
8193 case XML_PARSER_ENTITY_VALUE:
8194 xmlGenericError(xmlGenericErrorContext,
8195 "PP: try ENTITY_VALUE\n");break;
8196 case XML_PARSER_ATTRIBUTE_VALUE:
8197 xmlGenericError(xmlGenericErrorContext,
8198 "PP: try ATTRIBUTE_VALUE\n");break;
8199 case XML_PARSER_DTD:
8200 xmlGenericError(xmlGenericErrorContext,
8201 "PP: try DTD\n");break;
8202 case XML_PARSER_EPILOG:
8203 xmlGenericError(xmlGenericErrorContext,
8204 "PP: try EPILOG\n");break;
8205 case XML_PARSER_PI:
8206 xmlGenericError(xmlGenericErrorContext,
8207 "PP: try PI\n");break;
8208 case XML_PARSER_IGNORE:
8209 xmlGenericError(xmlGenericErrorContext,
8210 "PP: try IGNORE\n");break;
8211 }
8212#endif
8213
8214 while (1) {
Aleksey Sanine48a3182002-05-09 18:20:01 +00008215 SHRINK;
8216
Owen Taylor3473f882001-02-23 17:55:21 +00008217 /*
8218 * Pop-up of finished entities.
8219 */
8220 while ((RAW == 0) && (ctxt->inputNr > 1))
8221 xmlPopInput(ctxt);
8222
8223 if (ctxt->input ==NULL) break;
8224 if (ctxt->input->buf == NULL)
8225 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00008226 else {
8227 /*
8228 * If we are operating on converted input, try to flush
8229 * remainng chars to avoid them stalling in the non-converted
8230 * buffer.
8231 */
8232 if ((ctxt->input->buf->raw != NULL) &&
8233 (ctxt->input->buf->raw->use > 0)) {
8234 int base = ctxt->input->base -
8235 ctxt->input->buf->buffer->content;
8236 int current = ctxt->input->cur - ctxt->input->base;
8237
8238 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8239 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8240 ctxt->input->cur = ctxt->input->base + current;
8241 ctxt->input->end =
8242 &ctxt->input->buf->buffer->content[
8243 ctxt->input->buf->buffer->use];
8244 }
8245 avail = ctxt->input->buf->buffer->use -
8246 (ctxt->input->cur - ctxt->input->base);
8247 }
Owen Taylor3473f882001-02-23 17:55:21 +00008248 if (avail < 1)
8249 goto done;
8250 switch (ctxt->instate) {
8251 case XML_PARSER_EOF:
8252 /*
8253 * Document parsing is done !
8254 */
8255 goto done;
8256 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008257 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8258 xmlChar start[4];
8259 xmlCharEncoding enc;
8260
8261 /*
8262 * Very first chars read from the document flow.
8263 */
8264 if (avail < 4)
8265 goto done;
8266
8267 /*
8268 * Get the 4 first bytes and decode the charset
8269 * if enc != XML_CHAR_ENCODING_NONE
8270 * plug some encoding conversion routines.
8271 */
8272 start[0] = RAW;
8273 start[1] = NXT(1);
8274 start[2] = NXT(2);
8275 start[3] = NXT(3);
8276 enc = xmlDetectCharEncoding(start, 4);
8277 if (enc != XML_CHAR_ENCODING_NONE) {
8278 xmlSwitchEncoding(ctxt, enc);
8279 }
8280 break;
8281 }
Owen Taylor3473f882001-02-23 17:55:21 +00008282
8283 cur = ctxt->input->cur[0];
8284 next = ctxt->input->cur[1];
8285 if (cur == 0) {
8286 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8287 ctxt->sax->setDocumentLocator(ctxt->userData,
8288 &xmlDefaultSAXLocator);
8289 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8290 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8291 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8292 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008293 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008294 ctxt->instate = XML_PARSER_EOF;
8295#ifdef DEBUG_PUSH
8296 xmlGenericError(xmlGenericErrorContext,
8297 "PP: entering EOF\n");
8298#endif
8299 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8300 ctxt->sax->endDocument(ctxt->userData);
8301 goto done;
8302 }
8303 if ((cur == '<') && (next == '?')) {
8304 /* PI or XML decl */
8305 if (avail < 5) return(ret);
8306 if ((!terminate) &&
8307 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8308 return(ret);
8309 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8310 ctxt->sax->setDocumentLocator(ctxt->userData,
8311 &xmlDefaultSAXLocator);
8312 if ((ctxt->input->cur[2] == 'x') &&
8313 (ctxt->input->cur[3] == 'm') &&
8314 (ctxt->input->cur[4] == 'l') &&
8315 (IS_BLANK(ctxt->input->cur[5]))) {
8316 ret += 5;
8317#ifdef DEBUG_PUSH
8318 xmlGenericError(xmlGenericErrorContext,
8319 "PP: Parsing XML Decl\n");
8320#endif
8321 xmlParseXMLDecl(ctxt);
8322 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8323 /*
8324 * The XML REC instructs us to stop parsing right
8325 * here
8326 */
8327 ctxt->instate = XML_PARSER_EOF;
8328 return(0);
8329 }
8330 ctxt->standalone = ctxt->input->standalone;
8331 if ((ctxt->encoding == NULL) &&
8332 (ctxt->input->encoding != NULL))
8333 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8334 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8335 (!ctxt->disableSAX))
8336 ctxt->sax->startDocument(ctxt->userData);
8337 ctxt->instate = XML_PARSER_MISC;
8338#ifdef DEBUG_PUSH
8339 xmlGenericError(xmlGenericErrorContext,
8340 "PP: entering MISC\n");
8341#endif
8342 } else {
8343 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8344 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8345 (!ctxt->disableSAX))
8346 ctxt->sax->startDocument(ctxt->userData);
8347 ctxt->instate = XML_PARSER_MISC;
8348#ifdef DEBUG_PUSH
8349 xmlGenericError(xmlGenericErrorContext,
8350 "PP: entering MISC\n");
8351#endif
8352 }
8353 } else {
8354 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8355 ctxt->sax->setDocumentLocator(ctxt->userData,
8356 &xmlDefaultSAXLocator);
8357 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8358 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8359 (!ctxt->disableSAX))
8360 ctxt->sax->startDocument(ctxt->userData);
8361 ctxt->instate = XML_PARSER_MISC;
8362#ifdef DEBUG_PUSH
8363 xmlGenericError(xmlGenericErrorContext,
8364 "PP: entering MISC\n");
8365#endif
8366 }
8367 break;
8368 case XML_PARSER_MISC:
8369 SKIP_BLANKS;
8370 if (ctxt->input->buf == NULL)
8371 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8372 else
8373 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8374 if (avail < 2)
8375 goto done;
8376 cur = ctxt->input->cur[0];
8377 next = ctxt->input->cur[1];
8378 if ((cur == '<') && (next == '?')) {
8379 if ((!terminate) &&
8380 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8381 goto done;
8382#ifdef DEBUG_PUSH
8383 xmlGenericError(xmlGenericErrorContext,
8384 "PP: Parsing PI\n");
8385#endif
8386 xmlParsePI(ctxt);
8387 } else if ((cur == '<') && (next == '!') &&
8388 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8389 if ((!terminate) &&
8390 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8391 goto done;
8392#ifdef DEBUG_PUSH
8393 xmlGenericError(xmlGenericErrorContext,
8394 "PP: Parsing Comment\n");
8395#endif
8396 xmlParseComment(ctxt);
8397 ctxt->instate = XML_PARSER_MISC;
8398 } else if ((cur == '<') && (next == '!') &&
8399 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8400 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8401 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8402 (ctxt->input->cur[8] == 'E')) {
8403 if ((!terminate) &&
8404 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8405 goto done;
8406#ifdef DEBUG_PUSH
8407 xmlGenericError(xmlGenericErrorContext,
8408 "PP: Parsing internal subset\n");
8409#endif
8410 ctxt->inSubset = 1;
8411 xmlParseDocTypeDecl(ctxt);
8412 if (RAW == '[') {
8413 ctxt->instate = XML_PARSER_DTD;
8414#ifdef DEBUG_PUSH
8415 xmlGenericError(xmlGenericErrorContext,
8416 "PP: entering DTD\n");
8417#endif
8418 } else {
8419 /*
8420 * Create and update the external subset.
8421 */
8422 ctxt->inSubset = 2;
8423 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8424 (ctxt->sax->externalSubset != NULL))
8425 ctxt->sax->externalSubset(ctxt->userData,
8426 ctxt->intSubName, ctxt->extSubSystem,
8427 ctxt->extSubURI);
8428 ctxt->inSubset = 0;
8429 ctxt->instate = XML_PARSER_PROLOG;
8430#ifdef DEBUG_PUSH
8431 xmlGenericError(xmlGenericErrorContext,
8432 "PP: entering PROLOG\n");
8433#endif
8434 }
8435 } else if ((cur == '<') && (next == '!') &&
8436 (avail < 9)) {
8437 goto done;
8438 } else {
8439 ctxt->instate = XML_PARSER_START_TAG;
8440#ifdef DEBUG_PUSH
8441 xmlGenericError(xmlGenericErrorContext,
8442 "PP: entering START_TAG\n");
8443#endif
8444 }
8445 break;
8446 case XML_PARSER_IGNORE:
8447 xmlGenericError(xmlGenericErrorContext,
8448 "PP: internal error, state == IGNORE");
8449 ctxt->instate = XML_PARSER_DTD;
8450#ifdef DEBUG_PUSH
8451 xmlGenericError(xmlGenericErrorContext,
8452 "PP: entering DTD\n");
8453#endif
8454 break;
8455 case XML_PARSER_PROLOG:
8456 SKIP_BLANKS;
8457 if (ctxt->input->buf == NULL)
8458 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8459 else
8460 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8461 if (avail < 2)
8462 goto done;
8463 cur = ctxt->input->cur[0];
8464 next = ctxt->input->cur[1];
8465 if ((cur == '<') && (next == '?')) {
8466 if ((!terminate) &&
8467 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8468 goto done;
8469#ifdef DEBUG_PUSH
8470 xmlGenericError(xmlGenericErrorContext,
8471 "PP: Parsing PI\n");
8472#endif
8473 xmlParsePI(ctxt);
8474 } else if ((cur == '<') && (next == '!') &&
8475 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8476 if ((!terminate) &&
8477 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8478 goto done;
8479#ifdef DEBUG_PUSH
8480 xmlGenericError(xmlGenericErrorContext,
8481 "PP: Parsing Comment\n");
8482#endif
8483 xmlParseComment(ctxt);
8484 ctxt->instate = XML_PARSER_PROLOG;
8485 } else if ((cur == '<') && (next == '!') &&
8486 (avail < 4)) {
8487 goto done;
8488 } else {
8489 ctxt->instate = XML_PARSER_START_TAG;
8490#ifdef DEBUG_PUSH
8491 xmlGenericError(xmlGenericErrorContext,
8492 "PP: entering START_TAG\n");
8493#endif
8494 }
8495 break;
8496 case XML_PARSER_EPILOG:
8497 SKIP_BLANKS;
8498 if (ctxt->input->buf == NULL)
8499 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8500 else
8501 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8502 if (avail < 2)
8503 goto done;
8504 cur = ctxt->input->cur[0];
8505 next = ctxt->input->cur[1];
8506 if ((cur == '<') && (next == '?')) {
8507 if ((!terminate) &&
8508 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8509 goto done;
8510#ifdef DEBUG_PUSH
8511 xmlGenericError(xmlGenericErrorContext,
8512 "PP: Parsing PI\n");
8513#endif
8514 xmlParsePI(ctxt);
8515 ctxt->instate = XML_PARSER_EPILOG;
8516 } else if ((cur == '<') && (next == '!') &&
8517 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8518 if ((!terminate) &&
8519 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8520 goto done;
8521#ifdef DEBUG_PUSH
8522 xmlGenericError(xmlGenericErrorContext,
8523 "PP: Parsing Comment\n");
8524#endif
8525 xmlParseComment(ctxt);
8526 ctxt->instate = XML_PARSER_EPILOG;
8527 } else if ((cur == '<') && (next == '!') &&
8528 (avail < 4)) {
8529 goto done;
8530 } else {
8531 ctxt->errNo = XML_ERR_DOCUMENT_END;
8532 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8533 ctxt->sax->error(ctxt->userData,
8534 "Extra content at the end of the document\n");
8535 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008536 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008537 ctxt->instate = XML_PARSER_EOF;
8538#ifdef DEBUG_PUSH
8539 xmlGenericError(xmlGenericErrorContext,
8540 "PP: entering EOF\n");
8541#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008542 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008543 ctxt->sax->endDocument(ctxt->userData);
8544 goto done;
8545 }
8546 break;
8547 case XML_PARSER_START_TAG: {
8548 xmlChar *name, *oldname;
8549
8550 if ((avail < 2) && (ctxt->inputNr == 1))
8551 goto done;
8552 cur = ctxt->input->cur[0];
8553 if (cur != '<') {
8554 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8555 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8556 ctxt->sax->error(ctxt->userData,
8557 "Start tag expect, '<' not found\n");
8558 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008559 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008560 ctxt->instate = XML_PARSER_EOF;
8561#ifdef DEBUG_PUSH
8562 xmlGenericError(xmlGenericErrorContext,
8563 "PP: entering EOF\n");
8564#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008565 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008566 ctxt->sax->endDocument(ctxt->userData);
8567 goto done;
8568 }
8569 if ((!terminate) &&
8570 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8571 goto done;
8572 if (ctxt->spaceNr == 0)
8573 spacePush(ctxt, -1);
8574 else
8575 spacePush(ctxt, *ctxt->space);
8576 name = xmlParseStartTag(ctxt);
8577 if (name == NULL) {
8578 spacePop(ctxt);
8579 ctxt->instate = XML_PARSER_EOF;
8580#ifdef DEBUG_PUSH
8581 xmlGenericError(xmlGenericErrorContext,
8582 "PP: entering EOF\n");
8583#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008584 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008585 ctxt->sax->endDocument(ctxt->userData);
8586 goto done;
8587 }
8588 namePush(ctxt, xmlStrdup(name));
8589
8590 /*
8591 * [ VC: Root Element Type ]
8592 * The Name in the document type declaration must match
8593 * the element type of the root element.
8594 */
8595 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8596 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8597 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8598
8599 /*
8600 * Check for an Empty Element.
8601 */
8602 if ((RAW == '/') && (NXT(1) == '>')) {
8603 SKIP(2);
8604 if ((ctxt->sax != NULL) &&
8605 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8606 ctxt->sax->endElement(ctxt->userData, name);
8607 xmlFree(name);
8608 oldname = namePop(ctxt);
8609 spacePop(ctxt);
8610 if (oldname != NULL) {
8611#ifdef DEBUG_STACK
8612 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8613#endif
8614 xmlFree(oldname);
8615 }
8616 if (ctxt->name == NULL) {
8617 ctxt->instate = XML_PARSER_EPILOG;
8618#ifdef DEBUG_PUSH
8619 xmlGenericError(xmlGenericErrorContext,
8620 "PP: entering EPILOG\n");
8621#endif
8622 } else {
8623 ctxt->instate = XML_PARSER_CONTENT;
8624#ifdef DEBUG_PUSH
8625 xmlGenericError(xmlGenericErrorContext,
8626 "PP: entering CONTENT\n");
8627#endif
8628 }
8629 break;
8630 }
8631 if (RAW == '>') {
8632 NEXT;
8633 } else {
8634 ctxt->errNo = XML_ERR_GT_REQUIRED;
8635 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8636 ctxt->sax->error(ctxt->userData,
8637 "Couldn't find end of Start Tag %s\n",
8638 name);
8639 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008640 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008641
8642 /*
8643 * end of parsing of this node.
8644 */
8645 nodePop(ctxt);
8646 oldname = namePop(ctxt);
8647 spacePop(ctxt);
8648 if (oldname != NULL) {
8649#ifdef DEBUG_STACK
8650 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8651#endif
8652 xmlFree(oldname);
8653 }
8654 }
8655 xmlFree(name);
8656 ctxt->instate = XML_PARSER_CONTENT;
8657#ifdef DEBUG_PUSH
8658 xmlGenericError(xmlGenericErrorContext,
8659 "PP: entering CONTENT\n");
8660#endif
8661 break;
8662 }
8663 case XML_PARSER_CONTENT: {
8664 const xmlChar *test;
8665 int cons;
Owen Taylor3473f882001-02-23 17:55:21 +00008666 if ((avail < 2) && (ctxt->inputNr == 1))
8667 goto done;
8668 cur = ctxt->input->cur[0];
8669 next = ctxt->input->cur[1];
8670
8671 test = CUR_PTR;
8672 cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008673 if ((cur == '<') && (next == '?')) {
8674 if ((!terminate) &&
8675 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8676 goto done;
8677#ifdef DEBUG_PUSH
8678 xmlGenericError(xmlGenericErrorContext,
8679 "PP: Parsing PI\n");
8680#endif
8681 xmlParsePI(ctxt);
8682 } else if ((cur == '<') && (next == '!') &&
8683 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8684 if ((!terminate) &&
8685 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8686 goto done;
8687#ifdef DEBUG_PUSH
8688 xmlGenericError(xmlGenericErrorContext,
8689 "PP: Parsing Comment\n");
8690#endif
8691 xmlParseComment(ctxt);
8692 ctxt->instate = XML_PARSER_CONTENT;
8693 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8694 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8695 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8696 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8697 (ctxt->input->cur[8] == '[')) {
8698 SKIP(9);
8699 ctxt->instate = XML_PARSER_CDATA_SECTION;
8700#ifdef DEBUG_PUSH
8701 xmlGenericError(xmlGenericErrorContext,
8702 "PP: entering CDATA_SECTION\n");
8703#endif
8704 break;
8705 } else if ((cur == '<') && (next == '!') &&
8706 (avail < 9)) {
8707 goto done;
8708 } else if ((cur == '<') && (next == '/')) {
8709 ctxt->instate = XML_PARSER_END_TAG;
8710#ifdef DEBUG_PUSH
8711 xmlGenericError(xmlGenericErrorContext,
8712 "PP: entering END_TAG\n");
8713#endif
8714 break;
8715 } else if (cur == '<') {
8716 ctxt->instate = XML_PARSER_START_TAG;
8717#ifdef DEBUG_PUSH
8718 xmlGenericError(xmlGenericErrorContext,
8719 "PP: entering START_TAG\n");
8720#endif
8721 break;
8722 } else if (cur == '&') {
8723 if ((!terminate) &&
8724 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8725 goto done;
8726#ifdef DEBUG_PUSH
8727 xmlGenericError(xmlGenericErrorContext,
8728 "PP: Parsing Reference\n");
8729#endif
8730 xmlParseReference(ctxt);
8731 } else {
8732 /* TODO Avoid the extra copy, handle directly !!! */
8733 /*
8734 * Goal of the following test is:
8735 * - minimize calls to the SAX 'character' callback
8736 * when they are mergeable
8737 * - handle an problem for isBlank when we only parse
8738 * a sequence of blank chars and the next one is
8739 * not available to check against '<' presence.
8740 * - tries to homogenize the differences in SAX
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008741 * callbacks between the push and pull versions
Owen Taylor3473f882001-02-23 17:55:21 +00008742 * of the parser.
8743 */
8744 if ((ctxt->inputNr == 1) &&
8745 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8746 if ((!terminate) &&
8747 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8748 goto done;
8749 }
8750 ctxt->checkIndex = 0;
8751#ifdef DEBUG_PUSH
8752 xmlGenericError(xmlGenericErrorContext,
8753 "PP: Parsing char data\n");
8754#endif
8755 xmlParseCharData(ctxt, 0);
8756 }
8757 /*
8758 * Pop-up of finished entities.
8759 */
8760 while ((RAW == 0) && (ctxt->inputNr > 1))
8761 xmlPopInput(ctxt);
Daniel Veillardfdc91562002-07-01 21:52:03 +00008762 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008763 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8764 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8765 ctxt->sax->error(ctxt->userData,
8766 "detected an error in element content\n");
8767 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008768 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008769 ctxt->instate = XML_PARSER_EOF;
8770 break;
8771 }
8772 break;
8773 }
8774 case XML_PARSER_CDATA_SECTION: {
8775 /*
8776 * The Push mode need to have the SAX callback for
8777 * cdataBlock merge back contiguous callbacks.
8778 */
8779 int base;
8780
8781 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8782 if (base < 0) {
8783 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8784 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8785 if (ctxt->sax->cdataBlock != NULL)
8786 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8787 XML_PARSER_BIG_BUFFER_SIZE);
8788 }
8789 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8790 ctxt->checkIndex = 0;
8791 }
8792 goto done;
8793 } else {
8794 if ((ctxt->sax != NULL) && (base > 0) &&
8795 (!ctxt->disableSAX)) {
8796 if (ctxt->sax->cdataBlock != NULL)
8797 ctxt->sax->cdataBlock(ctxt->userData,
8798 ctxt->input->cur, base);
8799 }
8800 SKIP(base + 3);
8801 ctxt->checkIndex = 0;
8802 ctxt->instate = XML_PARSER_CONTENT;
8803#ifdef DEBUG_PUSH
8804 xmlGenericError(xmlGenericErrorContext,
8805 "PP: entering CONTENT\n");
8806#endif
8807 }
8808 break;
8809 }
8810 case XML_PARSER_END_TAG:
8811 if (avail < 2)
8812 goto done;
8813 if ((!terminate) &&
8814 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8815 goto done;
8816 xmlParseEndTag(ctxt);
8817 if (ctxt->name == NULL) {
8818 ctxt->instate = XML_PARSER_EPILOG;
8819#ifdef DEBUG_PUSH
8820 xmlGenericError(xmlGenericErrorContext,
8821 "PP: entering EPILOG\n");
8822#endif
8823 } else {
8824 ctxt->instate = XML_PARSER_CONTENT;
8825#ifdef DEBUG_PUSH
8826 xmlGenericError(xmlGenericErrorContext,
8827 "PP: entering CONTENT\n");
8828#endif
8829 }
8830 break;
8831 case XML_PARSER_DTD: {
8832 /*
8833 * Sorry but progressive parsing of the internal subset
8834 * is not expected to be supported. We first check that
8835 * the full content of the internal subset is available and
8836 * the parsing is launched only at that point.
8837 * Internal subset ends up with "']' S? '>'" in an unescaped
8838 * section and not in a ']]>' sequence which are conditional
8839 * sections (whoever argued to keep that crap in XML deserve
8840 * a place in hell !).
8841 */
8842 int base, i;
8843 xmlChar *buf;
8844 xmlChar quote = 0;
8845
8846 base = ctxt->input->cur - ctxt->input->base;
8847 if (base < 0) return(0);
8848 if (ctxt->checkIndex > base)
8849 base = ctxt->checkIndex;
8850 buf = ctxt->input->buf->buffer->content;
8851 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8852 base++) {
8853 if (quote != 0) {
8854 if (buf[base] == quote)
8855 quote = 0;
8856 continue;
8857 }
8858 if (buf[base] == '"') {
8859 quote = '"';
8860 continue;
8861 }
8862 if (buf[base] == '\'') {
8863 quote = '\'';
8864 continue;
8865 }
8866 if (buf[base] == ']') {
8867 if ((unsigned int) base +1 >=
8868 ctxt->input->buf->buffer->use)
8869 break;
8870 if (buf[base + 1] == ']') {
8871 /* conditional crap, skip both ']' ! */
8872 base++;
8873 continue;
8874 }
8875 for (i = 0;
8876 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8877 i++) {
8878 if (buf[base + i] == '>')
8879 goto found_end_int_subset;
8880 }
8881 break;
8882 }
8883 }
8884 /*
8885 * We didn't found the end of the Internal subset
8886 */
8887 if (quote == 0)
8888 ctxt->checkIndex = base;
8889#ifdef DEBUG_PUSH
8890 if (next == 0)
8891 xmlGenericError(xmlGenericErrorContext,
8892 "PP: lookup of int subset end filed\n");
8893#endif
8894 goto done;
8895
8896found_end_int_subset:
8897 xmlParseInternalSubset(ctxt);
8898 ctxt->inSubset = 2;
8899 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8900 (ctxt->sax->externalSubset != NULL))
8901 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8902 ctxt->extSubSystem, ctxt->extSubURI);
8903 ctxt->inSubset = 0;
8904 ctxt->instate = XML_PARSER_PROLOG;
8905 ctxt->checkIndex = 0;
8906#ifdef DEBUG_PUSH
8907 xmlGenericError(xmlGenericErrorContext,
8908 "PP: entering PROLOG\n");
8909#endif
8910 break;
8911 }
8912 case XML_PARSER_COMMENT:
8913 xmlGenericError(xmlGenericErrorContext,
8914 "PP: internal error, state == COMMENT\n");
8915 ctxt->instate = XML_PARSER_CONTENT;
8916#ifdef DEBUG_PUSH
8917 xmlGenericError(xmlGenericErrorContext,
8918 "PP: entering CONTENT\n");
8919#endif
8920 break;
8921 case XML_PARSER_PI:
8922 xmlGenericError(xmlGenericErrorContext,
8923 "PP: internal error, state == PI\n");
8924 ctxt->instate = XML_PARSER_CONTENT;
8925#ifdef DEBUG_PUSH
8926 xmlGenericError(xmlGenericErrorContext,
8927 "PP: entering CONTENT\n");
8928#endif
8929 break;
8930 case XML_PARSER_ENTITY_DECL:
8931 xmlGenericError(xmlGenericErrorContext,
8932 "PP: internal error, state == ENTITY_DECL\n");
8933 ctxt->instate = XML_PARSER_DTD;
8934#ifdef DEBUG_PUSH
8935 xmlGenericError(xmlGenericErrorContext,
8936 "PP: entering DTD\n");
8937#endif
8938 break;
8939 case XML_PARSER_ENTITY_VALUE:
8940 xmlGenericError(xmlGenericErrorContext,
8941 "PP: internal error, state == ENTITY_VALUE\n");
8942 ctxt->instate = XML_PARSER_CONTENT;
8943#ifdef DEBUG_PUSH
8944 xmlGenericError(xmlGenericErrorContext,
8945 "PP: entering DTD\n");
8946#endif
8947 break;
8948 case XML_PARSER_ATTRIBUTE_VALUE:
8949 xmlGenericError(xmlGenericErrorContext,
8950 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8951 ctxt->instate = XML_PARSER_START_TAG;
8952#ifdef DEBUG_PUSH
8953 xmlGenericError(xmlGenericErrorContext,
8954 "PP: entering START_TAG\n");
8955#endif
8956 break;
8957 case XML_PARSER_SYSTEM_LITERAL:
8958 xmlGenericError(xmlGenericErrorContext,
8959 "PP: internal error, state == SYSTEM_LITERAL\n");
8960 ctxt->instate = XML_PARSER_START_TAG;
8961#ifdef DEBUG_PUSH
8962 xmlGenericError(xmlGenericErrorContext,
8963 "PP: entering START_TAG\n");
8964#endif
8965 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00008966 case XML_PARSER_PUBLIC_LITERAL:
8967 xmlGenericError(xmlGenericErrorContext,
8968 "PP: internal error, state == PUBLIC_LITERAL\n");
8969 ctxt->instate = XML_PARSER_START_TAG;
8970#ifdef DEBUG_PUSH
8971 xmlGenericError(xmlGenericErrorContext,
8972 "PP: entering START_TAG\n");
8973#endif
8974 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008975 }
8976 }
8977done:
8978#ifdef DEBUG_PUSH
8979 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8980#endif
8981 return(ret);
8982}
8983
8984/**
Owen Taylor3473f882001-02-23 17:55:21 +00008985 * xmlParseChunk:
8986 * @ctxt: an XML parser context
8987 * @chunk: an char array
8988 * @size: the size in byte of the chunk
8989 * @terminate: last chunk indicator
8990 *
8991 * Parse a Chunk of memory
8992 *
8993 * Returns zero if no error, the xmlParserErrors otherwise.
8994 */
8995int
8996xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8997 int terminate) {
8998 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8999 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9000 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9001 int cur = ctxt->input->cur - ctxt->input->base;
9002
9003 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9004 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9005 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009006 ctxt->input->end =
9007 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009008#ifdef DEBUG_PUSH
9009 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9010#endif
9011
9012 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
9013 xmlParseTryOrFinish(ctxt, terminate);
9014 } else if (ctxt->instate != XML_PARSER_EOF) {
9015 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
9016 xmlParserInputBufferPtr in = ctxt->input->buf;
9017 if ((in->encoder != NULL) && (in->buffer != NULL) &&
9018 (in->raw != NULL)) {
9019 int nbchars;
9020
9021 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
9022 if (nbchars < 0) {
9023 xmlGenericError(xmlGenericErrorContext,
9024 "xmlParseChunk: encoder error\n");
9025 return(XML_ERR_INVALID_ENCODING);
9026 }
9027 }
9028 }
9029 }
9030 xmlParseTryOrFinish(ctxt, terminate);
9031 if (terminate) {
9032 /*
9033 * Check for termination
9034 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009035 int avail = 0;
9036 if (ctxt->input->buf == NULL)
9037 avail = ctxt->input->length -
9038 (ctxt->input->cur - ctxt->input->base);
9039 else
9040 avail = ctxt->input->buf->buffer->use -
9041 (ctxt->input->cur - ctxt->input->base);
9042
Owen Taylor3473f882001-02-23 17:55:21 +00009043 if ((ctxt->instate != XML_PARSER_EOF) &&
9044 (ctxt->instate != XML_PARSER_EPILOG)) {
9045 ctxt->errNo = XML_ERR_DOCUMENT_END;
9046 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9047 ctxt->sax->error(ctxt->userData,
9048 "Extra content at the end of the document\n");
9049 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009050 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009051 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009052 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
9053 ctxt->errNo = XML_ERR_DOCUMENT_END;
9054 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9055 ctxt->sax->error(ctxt->userData,
9056 "Extra content at the end of the document\n");
9057 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009058 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009059
9060 }
Owen Taylor3473f882001-02-23 17:55:21 +00009061 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009062 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009063 ctxt->sax->endDocument(ctxt->userData);
9064 }
9065 ctxt->instate = XML_PARSER_EOF;
9066 }
9067 return((xmlParserErrors) ctxt->errNo);
9068}
9069
9070/************************************************************************
9071 * *
9072 * I/O front end functions to the parser *
9073 * *
9074 ************************************************************************/
9075
9076/**
9077 * xmlStopParser:
9078 * @ctxt: an XML parser context
9079 *
9080 * Blocks further parser processing
9081 */
9082void
9083xmlStopParser(xmlParserCtxtPtr ctxt) {
9084 ctxt->instate = XML_PARSER_EOF;
9085 if (ctxt->input != NULL)
9086 ctxt->input->cur = BAD_CAST"";
9087}
9088
9089/**
9090 * xmlCreatePushParserCtxt:
9091 * @sax: a SAX handler
9092 * @user_data: The user data returned on SAX callbacks
9093 * @chunk: a pointer to an array of chars
9094 * @size: number of chars in the array
9095 * @filename: an optional file name or URI
9096 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00009097 * Create a parser context for using the XML parser in push mode.
9098 * If @buffer and @size are non-NULL, the data is used to detect
9099 * the encoding. The remaining characters will be parsed so they
9100 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00009101 * To allow content encoding detection, @size should be >= 4
9102 * The value of @filename is used for fetching external entities
9103 * and error/warning reports.
9104 *
9105 * Returns the new parser context or NULL
9106 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00009107
Owen Taylor3473f882001-02-23 17:55:21 +00009108xmlParserCtxtPtr
9109xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9110 const char *chunk, int size, const char *filename) {
9111 xmlParserCtxtPtr ctxt;
9112 xmlParserInputPtr inputStream;
9113 xmlParserInputBufferPtr buf;
9114 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9115
9116 /*
9117 * plug some encoding conversion routines
9118 */
9119 if ((chunk != NULL) && (size >= 4))
9120 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9121
9122 buf = xmlAllocParserInputBuffer(enc);
9123 if (buf == NULL) return(NULL);
9124
9125 ctxt = xmlNewParserCtxt();
9126 if (ctxt == NULL) {
9127 xmlFree(buf);
9128 return(NULL);
9129 }
9130 if (sax != NULL) {
9131 if (ctxt->sax != &xmlDefaultSAXHandler)
9132 xmlFree(ctxt->sax);
9133 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9134 if (ctxt->sax == NULL) {
9135 xmlFree(buf);
9136 xmlFree(ctxt);
9137 return(NULL);
9138 }
9139 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9140 if (user_data != NULL)
9141 ctxt->userData = user_data;
9142 }
9143 if (filename == NULL) {
9144 ctxt->directory = NULL;
9145 } else {
9146 ctxt->directory = xmlParserGetDirectory(filename);
9147 }
9148
9149 inputStream = xmlNewInputStream(ctxt);
9150 if (inputStream == NULL) {
9151 xmlFreeParserCtxt(ctxt);
9152 return(NULL);
9153 }
9154
9155 if (filename == NULL)
9156 inputStream->filename = NULL;
9157 else
Daniel Veillardf4862f02002-09-10 11:13:43 +00009158 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +00009159 xmlCanonicPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +00009160 inputStream->buf = buf;
9161 inputStream->base = inputStream->buf->buffer->content;
9162 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009163 inputStream->end =
9164 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009165
9166 inputPush(ctxt, inputStream);
9167
9168 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9169 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009170 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9171 int cur = ctxt->input->cur - ctxt->input->base;
9172
Owen Taylor3473f882001-02-23 17:55:21 +00009173 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009174
9175 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9176 ctxt->input->cur = ctxt->input->base + cur;
9177 ctxt->input->end =
9178 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009179#ifdef DEBUG_PUSH
9180 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9181#endif
9182 }
9183
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009184 if (enc != XML_CHAR_ENCODING_NONE) {
9185 xmlSwitchEncoding(ctxt, enc);
9186 }
9187
Owen Taylor3473f882001-02-23 17:55:21 +00009188 return(ctxt);
9189}
9190
9191/**
9192 * xmlCreateIOParserCtxt:
9193 * @sax: a SAX handler
9194 * @user_data: The user data returned on SAX callbacks
9195 * @ioread: an I/O read function
9196 * @ioclose: an I/O close function
9197 * @ioctx: an I/O handler
9198 * @enc: the charset encoding if known
9199 *
9200 * Create a parser context for using the XML parser with an existing
9201 * I/O stream
9202 *
9203 * Returns the new parser context or NULL
9204 */
9205xmlParserCtxtPtr
9206xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9207 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9208 void *ioctx, xmlCharEncoding enc) {
9209 xmlParserCtxtPtr ctxt;
9210 xmlParserInputPtr inputStream;
9211 xmlParserInputBufferPtr buf;
9212
9213 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9214 if (buf == NULL) return(NULL);
9215
9216 ctxt = xmlNewParserCtxt();
9217 if (ctxt == NULL) {
9218 xmlFree(buf);
9219 return(NULL);
9220 }
9221 if (sax != NULL) {
9222 if (ctxt->sax != &xmlDefaultSAXHandler)
9223 xmlFree(ctxt->sax);
9224 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9225 if (ctxt->sax == NULL) {
9226 xmlFree(buf);
9227 xmlFree(ctxt);
9228 return(NULL);
9229 }
9230 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9231 if (user_data != NULL)
9232 ctxt->userData = user_data;
9233 }
9234
9235 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9236 if (inputStream == NULL) {
9237 xmlFreeParserCtxt(ctxt);
9238 return(NULL);
9239 }
9240 inputPush(ctxt, inputStream);
9241
9242 return(ctxt);
9243}
9244
9245/************************************************************************
9246 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009247 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00009248 * *
9249 ************************************************************************/
9250
9251/**
9252 * xmlIOParseDTD:
9253 * @sax: the SAX handler block or NULL
9254 * @input: an Input Buffer
9255 * @enc: the charset encoding if known
9256 *
9257 * Load and parse a DTD
9258 *
9259 * Returns the resulting xmlDtdPtr or NULL in case of error.
9260 * @input will be freed at parsing end.
9261 */
9262
9263xmlDtdPtr
9264xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9265 xmlCharEncoding enc) {
9266 xmlDtdPtr ret = NULL;
9267 xmlParserCtxtPtr ctxt;
9268 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009269 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00009270
9271 if (input == NULL)
9272 return(NULL);
9273
9274 ctxt = xmlNewParserCtxt();
9275 if (ctxt == NULL) {
9276 return(NULL);
9277 }
9278
9279 /*
9280 * Set-up the SAX context
9281 */
9282 if (sax != NULL) {
9283 if (ctxt->sax != NULL)
9284 xmlFree(ctxt->sax);
9285 ctxt->sax = sax;
9286 ctxt->userData = NULL;
9287 }
9288
9289 /*
9290 * generate a parser input from the I/O handler
9291 */
9292
9293 pinput = xmlNewIOInputStream(ctxt, input, enc);
9294 if (pinput == NULL) {
9295 if (sax != NULL) ctxt->sax = NULL;
9296 xmlFreeParserCtxt(ctxt);
9297 return(NULL);
9298 }
9299
9300 /*
9301 * plug some encoding conversion routines here.
9302 */
9303 xmlPushInput(ctxt, pinput);
9304
9305 pinput->filename = NULL;
9306 pinput->line = 1;
9307 pinput->col = 1;
9308 pinput->base = ctxt->input->cur;
9309 pinput->cur = ctxt->input->cur;
9310 pinput->free = NULL;
9311
9312 /*
9313 * let's parse that entity knowing it's an external subset.
9314 */
9315 ctxt->inSubset = 2;
9316 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9317 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9318 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009319
9320 if (enc == XML_CHAR_ENCODING_NONE) {
9321 /*
9322 * Get the 4 first bytes and decode the charset
9323 * if enc != XML_CHAR_ENCODING_NONE
9324 * plug some encoding conversion routines.
9325 */
9326 start[0] = RAW;
9327 start[1] = NXT(1);
9328 start[2] = NXT(2);
9329 start[3] = NXT(3);
9330 enc = xmlDetectCharEncoding(start, 4);
9331 if (enc != XML_CHAR_ENCODING_NONE) {
9332 xmlSwitchEncoding(ctxt, enc);
9333 }
9334 }
9335
Owen Taylor3473f882001-02-23 17:55:21 +00009336 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9337
9338 if (ctxt->myDoc != NULL) {
9339 if (ctxt->wellFormed) {
9340 ret = ctxt->myDoc->extSubset;
9341 ctxt->myDoc->extSubset = NULL;
9342 } else {
9343 ret = NULL;
9344 }
9345 xmlFreeDoc(ctxt->myDoc);
9346 ctxt->myDoc = NULL;
9347 }
9348 if (sax != NULL) ctxt->sax = NULL;
9349 xmlFreeParserCtxt(ctxt);
9350
9351 return(ret);
9352}
9353
9354/**
9355 * xmlSAXParseDTD:
9356 * @sax: the SAX handler block
9357 * @ExternalID: a NAME* containing the External ID of the DTD
9358 * @SystemID: a NAME* containing the URL to the DTD
9359 *
9360 * Load and parse an external subset.
9361 *
9362 * Returns the resulting xmlDtdPtr or NULL in case of error.
9363 */
9364
9365xmlDtdPtr
9366xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9367 const xmlChar *SystemID) {
9368 xmlDtdPtr ret = NULL;
9369 xmlParserCtxtPtr ctxt;
9370 xmlParserInputPtr input = NULL;
9371 xmlCharEncoding enc;
9372
9373 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9374
9375 ctxt = xmlNewParserCtxt();
9376 if (ctxt == NULL) {
9377 return(NULL);
9378 }
9379
9380 /*
9381 * Set-up the SAX context
9382 */
9383 if (sax != NULL) {
9384 if (ctxt->sax != NULL)
9385 xmlFree(ctxt->sax);
9386 ctxt->sax = sax;
9387 ctxt->userData = NULL;
9388 }
9389
9390 /*
9391 * Ask the Entity resolver to load the damn thing
9392 */
9393
9394 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9395 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9396 if (input == NULL) {
9397 if (sax != NULL) ctxt->sax = NULL;
9398 xmlFreeParserCtxt(ctxt);
9399 return(NULL);
9400 }
9401
9402 /*
9403 * plug some encoding conversion routines here.
9404 */
9405 xmlPushInput(ctxt, input);
9406 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9407 xmlSwitchEncoding(ctxt, enc);
9408
9409 if (input->filename == NULL)
9410 input->filename = (char *) xmlStrdup(SystemID);
9411 input->line = 1;
9412 input->col = 1;
9413 input->base = ctxt->input->cur;
9414 input->cur = ctxt->input->cur;
9415 input->free = NULL;
9416
9417 /*
9418 * let's parse that entity knowing it's an external subset.
9419 */
9420 ctxt->inSubset = 2;
9421 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9422 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9423 ExternalID, SystemID);
9424 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9425
9426 if (ctxt->myDoc != NULL) {
9427 if (ctxt->wellFormed) {
9428 ret = ctxt->myDoc->extSubset;
9429 ctxt->myDoc->extSubset = NULL;
9430 } else {
9431 ret = NULL;
9432 }
9433 xmlFreeDoc(ctxt->myDoc);
9434 ctxt->myDoc = NULL;
9435 }
9436 if (sax != NULL) ctxt->sax = NULL;
9437 xmlFreeParserCtxt(ctxt);
9438
9439 return(ret);
9440}
9441
9442/**
9443 * xmlParseDTD:
9444 * @ExternalID: a NAME* containing the External ID of the DTD
9445 * @SystemID: a NAME* containing the URL to the DTD
9446 *
9447 * Load and parse an external subset.
9448 *
9449 * Returns the resulting xmlDtdPtr or NULL in case of error.
9450 */
9451
9452xmlDtdPtr
9453xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9454 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9455}
9456
9457/************************************************************************
9458 * *
9459 * Front ends when parsing an Entity *
9460 * *
9461 ************************************************************************/
9462
9463/**
Owen Taylor3473f882001-02-23 17:55:21 +00009464 * xmlParseCtxtExternalEntity:
9465 * @ctx: the existing parsing context
9466 * @URL: the URL for the entity to load
9467 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009468 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009469 *
9470 * Parse an external general entity within an existing parsing context
9471 * An external general parsed entity is well-formed if it matches the
9472 * production labeled extParsedEnt.
9473 *
9474 * [78] extParsedEnt ::= TextDecl? content
9475 *
9476 * Returns 0 if the entity is well formed, -1 in case of args problem and
9477 * the parser error code otherwise
9478 */
9479
9480int
9481xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009482 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009483 xmlParserCtxtPtr ctxt;
9484 xmlDocPtr newDoc;
9485 xmlSAXHandlerPtr oldsax = NULL;
9486 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009487 xmlChar start[4];
9488 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009489
9490 if (ctx->depth > 40) {
9491 return(XML_ERR_ENTITY_LOOP);
9492 }
9493
Daniel Veillardcda96922001-08-21 10:56:31 +00009494 if (lst != NULL)
9495 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009496 if ((URL == NULL) && (ID == NULL))
9497 return(-1);
9498 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9499 return(-1);
9500
9501
9502 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9503 if (ctxt == NULL) return(-1);
9504 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +00009505 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +00009506 oldsax = ctxt->sax;
9507 ctxt->sax = ctx->sax;
9508 newDoc = xmlNewDoc(BAD_CAST "1.0");
9509 if (newDoc == NULL) {
9510 xmlFreeParserCtxt(ctxt);
9511 return(-1);
9512 }
9513 if (ctx->myDoc != NULL) {
9514 newDoc->intSubset = ctx->myDoc->intSubset;
9515 newDoc->extSubset = ctx->myDoc->extSubset;
9516 }
9517 if (ctx->myDoc->URL != NULL) {
9518 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9519 }
9520 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9521 if (newDoc->children == NULL) {
9522 ctxt->sax = oldsax;
9523 xmlFreeParserCtxt(ctxt);
9524 newDoc->intSubset = NULL;
9525 newDoc->extSubset = NULL;
9526 xmlFreeDoc(newDoc);
9527 return(-1);
9528 }
9529 nodePush(ctxt, newDoc->children);
9530 if (ctx->myDoc == NULL) {
9531 ctxt->myDoc = newDoc;
9532 } else {
9533 ctxt->myDoc = ctx->myDoc;
9534 newDoc->children->doc = ctx->myDoc;
9535 }
9536
Daniel Veillard87a764e2001-06-20 17:41:10 +00009537 /*
9538 * Get the 4 first bytes and decode the charset
9539 * if enc != XML_CHAR_ENCODING_NONE
9540 * plug some encoding conversion routines.
9541 */
9542 GROW
9543 start[0] = RAW;
9544 start[1] = NXT(1);
9545 start[2] = NXT(2);
9546 start[3] = NXT(3);
9547 enc = xmlDetectCharEncoding(start, 4);
9548 if (enc != XML_CHAR_ENCODING_NONE) {
9549 xmlSwitchEncoding(ctxt, enc);
9550 }
9551
Owen Taylor3473f882001-02-23 17:55:21 +00009552 /*
9553 * Parse a possible text declaration first
9554 */
Owen Taylor3473f882001-02-23 17:55:21 +00009555 if ((RAW == '<') && (NXT(1) == '?') &&
9556 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9557 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9558 xmlParseTextDecl(ctxt);
9559 }
9560
9561 /*
9562 * Doing validity checking on chunk doesn't make sense
9563 */
9564 ctxt->instate = XML_PARSER_CONTENT;
9565 ctxt->validate = ctx->validate;
9566 ctxt->loadsubset = ctx->loadsubset;
9567 ctxt->depth = ctx->depth + 1;
9568 ctxt->replaceEntities = ctx->replaceEntities;
9569 if (ctxt->validate) {
9570 ctxt->vctxt.error = ctx->vctxt.error;
9571 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009572 } else {
9573 ctxt->vctxt.error = NULL;
9574 ctxt->vctxt.warning = NULL;
9575 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009576 ctxt->vctxt.nodeTab = NULL;
9577 ctxt->vctxt.nodeNr = 0;
9578 ctxt->vctxt.nodeMax = 0;
9579 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009580
9581 xmlParseContent(ctxt);
9582
9583 if ((RAW == '<') && (NXT(1) == '/')) {
9584 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9585 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9586 ctxt->sax->error(ctxt->userData,
9587 "chunk is not well balanced\n");
9588 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009589 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009590 } else if (RAW != 0) {
9591 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9592 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9593 ctxt->sax->error(ctxt->userData,
9594 "extra content at the end of well balanced chunk\n");
9595 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009596 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009597 }
9598 if (ctxt->node != newDoc->children) {
9599 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9600 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9601 ctxt->sax->error(ctxt->userData,
9602 "chunk is not well balanced\n");
9603 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009604 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009605 }
9606
9607 if (!ctxt->wellFormed) {
9608 if (ctxt->errNo == 0)
9609 ret = 1;
9610 else
9611 ret = ctxt->errNo;
9612 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009613 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009614 xmlNodePtr cur;
9615
9616 /*
9617 * Return the newly created nodeset after unlinking it from
9618 * they pseudo parent.
9619 */
9620 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009621 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009622 while (cur != NULL) {
9623 cur->parent = NULL;
9624 cur = cur->next;
9625 }
9626 newDoc->children->children = NULL;
9627 }
9628 ret = 0;
9629 }
9630 ctxt->sax = oldsax;
9631 xmlFreeParserCtxt(ctxt);
9632 newDoc->intSubset = NULL;
9633 newDoc->extSubset = NULL;
9634 xmlFreeDoc(newDoc);
9635
9636 return(ret);
9637}
9638
9639/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009640 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009641 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009642 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009643 * @sax: the SAX handler bloc (possibly NULL)
9644 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9645 * @depth: Used for loop detection, use 0
9646 * @URL: the URL for the entity to load
9647 * @ID: the System ID for the entity to load
9648 * @list: the return value for the set of parsed nodes
9649 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009650 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009651 *
9652 * Returns 0 if the entity is well formed, -1 in case of args problem and
9653 * the parser error code otherwise
9654 */
9655
Daniel Veillard257d9102001-05-08 10:41:44 +00009656static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009657xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9658 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009659 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009660 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009661 xmlParserCtxtPtr ctxt;
9662 xmlDocPtr newDoc;
9663 xmlSAXHandlerPtr oldsax = NULL;
9664 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009665 xmlChar start[4];
9666 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009667
9668 if (depth > 40) {
9669 return(XML_ERR_ENTITY_LOOP);
9670 }
9671
9672
9673
9674 if (list != NULL)
9675 *list = NULL;
9676 if ((URL == NULL) && (ID == NULL))
9677 return(-1);
9678 if (doc == NULL) /* @@ relax but check for dereferences */
9679 return(-1);
9680
9681
9682 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9683 if (ctxt == NULL) return(-1);
9684 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009685 if (oldctxt != NULL) {
9686 ctxt->_private = oldctxt->_private;
9687 ctxt->loadsubset = oldctxt->loadsubset;
9688 ctxt->validate = oldctxt->validate;
9689 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009690 ctxt->record_info = oldctxt->record_info;
9691 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
9692 ctxt->node_seq.length = oldctxt->node_seq.length;
9693 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009694 } else {
9695 /*
9696 * Doing validity checking on chunk without context
9697 * doesn't make sense
9698 */
9699 ctxt->_private = NULL;
9700 ctxt->validate = 0;
9701 ctxt->external = 2;
9702 ctxt->loadsubset = 0;
9703 }
Owen Taylor3473f882001-02-23 17:55:21 +00009704 if (sax != NULL) {
9705 oldsax = ctxt->sax;
9706 ctxt->sax = sax;
9707 if (user_data != NULL)
9708 ctxt->userData = user_data;
9709 }
9710 newDoc = xmlNewDoc(BAD_CAST "1.0");
9711 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009712 ctxt->node_seq.maximum = 0;
9713 ctxt->node_seq.length = 0;
9714 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009715 xmlFreeParserCtxt(ctxt);
9716 return(-1);
9717 }
9718 if (doc != NULL) {
9719 newDoc->intSubset = doc->intSubset;
9720 newDoc->extSubset = doc->extSubset;
9721 }
9722 if (doc->URL != NULL) {
9723 newDoc->URL = xmlStrdup(doc->URL);
9724 }
9725 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9726 if (newDoc->children == NULL) {
9727 if (sax != NULL)
9728 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009729 ctxt->node_seq.maximum = 0;
9730 ctxt->node_seq.length = 0;
9731 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009732 xmlFreeParserCtxt(ctxt);
9733 newDoc->intSubset = NULL;
9734 newDoc->extSubset = NULL;
9735 xmlFreeDoc(newDoc);
9736 return(-1);
9737 }
9738 nodePush(ctxt, newDoc->children);
9739 if (doc == NULL) {
9740 ctxt->myDoc = newDoc;
9741 } else {
9742 ctxt->myDoc = doc;
9743 newDoc->children->doc = doc;
9744 }
9745
Daniel Veillard87a764e2001-06-20 17:41:10 +00009746 /*
9747 * Get the 4 first bytes and decode the charset
9748 * if enc != XML_CHAR_ENCODING_NONE
9749 * plug some encoding conversion routines.
9750 */
9751 GROW;
9752 start[0] = RAW;
9753 start[1] = NXT(1);
9754 start[2] = NXT(2);
9755 start[3] = NXT(3);
9756 enc = xmlDetectCharEncoding(start, 4);
9757 if (enc != XML_CHAR_ENCODING_NONE) {
9758 xmlSwitchEncoding(ctxt, enc);
9759 }
9760
Owen Taylor3473f882001-02-23 17:55:21 +00009761 /*
9762 * Parse a possible text declaration first
9763 */
Owen Taylor3473f882001-02-23 17:55:21 +00009764 if ((RAW == '<') && (NXT(1) == '?') &&
9765 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9766 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9767 xmlParseTextDecl(ctxt);
9768 }
9769
Owen Taylor3473f882001-02-23 17:55:21 +00009770 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009771 ctxt->depth = depth;
9772
9773 xmlParseContent(ctxt);
9774
Daniel Veillard561b7f82002-03-20 21:55:57 +00009775 if ((RAW == '<') && (NXT(1) == '/')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009776 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9777 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9778 ctxt->sax->error(ctxt->userData,
9779 "chunk is not well balanced\n");
9780 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009781 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00009782 } else if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00009783 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9784 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9785 ctxt->sax->error(ctxt->userData,
9786 "extra content at the end of well balanced chunk\n");
9787 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009788 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009789 }
9790 if (ctxt->node != newDoc->children) {
9791 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9792 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9793 ctxt->sax->error(ctxt->userData,
9794 "chunk is not well balanced\n");
9795 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009796 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009797 }
9798
9799 if (!ctxt->wellFormed) {
9800 if (ctxt->errNo == 0)
9801 ret = 1;
9802 else
9803 ret = ctxt->errNo;
9804 } else {
9805 if (list != NULL) {
9806 xmlNodePtr cur;
9807
9808 /*
9809 * Return the newly created nodeset after unlinking it from
9810 * they pseudo parent.
9811 */
9812 cur = newDoc->children->children;
9813 *list = cur;
9814 while (cur != NULL) {
9815 cur->parent = NULL;
9816 cur = cur->next;
9817 }
9818 newDoc->children->children = NULL;
9819 }
9820 ret = 0;
9821 }
9822 if (sax != NULL)
9823 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +00009824 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
9825 oldctxt->node_seq.length = ctxt->node_seq.length;
9826 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009827 ctxt->node_seq.maximum = 0;
9828 ctxt->node_seq.length = 0;
9829 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009830 xmlFreeParserCtxt(ctxt);
9831 newDoc->intSubset = NULL;
9832 newDoc->extSubset = NULL;
9833 xmlFreeDoc(newDoc);
9834
9835 return(ret);
9836}
9837
9838/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009839 * xmlParseExternalEntity:
9840 * @doc: the document the chunk pertains to
9841 * @sax: the SAX handler bloc (possibly NULL)
9842 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9843 * @depth: Used for loop detection, use 0
9844 * @URL: the URL for the entity to load
9845 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009846 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009847 *
9848 * Parse an external general entity
9849 * An external general parsed entity is well-formed if it matches the
9850 * production labeled extParsedEnt.
9851 *
9852 * [78] extParsedEnt ::= TextDecl? content
9853 *
9854 * Returns 0 if the entity is well formed, -1 in case of args problem and
9855 * the parser error code otherwise
9856 */
9857
9858int
9859xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009860 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009861 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009862 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009863}
9864
9865/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009866 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009867 * @doc: the document the chunk pertains to
9868 * @sax: the SAX handler bloc (possibly NULL)
9869 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9870 * @depth: Used for loop detection, use 0
9871 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009872 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009873 *
9874 * Parse a well-balanced chunk of an XML document
9875 * called by the parser
9876 * The allowed sequence for the Well Balanced Chunk is the one defined by
9877 * the content production in the XML grammar:
9878 *
9879 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9880 *
9881 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9882 * the parser error code otherwise
9883 */
9884
9885int
9886xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009887 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +00009888 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
9889 depth, string, lst, 0 );
9890}
9891
9892/**
Daniel Veillard328f48c2002-11-15 15:24:34 +00009893 * xmlParseBalancedChunkMemoryInternal:
9894 * @oldctxt: the existing parsing context
9895 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9896 * @user_data: the user data field for the parser context
9897 * @lst: the return value for the set of parsed nodes
9898 *
9899 *
9900 * Parse a well-balanced chunk of an XML document
9901 * called by the parser
9902 * The allowed sequence for the Well Balanced Chunk is the one defined by
9903 * the content production in the XML grammar:
9904 *
9905 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9906 *
9907 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9908 * the parser error code otherwise
9909 *
9910 * In case recover is set to 1, the nodelist will not be empty even if
9911 * the parsed chunk is not well balanced.
9912 */
9913static int
9914xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
9915 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
9916 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009917 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009918 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009919 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009920 int size;
9921 int ret = 0;
9922
9923 if (oldctxt->depth > 40) {
9924 return(XML_ERR_ENTITY_LOOP);
9925 }
9926
9927
9928 if (lst != NULL)
9929 *lst = NULL;
9930 if (string == NULL)
9931 return(-1);
9932
9933 size = xmlStrlen(string);
9934
9935 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9936 if (ctxt == NULL) return(-1);
9937 if (user_data != NULL)
9938 ctxt->userData = user_data;
9939 else
9940 ctxt->userData = ctxt;
9941
9942 oldsax = ctxt->sax;
9943 ctxt->sax = oldctxt->sax;
Daniel Veillarde1ca5032002-12-09 14:13:43 +00009944 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009945 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +00009946 newDoc = xmlNewDoc(BAD_CAST "1.0");
9947 if (newDoc == NULL) {
9948 ctxt->sax = oldsax;
9949 xmlFreeParserCtxt(ctxt);
9950 return(-1);
9951 }
Daniel Veillard328f48c2002-11-15 15:24:34 +00009952 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009953 } else {
9954 ctxt->myDoc = oldctxt->myDoc;
9955 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009956 }
Daniel Veillard9bc53102002-11-25 13:20:04 +00009957 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +00009958 BAD_CAST "pseudoroot", NULL);
9959 if (ctxt->myDoc->children == NULL) {
9960 ctxt->sax = oldsax;
9961 xmlFreeParserCtxt(ctxt);
9962 if (newDoc != NULL)
9963 xmlFreeDoc(newDoc);
9964 return(-1);
9965 }
9966 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +00009967 ctxt->instate = XML_PARSER_CONTENT;
9968 ctxt->depth = oldctxt->depth + 1;
9969
Daniel Veillard328f48c2002-11-15 15:24:34 +00009970 ctxt->validate = 0;
9971 ctxt->loadsubset = oldctxt->loadsubset;
9972
Daniel Veillard68e9e742002-11-16 15:35:11 +00009973 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +00009974 if ((RAW == '<') && (NXT(1) == '/')) {
9975 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9976 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9977 ctxt->sax->error(ctxt->userData,
9978 "chunk is not well balanced\n");
9979 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009980 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009981 } else if (RAW != 0) {
9982 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9983 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9984 ctxt->sax->error(ctxt->userData,
9985 "extra content at the end of well balanced chunk\n");
9986 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009987 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009988 }
Daniel Veillard68e9e742002-11-16 15:35:11 +00009989 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard328f48c2002-11-15 15:24:34 +00009990 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9991 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9992 ctxt->sax->error(ctxt->userData,
9993 "chunk is not well balanced\n");
9994 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009995 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009996 }
9997
9998 if (!ctxt->wellFormed) {
9999 if (ctxt->errNo == 0)
10000 ret = 1;
10001 else
10002 ret = ctxt->errNo;
10003 } else {
10004 ret = 0;
10005 }
10006
10007 if ((lst != NULL) && (ret == 0)) {
10008 xmlNodePtr cur;
10009
10010 /*
10011 * Return the newly created nodeset after unlinking it from
10012 * they pseudo parent.
10013 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000010014 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010015 *lst = cur;
10016 while (cur != NULL) {
Daniel Veillard8d589042003-02-04 15:07:21 +000010017 if (oldctxt->validate && oldctxt->wellFormed &&
10018 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
10019 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
10020 oldctxt->myDoc, cur);
10021 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010022 cur->parent = NULL;
10023 cur = cur->next;
10024 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010025 ctxt->myDoc->children->children = NULL;
10026 }
10027 if (ctxt->myDoc != NULL) {
10028 xmlFreeNode(ctxt->myDoc->children);
10029 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010030 }
10031
10032 ctxt->sax = oldsax;
10033 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010034 if (newDoc != NULL)
10035 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010036
10037 return(ret);
10038}
10039
10040/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000010041 * xmlParseBalancedChunkMemoryRecover:
10042 * @doc: the document the chunk pertains to
10043 * @sax: the SAX handler bloc (possibly NULL)
10044 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10045 * @depth: Used for loop detection, use 0
10046 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10047 * @lst: the return value for the set of parsed nodes
10048 * @recover: return nodes even if the data is broken (use 0)
10049 *
10050 *
10051 * Parse a well-balanced chunk of an XML document
10052 * called by the parser
10053 * The allowed sequence for the Well Balanced Chunk is the one defined by
10054 * the content production in the XML grammar:
10055 *
10056 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10057 *
10058 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10059 * the parser error code otherwise
10060 *
10061 * In case recover is set to 1, the nodelist will not be empty even if
10062 * the parsed chunk is not well balanced.
10063 */
10064int
10065xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
10066 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
10067 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000010068 xmlParserCtxtPtr ctxt;
10069 xmlDocPtr newDoc;
10070 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000010071 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000010072 int size;
10073 int ret = 0;
10074
10075 if (depth > 40) {
10076 return(XML_ERR_ENTITY_LOOP);
10077 }
10078
10079
Daniel Veillardcda96922001-08-21 10:56:31 +000010080 if (lst != NULL)
10081 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010082 if (string == NULL)
10083 return(-1);
10084
10085 size = xmlStrlen(string);
10086
10087 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10088 if (ctxt == NULL) return(-1);
10089 ctxt->userData = ctxt;
10090 if (sax != NULL) {
10091 oldsax = ctxt->sax;
10092 ctxt->sax = sax;
10093 if (user_data != NULL)
10094 ctxt->userData = user_data;
10095 }
10096 newDoc = xmlNewDoc(BAD_CAST "1.0");
10097 if (newDoc == NULL) {
10098 xmlFreeParserCtxt(ctxt);
10099 return(-1);
10100 }
10101 if (doc != NULL) {
10102 newDoc->intSubset = doc->intSubset;
10103 newDoc->extSubset = doc->extSubset;
10104 }
10105 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10106 if (newDoc->children == NULL) {
10107 if (sax != NULL)
10108 ctxt->sax = oldsax;
10109 xmlFreeParserCtxt(ctxt);
10110 newDoc->intSubset = NULL;
10111 newDoc->extSubset = NULL;
10112 xmlFreeDoc(newDoc);
10113 return(-1);
10114 }
10115 nodePush(ctxt, newDoc->children);
10116 if (doc == NULL) {
10117 ctxt->myDoc = newDoc;
10118 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000010119 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000010120 newDoc->children->doc = doc;
10121 }
10122 ctxt->instate = XML_PARSER_CONTENT;
10123 ctxt->depth = depth;
10124
10125 /*
10126 * Doing validity checking on chunk doesn't make sense
10127 */
10128 ctxt->validate = 0;
10129 ctxt->loadsubset = 0;
10130
Daniel Veillardb39bc392002-10-26 19:29:51 +000010131 if ( doc != NULL ){
10132 content = doc->children;
10133 doc->children = NULL;
10134 xmlParseContent(ctxt);
10135 doc->children = content;
10136 }
10137 else {
10138 xmlParseContent(ctxt);
10139 }
Owen Taylor3473f882001-02-23 17:55:21 +000010140 if ((RAW == '<') && (NXT(1) == '/')) {
10141 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10142 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10143 ctxt->sax->error(ctxt->userData,
10144 "chunk is not well balanced\n");
10145 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010146 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010147 } else if (RAW != 0) {
10148 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10149 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10150 ctxt->sax->error(ctxt->userData,
10151 "extra content at the end of well balanced chunk\n");
10152 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010153 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010154 }
10155 if (ctxt->node != newDoc->children) {
10156 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10157 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10158 ctxt->sax->error(ctxt->userData,
10159 "chunk is not well balanced\n");
10160 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010161 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010162 }
10163
10164 if (!ctxt->wellFormed) {
10165 if (ctxt->errNo == 0)
10166 ret = 1;
10167 else
10168 ret = ctxt->errNo;
10169 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010170 ret = 0;
10171 }
10172
10173 if (lst != NULL && (ret == 0 || recover == 1)) {
10174 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010175
10176 /*
10177 * Return the newly created nodeset after unlinking it from
10178 * they pseudo parent.
10179 */
10180 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010181 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010182 while (cur != NULL) {
10183 cur->parent = NULL;
10184 cur = cur->next;
10185 }
10186 newDoc->children->children = NULL;
10187 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000010188
Owen Taylor3473f882001-02-23 17:55:21 +000010189 if (sax != NULL)
10190 ctxt->sax = oldsax;
10191 xmlFreeParserCtxt(ctxt);
10192 newDoc->intSubset = NULL;
10193 newDoc->extSubset = NULL;
10194 xmlFreeDoc(newDoc);
10195
10196 return(ret);
10197}
10198
10199/**
10200 * xmlSAXParseEntity:
10201 * @sax: the SAX handler block
10202 * @filename: the filename
10203 *
10204 * parse an XML external entity out of context and build a tree.
10205 * It use the given SAX function block to handle the parsing callback.
10206 * If sax is NULL, fallback to the default DOM tree building routines.
10207 *
10208 * [78] extParsedEnt ::= TextDecl? content
10209 *
10210 * This correspond to a "Well Balanced" chunk
10211 *
10212 * Returns the resulting document tree
10213 */
10214
10215xmlDocPtr
10216xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
10217 xmlDocPtr ret;
10218 xmlParserCtxtPtr ctxt;
10219 char *directory = NULL;
10220
10221 ctxt = xmlCreateFileParserCtxt(filename);
10222 if (ctxt == NULL) {
10223 return(NULL);
10224 }
10225 if (sax != NULL) {
10226 if (ctxt->sax != NULL)
10227 xmlFree(ctxt->sax);
10228 ctxt->sax = sax;
10229 ctxt->userData = NULL;
10230 }
10231
10232 if ((ctxt->directory == NULL) && (directory == NULL))
10233 directory = xmlParserGetDirectory(filename);
10234
10235 xmlParseExtParsedEnt(ctxt);
10236
10237 if (ctxt->wellFormed)
10238 ret = ctxt->myDoc;
10239 else {
10240 ret = NULL;
10241 xmlFreeDoc(ctxt->myDoc);
10242 ctxt->myDoc = NULL;
10243 }
10244 if (sax != NULL)
10245 ctxt->sax = NULL;
10246 xmlFreeParserCtxt(ctxt);
10247
10248 return(ret);
10249}
10250
10251/**
10252 * xmlParseEntity:
10253 * @filename: the filename
10254 *
10255 * parse an XML external entity out of context and build a tree.
10256 *
10257 * [78] extParsedEnt ::= TextDecl? content
10258 *
10259 * This correspond to a "Well Balanced" chunk
10260 *
10261 * Returns the resulting document tree
10262 */
10263
10264xmlDocPtr
10265xmlParseEntity(const char *filename) {
10266 return(xmlSAXParseEntity(NULL, filename));
10267}
10268
10269/**
10270 * xmlCreateEntityParserCtxt:
10271 * @URL: the entity URL
10272 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010273 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000010274 *
10275 * Create a parser context for an external entity
10276 * Automatic support for ZLIB/Compress compressed document is provided
10277 * by default if found at compile-time.
10278 *
10279 * Returns the new parser context or NULL
10280 */
10281xmlParserCtxtPtr
10282xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10283 const xmlChar *base) {
10284 xmlParserCtxtPtr ctxt;
10285 xmlParserInputPtr inputStream;
10286 char *directory = NULL;
10287 xmlChar *uri;
10288
10289 ctxt = xmlNewParserCtxt();
10290 if (ctxt == NULL) {
10291 return(NULL);
10292 }
10293
10294 uri = xmlBuildURI(URL, base);
10295
10296 if (uri == NULL) {
10297 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
10298 if (inputStream == NULL) {
10299 xmlFreeParserCtxt(ctxt);
10300 return(NULL);
10301 }
10302
10303 inputPush(ctxt, inputStream);
10304
10305 if ((ctxt->directory == NULL) && (directory == NULL))
10306 directory = xmlParserGetDirectory((char *)URL);
10307 if ((ctxt->directory == NULL) && (directory != NULL))
10308 ctxt->directory = directory;
10309 } else {
10310 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
10311 if (inputStream == NULL) {
10312 xmlFree(uri);
10313 xmlFreeParserCtxt(ctxt);
10314 return(NULL);
10315 }
10316
10317 inputPush(ctxt, inputStream);
10318
10319 if ((ctxt->directory == NULL) && (directory == NULL))
10320 directory = xmlParserGetDirectory((char *)uri);
10321 if ((ctxt->directory == NULL) && (directory != NULL))
10322 ctxt->directory = directory;
10323 xmlFree(uri);
10324 }
10325
10326 return(ctxt);
10327}
10328
10329/************************************************************************
10330 * *
10331 * Front ends when parsing from a file *
10332 * *
10333 ************************************************************************/
10334
10335/**
10336 * xmlCreateFileParserCtxt:
10337 * @filename: the filename
10338 *
10339 * Create a parser context for a file content.
10340 * Automatic support for ZLIB/Compress compressed document is provided
10341 * by default if found at compile-time.
10342 *
10343 * Returns the new parser context or NULL
10344 */
10345xmlParserCtxtPtr
10346xmlCreateFileParserCtxt(const char *filename)
10347{
10348 xmlParserCtxtPtr ctxt;
10349 xmlParserInputPtr inputStream;
Igor Zlatkovicce076162003-02-23 13:39:39 +000010350 char *canonicFilename;
Owen Taylor3473f882001-02-23 17:55:21 +000010351 char *directory = NULL;
10352
Owen Taylor3473f882001-02-23 17:55:21 +000010353 ctxt = xmlNewParserCtxt();
10354 if (ctxt == NULL) {
10355 if (xmlDefaultSAXHandler.error != NULL) {
10356 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
10357 }
10358 return(NULL);
10359 }
10360
Daniel Veillardc64b8e92003-02-24 11:47:13 +000010361 canonicFilename = (char *) xmlCanonicPath((const xmlChar *) filename);
Igor Zlatkovicce076162003-02-23 13:39:39 +000010362 if (canonicFilename == NULL) {
10363 if (xmlDefaultSAXHandler.error != NULL) {
10364 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
10365 }
10366 return(NULL);
10367 }
10368
10369 inputStream = xmlLoadExternalEntity(canonicFilename, NULL, ctxt);
10370 xmlFree(canonicFilename);
Owen Taylor3473f882001-02-23 17:55:21 +000010371 if (inputStream == NULL) {
10372 xmlFreeParserCtxt(ctxt);
10373 return(NULL);
10374 }
10375
Owen Taylor3473f882001-02-23 17:55:21 +000010376 inputPush(ctxt, inputStream);
10377 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010378 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000010379 if ((ctxt->directory == NULL) && (directory != NULL))
10380 ctxt->directory = directory;
10381
10382 return(ctxt);
10383}
10384
10385/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010386 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000010387 * @sax: the SAX handler block
10388 * @filename: the filename
10389 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10390 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000010391 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000010392 *
10393 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10394 * compressed document is provided by default if found at compile-time.
10395 * It use the given SAX function block to handle the parsing callback.
10396 * If sax is NULL, fallback to the default DOM tree building routines.
10397 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000010398 * User data (void *) is stored within the parser context in the
10399 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000010400 *
Owen Taylor3473f882001-02-23 17:55:21 +000010401 * Returns the resulting document tree
10402 */
10403
10404xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000010405xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
10406 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000010407 xmlDocPtr ret;
10408 xmlParserCtxtPtr ctxt;
10409 char *directory = NULL;
10410
Daniel Veillard635ef722001-10-29 11:48:19 +000010411 xmlInitParser();
10412
Owen Taylor3473f882001-02-23 17:55:21 +000010413 ctxt = xmlCreateFileParserCtxt(filename);
10414 if (ctxt == NULL) {
10415 return(NULL);
10416 }
10417 if (sax != NULL) {
10418 if (ctxt->sax != NULL)
10419 xmlFree(ctxt->sax);
10420 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010421 }
Daniel Veillarda293c322001-10-02 13:54:14 +000010422 if (data!=NULL) {
10423 ctxt->_private=data;
10424 }
Owen Taylor3473f882001-02-23 17:55:21 +000010425
10426 if ((ctxt->directory == NULL) && (directory == NULL))
10427 directory = xmlParserGetDirectory(filename);
10428 if ((ctxt->directory == NULL) && (directory != NULL))
10429 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
10430
Daniel Veillarddad3f682002-11-17 16:47:27 +000010431 ctxt->recovery = recovery;
10432
Owen Taylor3473f882001-02-23 17:55:21 +000010433 xmlParseDocument(ctxt);
10434
10435 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10436 else {
10437 ret = NULL;
10438 xmlFreeDoc(ctxt->myDoc);
10439 ctxt->myDoc = NULL;
10440 }
10441 if (sax != NULL)
10442 ctxt->sax = NULL;
10443 xmlFreeParserCtxt(ctxt);
10444
10445 return(ret);
10446}
10447
10448/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010449 * xmlSAXParseFile:
10450 * @sax: the SAX handler block
10451 * @filename: the filename
10452 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10453 * documents
10454 *
10455 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10456 * compressed document is provided by default if found at compile-time.
10457 * It use the given SAX function block to handle the parsing callback.
10458 * If sax is NULL, fallback to the default DOM tree building routines.
10459 *
10460 * Returns the resulting document tree
10461 */
10462
10463xmlDocPtr
10464xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
10465 int recovery) {
10466 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
10467}
10468
10469/**
Owen Taylor3473f882001-02-23 17:55:21 +000010470 * xmlRecoverDoc:
10471 * @cur: a pointer to an array of xmlChar
10472 *
10473 * parse an XML in-memory document and build a tree.
10474 * In the case the document is not Well Formed, a tree is built anyway
10475 *
10476 * Returns the resulting document tree
10477 */
10478
10479xmlDocPtr
10480xmlRecoverDoc(xmlChar *cur) {
10481 return(xmlSAXParseDoc(NULL, cur, 1));
10482}
10483
10484/**
10485 * xmlParseFile:
10486 * @filename: the filename
10487 *
10488 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10489 * compressed document is provided by default if found at compile-time.
10490 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000010491 * Returns the resulting document tree if the file was wellformed,
10492 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000010493 */
10494
10495xmlDocPtr
10496xmlParseFile(const char *filename) {
10497 return(xmlSAXParseFile(NULL, filename, 0));
10498}
10499
10500/**
10501 * xmlRecoverFile:
10502 * @filename: the filename
10503 *
10504 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10505 * compressed document is provided by default if found at compile-time.
10506 * In the case the document is not Well Formed, a tree is built anyway
10507 *
10508 * Returns the resulting document tree
10509 */
10510
10511xmlDocPtr
10512xmlRecoverFile(const char *filename) {
10513 return(xmlSAXParseFile(NULL, filename, 1));
10514}
10515
10516
10517/**
10518 * xmlSetupParserForBuffer:
10519 * @ctxt: an XML parser context
10520 * @buffer: a xmlChar * buffer
10521 * @filename: a file name
10522 *
10523 * Setup the parser context to parse a new buffer; Clears any prior
10524 * contents from the parser context. The buffer parameter must not be
10525 * NULL, but the filename parameter can be
10526 */
10527void
10528xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
10529 const char* filename)
10530{
10531 xmlParserInputPtr input;
10532
10533 input = xmlNewInputStream(ctxt);
10534 if (input == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +000010535 xmlGenericError(xmlGenericErrorContext,
10536 "malloc");
Owen Taylor3473f882001-02-23 17:55:21 +000010537 xmlFree(ctxt);
10538 return;
10539 }
10540
10541 xmlClearParserCtxt(ctxt);
10542 if (filename != NULL)
10543 input->filename = xmlMemStrdup(filename);
10544 input->base = buffer;
10545 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010546 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000010547 inputPush(ctxt, input);
10548}
10549
10550/**
10551 * xmlSAXUserParseFile:
10552 * @sax: a SAX handler
10553 * @user_data: The user data returned on SAX callbacks
10554 * @filename: a file name
10555 *
10556 * parse an XML file and call the given SAX handler routines.
10557 * Automatic support for ZLIB/Compress compressed document is provided
10558 *
10559 * Returns 0 in case of success or a error number otherwise
10560 */
10561int
10562xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10563 const char *filename) {
10564 int ret = 0;
10565 xmlParserCtxtPtr ctxt;
10566
10567 ctxt = xmlCreateFileParserCtxt(filename);
10568 if (ctxt == NULL) return -1;
10569 if (ctxt->sax != &xmlDefaultSAXHandler)
10570 xmlFree(ctxt->sax);
10571 ctxt->sax = sax;
10572 if (user_data != NULL)
10573 ctxt->userData = user_data;
10574
10575 xmlParseDocument(ctxt);
10576
10577 if (ctxt->wellFormed)
10578 ret = 0;
10579 else {
10580 if (ctxt->errNo != 0)
10581 ret = ctxt->errNo;
10582 else
10583 ret = -1;
10584 }
10585 if (sax != NULL)
10586 ctxt->sax = NULL;
10587 xmlFreeParserCtxt(ctxt);
10588
10589 return ret;
10590}
10591
10592/************************************************************************
10593 * *
10594 * Front ends when parsing from memory *
10595 * *
10596 ************************************************************************/
10597
10598/**
10599 * xmlCreateMemoryParserCtxt:
10600 * @buffer: a pointer to a char array
10601 * @size: the size of the array
10602 *
10603 * Create a parser context for an XML in-memory document.
10604 *
10605 * Returns the new parser context or NULL
10606 */
10607xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010608xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010609 xmlParserCtxtPtr ctxt;
10610 xmlParserInputPtr input;
10611 xmlParserInputBufferPtr buf;
10612
10613 if (buffer == NULL)
10614 return(NULL);
10615 if (size <= 0)
10616 return(NULL);
10617
10618 ctxt = xmlNewParserCtxt();
10619 if (ctxt == NULL)
10620 return(NULL);
10621
10622 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010623 if (buf == NULL) {
10624 xmlFreeParserCtxt(ctxt);
10625 return(NULL);
10626 }
Owen Taylor3473f882001-02-23 17:55:21 +000010627
10628 input = xmlNewInputStream(ctxt);
10629 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010630 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010631 xmlFreeParserCtxt(ctxt);
10632 return(NULL);
10633 }
10634
10635 input->filename = NULL;
10636 input->buf = buf;
10637 input->base = input->buf->buffer->content;
10638 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010639 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010640
10641 inputPush(ctxt, input);
10642 return(ctxt);
10643}
10644
10645/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010646 * xmlSAXParseMemoryWithData:
10647 * @sax: the SAX handler block
10648 * @buffer: an pointer to a char array
10649 * @size: the size of the array
10650 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10651 * documents
10652 * @data: the userdata
10653 *
10654 * parse an XML in-memory block and use the given SAX function block
10655 * to handle the parsing callback. If sax is NULL, fallback to the default
10656 * DOM tree building routines.
10657 *
10658 * User data (void *) is stored within the parser context in the
10659 * context's _private member, so it is available nearly everywhere in libxml
10660 *
10661 * Returns the resulting document tree
10662 */
10663
10664xmlDocPtr
10665xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
10666 int size, int recovery, void *data) {
10667 xmlDocPtr ret;
10668 xmlParserCtxtPtr ctxt;
10669
10670 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10671 if (ctxt == NULL) return(NULL);
10672 if (sax != NULL) {
10673 if (ctxt->sax != NULL)
10674 xmlFree(ctxt->sax);
10675 ctxt->sax = sax;
10676 }
10677 if (data!=NULL) {
10678 ctxt->_private=data;
10679 }
10680
10681 xmlParseDocument(ctxt);
10682
10683 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10684 else {
10685 ret = NULL;
10686 xmlFreeDoc(ctxt->myDoc);
10687 ctxt->myDoc = NULL;
10688 }
10689 if (sax != NULL)
10690 ctxt->sax = NULL;
10691 xmlFreeParserCtxt(ctxt);
10692
10693 return(ret);
10694}
10695
10696/**
Owen Taylor3473f882001-02-23 17:55:21 +000010697 * xmlSAXParseMemory:
10698 * @sax: the SAX handler block
10699 * @buffer: an pointer to a char array
10700 * @size: the size of the array
10701 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10702 * documents
10703 *
10704 * parse an XML in-memory block and use the given SAX function block
10705 * to handle the parsing callback. If sax is NULL, fallback to the default
10706 * DOM tree building routines.
10707 *
10708 * Returns the resulting document tree
10709 */
10710xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010711xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10712 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010713 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010714}
10715
10716/**
10717 * xmlParseMemory:
10718 * @buffer: an pointer to a char array
10719 * @size: the size of the array
10720 *
10721 * parse an XML in-memory block and build a tree.
10722 *
10723 * Returns the resulting document tree
10724 */
10725
Daniel Veillard50822cb2001-07-26 20:05:51 +000010726xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010727 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10728}
10729
10730/**
10731 * xmlRecoverMemory:
10732 * @buffer: an pointer to a char array
10733 * @size: the size of the array
10734 *
10735 * parse an XML in-memory block and build a tree.
10736 * In the case the document is not Well Formed, a tree is built anyway
10737 *
10738 * Returns the resulting document tree
10739 */
10740
Daniel Veillard50822cb2001-07-26 20:05:51 +000010741xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010742 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10743}
10744
10745/**
10746 * xmlSAXUserParseMemory:
10747 * @sax: a SAX handler
10748 * @user_data: The user data returned on SAX callbacks
10749 * @buffer: an in-memory XML document input
10750 * @size: the length of the XML document in bytes
10751 *
10752 * A better SAX parsing routine.
10753 * parse an XML in-memory buffer and call the given SAX handler routines.
10754 *
10755 * Returns 0 in case of success or a error number otherwise
10756 */
10757int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010758 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010759 int ret = 0;
10760 xmlParserCtxtPtr ctxt;
10761 xmlSAXHandlerPtr oldsax = NULL;
10762
Daniel Veillard9e923512002-08-14 08:48:52 +000010763 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000010764 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10765 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000010766 oldsax = ctxt->sax;
10767 ctxt->sax = sax;
Daniel Veillard30211a02001-04-26 09:33:18 +000010768 if (user_data != NULL)
10769 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010770
10771 xmlParseDocument(ctxt);
10772
10773 if (ctxt->wellFormed)
10774 ret = 0;
10775 else {
10776 if (ctxt->errNo != 0)
10777 ret = ctxt->errNo;
10778 else
10779 ret = -1;
10780 }
Daniel Veillard9e923512002-08-14 08:48:52 +000010781 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000010782 xmlFreeParserCtxt(ctxt);
10783
10784 return ret;
10785}
10786
10787/**
10788 * xmlCreateDocParserCtxt:
10789 * @cur: a pointer to an array of xmlChar
10790 *
10791 * Creates a parser context for an XML in-memory document.
10792 *
10793 * Returns the new parser context or NULL
10794 */
10795xmlParserCtxtPtr
10796xmlCreateDocParserCtxt(xmlChar *cur) {
10797 int len;
10798
10799 if (cur == NULL)
10800 return(NULL);
10801 len = xmlStrlen(cur);
10802 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10803}
10804
10805/**
10806 * xmlSAXParseDoc:
10807 * @sax: the SAX handler block
10808 * @cur: a pointer to an array of xmlChar
10809 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10810 * documents
10811 *
10812 * parse an XML in-memory document and build a tree.
10813 * It use the given SAX function block to handle the parsing callback.
10814 * If sax is NULL, fallback to the default DOM tree building routines.
10815 *
10816 * Returns the resulting document tree
10817 */
10818
10819xmlDocPtr
10820xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10821 xmlDocPtr ret;
10822 xmlParserCtxtPtr ctxt;
10823
10824 if (cur == NULL) return(NULL);
10825
10826
10827 ctxt = xmlCreateDocParserCtxt(cur);
10828 if (ctxt == NULL) return(NULL);
10829 if (sax != NULL) {
10830 ctxt->sax = sax;
10831 ctxt->userData = NULL;
10832 }
10833
10834 xmlParseDocument(ctxt);
10835 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10836 else {
10837 ret = NULL;
10838 xmlFreeDoc(ctxt->myDoc);
10839 ctxt->myDoc = NULL;
10840 }
10841 if (sax != NULL)
10842 ctxt->sax = NULL;
10843 xmlFreeParserCtxt(ctxt);
10844
10845 return(ret);
10846}
10847
10848/**
10849 * xmlParseDoc:
10850 * @cur: a pointer to an array of xmlChar
10851 *
10852 * parse an XML in-memory document and build a tree.
10853 *
10854 * Returns the resulting document tree
10855 */
10856
10857xmlDocPtr
10858xmlParseDoc(xmlChar *cur) {
10859 return(xmlSAXParseDoc(NULL, cur, 0));
10860}
10861
Daniel Veillard8107a222002-01-13 14:10:10 +000010862/************************************************************************
10863 * *
10864 * Specific function to keep track of entities references *
10865 * and used by the XSLT debugger *
10866 * *
10867 ************************************************************************/
10868
10869static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
10870
10871/**
10872 * xmlAddEntityReference:
10873 * @ent : A valid entity
10874 * @firstNode : A valid first node for children of entity
10875 * @lastNode : A valid last node of children entity
10876 *
10877 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
10878 */
10879static void
10880xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
10881 xmlNodePtr lastNode)
10882{
10883 if (xmlEntityRefFunc != NULL) {
10884 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
10885 }
10886}
10887
10888
10889/**
10890 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000010891 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000010892 *
10893 * Set the function to call call back when a xml reference has been made
10894 */
10895void
10896xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
10897{
10898 xmlEntityRefFunc = func;
10899}
Owen Taylor3473f882001-02-23 17:55:21 +000010900
10901/************************************************************************
10902 * *
10903 * Miscellaneous *
10904 * *
10905 ************************************************************************/
10906
10907#ifdef LIBXML_XPATH_ENABLED
10908#include <libxml/xpath.h>
10909#endif
10910
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010911extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000010912static int xmlParserInitialized = 0;
10913
10914/**
10915 * xmlInitParser:
10916 *
10917 * Initialization function for the XML parser.
10918 * This is not reentrant. Call once before processing in case of
10919 * use in multithreaded programs.
10920 */
10921
10922void
10923xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000010924 if (xmlParserInitialized != 0)
10925 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010926
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010927 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
10928 (xmlGenericError == NULL))
10929 initGenericErrorDefaultFunc(NULL);
Daniel Veillardd0463562001-10-13 09:15:48 +000010930 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000010931 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000010932 xmlInitCharEncodingHandlers();
10933 xmlInitializePredefinedEntities();
10934 xmlDefaultSAXHandlerInit();
10935 xmlRegisterDefaultInputCallbacks();
10936 xmlRegisterDefaultOutputCallbacks();
10937#ifdef LIBXML_HTML_ENABLED
10938 htmlInitAutoClose();
10939 htmlDefaultSAXHandlerInit();
10940#endif
10941#ifdef LIBXML_XPATH_ENABLED
10942 xmlXPathInit();
10943#endif
10944 xmlParserInitialized = 1;
10945}
10946
10947/**
10948 * xmlCleanupParser:
10949 *
10950 * Cleanup function for the XML parser. It tries to reclaim all
10951 * parsing related global memory allocated for the parser processing.
10952 * It doesn't deallocate any document related memory. Calling this
10953 * function should not prevent reusing the parser.
Daniel Veillard7424eb62003-01-24 14:14:52 +000010954 * One should call xmlCleanupParser() only when the process has
10955 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000010956 */
10957
10958void
10959xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000010960 xmlCleanupCharEncodingHandlers();
10961 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000010962#ifdef LIBXML_CATALOG_ENABLED
10963 xmlCatalogCleanup();
10964#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000010965 xmlCleanupThreads();
10966 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010967}