blob: ba9327cef31be78dd3a3780fbaba910acdeccbdb [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their corresponding
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
/*
 * XML_DIR_SEP: directory separator character, selected at compile time:
 * a backslash on native WIN32 builds (not Cygwin), a slash otherwise.
 */
#if defined(WIN32) && !defined (__CYGWIN__)
#define XML_DIR_SEP '\\'
#else
#define XML_DIR_SEP '/'
#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
44#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000045#include <libxml/threads.h>
46#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000056#ifdef LIBXML_CATALOG_ENABLED
57#include <libxml/catalog.h>
58#endif
Owen Taylor3473f882001-02-23 17:55:21 +000059
60#ifdef HAVE_CTYPE_H
61#include <ctype.h>
62#endif
63#ifdef HAVE_STDLIB_H
64#include <stdlib.h>
65#endif
66#ifdef HAVE_SYS_STAT_H
67#include <sys/stat.h>
68#endif
69#ifdef HAVE_FCNTL_H
70#include <fcntl.h>
71#endif
72#ifdef HAVE_UNISTD_H
73#include <unistd.h>
74#endif
75#ifdef HAVE_ZLIB_H
76#include <zlib.h>
77#endif
78
/**
 * MAX_DEPTH:
 *
 * Arbitrary limit on the element nesting depth of the XML documents
 * that we accept to process. This is not a limitation of the parser
 * but a safety boundary feature.
 */
#define MAX_DEPTH 1024

/* Sizes, in xmlChar units, used for the parser's working buffers. */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/* Marker string identifying a "SAX compatibility mode" document. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
92
/*
 * NULL-terminated list of the processing instruction targets starting
 * with "xml" that are allowed by W3C specifications.
 */
static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
101
102/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000103xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
104 const xmlChar **str);
105
Daniel Veillard257d9102001-05-08 10:41:44 +0000106static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000107xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
108 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000109 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000110 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000111
Daniel Veillard8107a222002-01-13 14:10:10 +0000112static void
113xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
114 xmlNodePtr lastNode);
115
Daniel Veillard328f48c2002-11-15 15:24:34 +0000116static int
117xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
118 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Owen Taylor3473f882001-02-23 17:55:21 +0000119/************************************************************************
120 * *
121 * Parser stacks related functions and macros *
122 * *
123 ************************************************************************/
124
125xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
126 const xmlChar ** str);
127
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000128/**
129 * inputPush:
130 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000131 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000132 *
133 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000134 *
135 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000136 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000137extern int
138inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
139{
140 if (ctxt->inputNr >= ctxt->inputMax) {
141 ctxt->inputMax *= 2;
142 ctxt->inputTab =
143 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
144 ctxt->inputMax *
145 sizeof(ctxt->inputTab[0]));
146 if (ctxt->inputTab == NULL) {
147 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
148 return (0);
149 }
150 }
151 ctxt->inputTab[ctxt->inputNr] = value;
152 ctxt->input = value;
153 return (ctxt->inputNr++);
154}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000155/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000156 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000157 * @ctxt: an XML parser context
158 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000159 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000160 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000161 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000162 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000163extern xmlParserInputPtr
164inputPop(xmlParserCtxtPtr ctxt)
165{
166 xmlParserInputPtr ret;
167
168 if (ctxt->inputNr <= 0)
169 return (0);
170 ctxt->inputNr--;
171 if (ctxt->inputNr > 0)
172 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
173 else
174 ctxt->input = NULL;
175 ret = ctxt->inputTab[ctxt->inputNr];
176 ctxt->inputTab[ctxt->inputNr] = 0;
177 return (ret);
178}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000179/**
180 * nodePush:
181 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000182 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000183 *
184 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000185 *
186 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000187 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000188extern int
189nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
190{
191 if (ctxt->nodeNr >= ctxt->nodeMax) {
192 ctxt->nodeMax *= 2;
193 ctxt->nodeTab =
194 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
195 ctxt->nodeMax *
196 sizeof(ctxt->nodeTab[0]));
197 if (ctxt->nodeTab == NULL) {
198 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
199 return (0);
200 }
201 }
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000202#ifdef MAX_DEPTH
203 if (ctxt->nodeNr > MAX_DEPTH) {
204 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
205 ctxt->sax->error(ctxt->userData,
206 "Excessive depth in document: change MAX_DEPTH = %d\n",
207 MAX_DEPTH);
208 ctxt->wellFormed = 0;
209 ctxt->instate = XML_PARSER_EOF;
210 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
211 return(0);
212 }
213#endif
Daniel Veillard1c732d22002-11-30 11:22:59 +0000214 ctxt->nodeTab[ctxt->nodeNr] = value;
215 ctxt->node = value;
216 return (ctxt->nodeNr++);
217}
218/**
219 * nodePop:
220 * @ctxt: an XML parser context
221 *
222 * Pops the top element node from the node stack
223 *
224 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +0000225 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000226extern xmlNodePtr
227nodePop(xmlParserCtxtPtr ctxt)
228{
229 xmlNodePtr ret;
230
231 if (ctxt->nodeNr <= 0)
232 return (0);
233 ctxt->nodeNr--;
234 if (ctxt->nodeNr > 0)
235 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
236 else
237 ctxt->node = NULL;
238 ret = ctxt->nodeTab[ctxt->nodeNr];
239 ctxt->nodeTab[ctxt->nodeNr] = 0;
240 return (ret);
241}
242/**
243 * namePush:
244 * @ctxt: an XML parser context
245 * @value: the element name
246 *
247 * Pushes a new element name on top of the name stack
248 *
249 * Returns 0 in case of error, the index in the stack otherwise
250 */
251extern int
252namePush(xmlParserCtxtPtr ctxt, xmlChar * value)
253{
254 if (ctxt->nameNr >= ctxt->nameMax) {
255 ctxt->nameMax *= 2;
256 ctxt->nameTab =
257 (xmlChar * *)xmlRealloc(ctxt->nameTab,
258 ctxt->nameMax *
259 sizeof(ctxt->nameTab[0]));
260 if (ctxt->nameTab == NULL) {
261 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
262 return (0);
263 }
264 }
265 ctxt->nameTab[ctxt->nameNr] = value;
266 ctxt->name = value;
267 return (ctxt->nameNr++);
268}
269/**
270 * namePop:
271 * @ctxt: an XML parser context
272 *
273 * Pops the top element name from the name stack
274 *
275 * Returns the name just removed
276 */
277extern xmlChar *
278namePop(xmlParserCtxtPtr ctxt)
279{
280 xmlChar *ret;
281
282 if (ctxt->nameNr <= 0)
283 return (0);
284 ctxt->nameNr--;
285 if (ctxt->nameNr > 0)
286 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
287 else
288 ctxt->name = NULL;
289 ret = ctxt->nameTab[ctxt->nameNr];
290 ctxt->nameTab[ctxt->nameNr] = 0;
291 return (ret);
292}
Owen Taylor3473f882001-02-23 17:55:21 +0000293
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000294static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000295 if (ctxt->spaceNr >= ctxt->spaceMax) {
296 ctxt->spaceMax *= 2;
297 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
298 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
299 if (ctxt->spaceTab == NULL) {
300 xmlGenericError(xmlGenericErrorContext,
301 "realloc failed !\n");
302 return(0);
303 }
304 }
305 ctxt->spaceTab[ctxt->spaceNr] = val;
306 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
307 return(ctxt->spaceNr++);
308}
309
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000310static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000311 int ret;
312 if (ctxt->spaceNr <= 0) return(0);
313 ctxt->spaceNr--;
314 if (ctxt->spaceNr > 0)
315 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
316 else
317 ctxt->space = NULL;
318 ret = ctxt->spaceTab[ctxt->spaceNr];
319 ctxt->spaceTab[ctxt->spaceNr] = -1;
320 return(ret);
321}
322
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substrings.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops finished entities on the fly.
 *   NEXTL(l) Skip l xmlChar in the input buffer
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operates on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
356
/*
 * Raw accessors on the current input buffer. All of them expect a
 * variable named ctxt to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * SKIP(val): advance the input cursor and the character counter by val
 * units, then handle a '%' found at the new position and, when the
 * cursor sits on a 0 byte and no more data can be read, pop the input.
 * Note that val is evaluated twice.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
369
Daniel Veillard46de64e2002-05-29 08:21:33 +0000370#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) \
371 xmlSHRINK (ctxt);
372
373static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
374 xmlParserInputShrink(ctxt->input);
375 if ((*ctxt->input->cur == 0) &&
376 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
377 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000378 }
Owen Taylor3473f882001-02-23 17:55:21 +0000379
Daniel Veillard46de64e2002-05-29 08:21:33 +0000380#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) \
381 xmlGROW (ctxt);
382
383static void xmlGROW (xmlParserCtxtPtr ctxt) {
384 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
385 if ((*ctxt->input->cur == 0) &&
386 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
387 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000388 }
Owen Taylor3473f882001-02-23 17:55:21 +0000389
/*
 * Character stepping helpers. All of them expect a variable named ctxt
 * to be in scope at the point of use. Macro arguments are parenthesized
 * so that passing a non-trivial expression cannot change how the
 * expansion is parsed.
 */

/* Skip blanks at the current position, see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Advance to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one unit, bump the character counter and
 * try to read more data when the new position holds a 0 byte.
 */
#define NEXT1 {								\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): update line/column for the character under the cursor,
 * advance the cursor by l units, then handle a '%' found at the new
 * position.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* Fetch the current character; l (an lvalue) receives its length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i; a length l of 1 is stored directly, anything else goes
 * through xmlCopyCharMultiByte(). The expansion is a bare if/else
 * statement without a trailing semicolon.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +0000415
416/**
417 * xmlSkipBlankChars:
418 * @ctxt: the XML parser context
419 *
420 * skip all blanks character found at that point in the input streams.
421 * It pops up finished entities in the process if allowable at that point.
422 *
423 * Returns the number of space chars skipped
424 */
425
426int
427xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000428 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000429
430 /*
431 * It's Okay to use CUR/NEXT here since all the blanks are on
432 * the ASCII range.
433 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000434 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
435 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000436 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000437 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000438 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000439 cur = ctxt->input->cur;
440 while (IS_BLANK(*cur)) {
441 if (*cur == '\n') {
442 ctxt->input->line++; ctxt->input->col = 1;
443 }
444 cur++;
445 res++;
446 if (*cur == 0) {
447 ctxt->input->cur = cur;
448 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
449 cur = ctxt->input->cur;
450 }
451 }
452 ctxt->input->cur = cur;
453 } else {
454 int cur;
455 do {
456 cur = CUR;
457 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
458 NEXT;
459 cur = CUR;
460 res++;
461 }
462 while ((cur == 0) && (ctxt->inputNr > 1) &&
463 (ctxt->instate != XML_PARSER_COMMENT)) {
464 xmlPopInput(ctxt);
465 cur = CUR;
466 }
467 /*
468 * Need to handle support of entities branching here
469 */
470 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
471 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
472 }
Owen Taylor3473f882001-02-23 17:55:21 +0000473 return(res);
474}
475
476/************************************************************************
477 * *
478 * Commodity functions to handle entities *
479 * *
480 ************************************************************************/
481
482/**
483 * xmlPopInput:
484 * @ctxt: an XML parser context
485 *
486 * xmlPopInput: the current input pointed by ctxt->input came to an end
487 * pop it and return the next char.
488 *
489 * Returns the current xmlChar in the parser context
490 */
491xmlChar
492xmlPopInput(xmlParserCtxtPtr ctxt) {
493 if (ctxt->inputNr == 1) return(0); /* End of main Input */
494 if (xmlParserDebugEntities)
495 xmlGenericError(xmlGenericErrorContext,
496 "Popping input %d\n", ctxt->inputNr);
497 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +0000498 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000499 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
500 return(xmlPopInput(ctxt));
501 return(CUR);
502}
503
504/**
505 * xmlPushInput:
506 * @ctxt: an XML parser context
507 * @input: an XML parser input fragment (entity, XML fragment ...).
508 *
509 * xmlPushInput: switch to a new input stream which is stacked on top
510 * of the previous one(s).
511 */
512void
513xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
514 if (input == NULL) return;
515
516 if (xmlParserDebugEntities) {
517 if ((ctxt->input != NULL) && (ctxt->input->filename))
518 xmlGenericError(xmlGenericErrorContext,
519 "%s(%d): ", ctxt->input->filename,
520 ctxt->input->line);
521 xmlGenericError(xmlGenericErrorContext,
522 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
523 }
524 inputPush(ctxt, input);
525 GROW;
526}
527
528/**
529 * xmlParseCharRef:
530 * @ctxt: an XML parser context
531 *
532 * parse Reference declarations
533 *
534 * [66] CharRef ::= '&#' [0-9]+ ';' |
535 * '&#x' [0-9a-fA-F]+ ';'
536 *
537 * [ WFC: Legal Character ]
538 * Characters referred to using character references must match the
539 * production for Char.
540 *
541 * Returns the value parsed (as an int), 0 in case of error
542 */
543int
544xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000545 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000546 int count = 0;
547
Owen Taylor3473f882001-02-23 17:55:21 +0000548 /*
549 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
550 */
Daniel Veillard561b7f82002-03-20 21:55:57 +0000551 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +0000552 (NXT(2) == 'x')) {
553 SKIP(3);
554 GROW;
555 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000556 if (count++ > 20) {
557 count = 0;
558 GROW;
559 }
560 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000561 val = val * 16 + (CUR - '0');
562 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
563 val = val * 16 + (CUR - 'a') + 10;
564 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
565 val = val * 16 + (CUR - 'A') + 10;
566 else {
567 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
568 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
569 ctxt->sax->error(ctxt->userData,
570 "xmlParseCharRef: invalid hexadecimal value\n");
571 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000572 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000573 val = 0;
574 break;
575 }
576 NEXT;
577 count++;
578 }
579 if (RAW == ';') {
580 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
581 ctxt->nbChars ++;
582 ctxt->input->cur++;
583 }
Daniel Veillard561b7f82002-03-20 21:55:57 +0000584 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +0000585 SKIP(2);
586 GROW;
587 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000588 if (count++ > 20) {
589 count = 0;
590 GROW;
591 }
592 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000593 val = val * 10 + (CUR - '0');
594 else {
595 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
596 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
597 ctxt->sax->error(ctxt->userData,
598 "xmlParseCharRef: invalid decimal value\n");
599 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000600 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000601 val = 0;
602 break;
603 }
604 NEXT;
605 count++;
606 }
607 if (RAW == ';') {
608 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
609 ctxt->nbChars ++;
610 ctxt->input->cur++;
611 }
612 } else {
613 ctxt->errNo = XML_ERR_INVALID_CHARREF;
614 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
615 ctxt->sax->error(ctxt->userData,
616 "xmlParseCharRef: invalid value\n");
617 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000618 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000619 }
620
621 /*
622 * [ WFC: Legal Character ]
623 * Characters referred to using character references must match the
624 * production for Char.
625 */
626 if (IS_CHAR(val)) {
627 return(val);
628 } else {
629 ctxt->errNo = XML_ERR_INVALID_CHAR;
630 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000631 ctxt->sax->error(ctxt->userData,
632 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000633 val);
634 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000635 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000636 }
637 return(0);
638}
639
640/**
641 * xmlParseStringCharRef:
642 * @ctxt: an XML parser context
643 * @str: a pointer to an index in the string
644 *
645 * parse Reference declarations, variant parsing from a string rather
646 * than an an input flow.
647 *
648 * [66] CharRef ::= '&#' [0-9]+ ';' |
649 * '&#x' [0-9a-fA-F]+ ';'
650 *
651 * [ WFC: Legal Character ]
652 * Characters referred to using character references must match the
653 * production for Char.
654 *
655 * Returns the value parsed (as an int), 0 in case of error, str will be
656 * updated to the current value of the index
657 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000658static int
Owen Taylor3473f882001-02-23 17:55:21 +0000659xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
660 const xmlChar *ptr;
661 xmlChar cur;
662 int val = 0;
663
664 if ((str == NULL) || (*str == NULL)) return(0);
665 ptr = *str;
666 cur = *ptr;
667 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
668 ptr += 3;
669 cur = *ptr;
670 while (cur != ';') { /* Non input consuming loop */
671 if ((cur >= '0') && (cur <= '9'))
672 val = val * 16 + (cur - '0');
673 else if ((cur >= 'a') && (cur <= 'f'))
674 val = val * 16 + (cur - 'a') + 10;
675 else if ((cur >= 'A') && (cur <= 'F'))
676 val = val * 16 + (cur - 'A') + 10;
677 else {
678 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
679 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
680 ctxt->sax->error(ctxt->userData,
681 "xmlParseStringCharRef: invalid hexadecimal value\n");
682 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000683 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000684 val = 0;
685 break;
686 }
687 ptr++;
688 cur = *ptr;
689 }
690 if (cur == ';')
691 ptr++;
692 } else if ((cur == '&') && (ptr[1] == '#')){
693 ptr += 2;
694 cur = *ptr;
695 while (cur != ';') { /* Non input consuming loops */
696 if ((cur >= '0') && (cur <= '9'))
697 val = val * 10 + (cur - '0');
698 else {
699 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
700 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
701 ctxt->sax->error(ctxt->userData,
702 "xmlParseStringCharRef: invalid decimal value\n");
703 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000704 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000705 val = 0;
706 break;
707 }
708 ptr++;
709 cur = *ptr;
710 }
711 if (cur == ';')
712 ptr++;
713 } else {
714 ctxt->errNo = XML_ERR_INVALID_CHARREF;
715 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
716 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000717 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000718 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000719 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000720 return(0);
721 }
722 *str = ptr;
723
724 /*
725 * [ WFC: Legal Character ]
726 * Characters referred to using character references must match the
727 * production for Char.
728 */
729 if (IS_CHAR(val)) {
730 return(val);
731 } else {
732 ctxt->errNo = XML_ERR_INVALID_CHAR;
733 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
734 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000735 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000736 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000737 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000738 }
739 return(0);
740}
741
742/**
Daniel Veillardf5582f12002-06-11 10:08:16 +0000743 * xmlNewBlanksWrapperInputStream:
744 * @ctxt: an XML parser context
745 * @entity: an Entity pointer
746 *
747 * Create a new input stream for wrapping
748 * blanks around a PEReference
749 *
750 * Returns the new input stream or NULL
751 */
752
753static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
754
Daniel Veillardf4862f02002-09-10 11:13:43 +0000755static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +0000756xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
757 xmlParserInputPtr input;
758 xmlChar *buffer;
759 size_t length;
760 if (entity == NULL) {
761 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
762 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
763 ctxt->sax->error(ctxt->userData,
764 "internal: xmlNewBlanksWrapperInputStream entity = NULL\n");
765 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
766 return(NULL);
767 }
768 if (xmlParserDebugEntities)
769 xmlGenericError(xmlGenericErrorContext,
770 "new blanks wrapper for entity: %s\n", entity->name);
771 input = xmlNewInputStream(ctxt);
772 if (input == NULL) {
773 return(NULL);
774 }
775 length = xmlStrlen(entity->name) + 5;
776 buffer = xmlMalloc(length);
777 if (buffer == NULL) {
778 return(NULL);
779 }
780 buffer [0] = ' ';
781 buffer [1] = '%';
782 buffer [length-3] = ';';
783 buffer [length-2] = ' ';
784 buffer [length-1] = 0;
785 memcpy(buffer + 2, entity->name, length - 5);
786 input->free = deallocblankswrapper;
787 input->base = buffer;
788 input->cur = buffer;
789 input->length = length;
790 input->end = &buffer[length];
791 return(input);
792}
793
794/**
Owen Taylor3473f882001-02-23 17:55:21 +0000795 * xmlParserHandlePEReference:
796 * @ctxt: the parser context
797 *
798 * [69] PEReference ::= '%' Name ';'
799 *
800 * [ WFC: No Recursion ]
801 * A parsed entity must not contain a recursive
802 * reference to itself, either directly or indirectly.
803 *
804 * [ WFC: Entity Declared ]
805 * In a document without any DTD, a document with only an internal DTD
806 * subset which contains no parameter entity references, or a document
807 * with "standalone='yes'", ... ... The declaration of a parameter
808 * entity must precede any reference to it...
809 *
810 * [ VC: Entity Declared ]
811 * In a document with an external subset or external parameter entities
812 * with "standalone='no'", ... ... The declaration of a parameter entity
813 * must precede any reference to it...
814 *
815 * [ WFC: In DTD ]
816 * Parameter-entity references may only appear in the DTD.
817 * NOTE: misleading but this is handled.
818 *
819 * A PEReference may have been detected in the current input stream
820 * the handling is done accordingly to
821 * http://www.w3.org/TR/REC-xml#entproc
822 * i.e.
823 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000824 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000825 */
826void
827xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
828 xmlChar *name;
829 xmlEntityPtr entity = NULL;
830 xmlParserInputPtr input;
831
Owen Taylor3473f882001-02-23 17:55:21 +0000832 if (RAW != '%') return;
833 switch(ctxt->instate) {
834 case XML_PARSER_CDATA_SECTION:
835 return;
836 case XML_PARSER_COMMENT:
837 return;
838 case XML_PARSER_START_TAG:
839 return;
840 case XML_PARSER_END_TAG:
841 return;
842 case XML_PARSER_EOF:
843 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
844 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
845 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
846 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000847 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000848 return;
849 case XML_PARSER_PROLOG:
850 case XML_PARSER_START:
851 case XML_PARSER_MISC:
852 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
853 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
854 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
855 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000856 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000857 return;
858 case XML_PARSER_ENTITY_DECL:
859 case XML_PARSER_CONTENT:
860 case XML_PARSER_ATTRIBUTE_VALUE:
861 case XML_PARSER_PI:
862 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +0000863 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +0000864 /* we just ignore it there */
865 return;
866 case XML_PARSER_EPILOG:
867 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
868 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
869 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
870 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000871 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000872 return;
873 case XML_PARSER_ENTITY_VALUE:
874 /*
875 * NOTE: in the case of entity values, we don't do the
876 * substitution here since we need the literal
877 * entity value to be able to save the internal
878 * subset of the document.
879 * This will be handled by xmlStringDecodeEntities
880 */
881 return;
882 case XML_PARSER_DTD:
883 /*
884 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
885 * In the internal DTD subset, parameter-entity references
886 * can occur only where markup declarations can occur, not
887 * within markup declarations.
888 * In that case this is handled in xmlParseMarkupDecl
889 */
890 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
891 return;
Daniel Veillardf5582f12002-06-11 10:08:16 +0000892 if (IS_BLANK(NXT(1)) || NXT(1) == 0)
893 return;
Owen Taylor3473f882001-02-23 17:55:21 +0000894 break;
895 case XML_PARSER_IGNORE:
896 return;
897 }
898
899 NEXT;
900 name = xmlParseName(ctxt);
901 if (xmlParserDebugEntities)
902 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000903 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000904 if (name == NULL) {
905 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
906 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000907 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000908 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000909 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000910 } else {
911 if (RAW == ';') {
912 NEXT;
913 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
914 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
915 if (entity == NULL) {
916
917 /*
918 * [ WFC: Entity Declared ]
919 * In a document without any DTD, a document with only an
920 * internal DTD subset which contains no parameter entity
921 * references, or a document with "standalone='yes'", ...
922 * ... The declaration of a parameter entity must precede
923 * any reference to it...
924 */
925 if ((ctxt->standalone == 1) ||
926 ((ctxt->hasExternalSubset == 0) &&
927 (ctxt->hasPErefs == 0))) {
928 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
929 ctxt->sax->error(ctxt->userData,
930 "PEReference: %%%s; not found\n", name);
931 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000932 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000933 } else {
934 /*
935 * [ VC: Entity Declared ]
936 * In a document with an external subset or external
937 * parameter entities with "standalone='no'", ...
938 * ... The declaration of a parameter entity must precede
939 * any reference to it...
940 */
941 if ((!ctxt->disableSAX) &&
942 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
943 ctxt->vctxt.error(ctxt->vctxt.userData,
944 "PEReference: %%%s; not found\n", name);
945 } else if ((!ctxt->disableSAX) &&
946 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
947 ctxt->sax->warning(ctxt->userData,
948 "PEReference: %%%s; not found\n", name);
949 ctxt->valid = 0;
950 }
Daniel Veillardf5582f12002-06-11 10:08:16 +0000951 } else if (ctxt->input->free != deallocblankswrapper) {
952 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
953 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +0000954 } else {
955 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
956 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000957 xmlChar start[4];
958 xmlCharEncoding enc;
959
Owen Taylor3473f882001-02-23 17:55:21 +0000960 /*
961 * handle the extra spaces added before and after
962 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000963 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000964 */
965 input = xmlNewEntityInputStream(ctxt, entity);
966 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000967
968 /*
969 * Get the 4 first bytes and decode the charset
970 * if enc != XML_CHAR_ENCODING_NONE
971 * plug some encoding conversion routines.
972 */
973 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +0000974 if (entity->length >= 4) {
975 start[0] = RAW;
976 start[1] = NXT(1);
977 start[2] = NXT(2);
978 start[3] = NXT(3);
979 enc = xmlDetectCharEncoding(start, 4);
980 if (enc != XML_CHAR_ENCODING_NONE) {
981 xmlSwitchEncoding(ctxt, enc);
982 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000983 }
984
Owen Taylor3473f882001-02-23 17:55:21 +0000985 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
986 (RAW == '<') && (NXT(1) == '?') &&
987 (NXT(2) == 'x') && (NXT(3) == 'm') &&
988 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
989 xmlParseTextDecl(ctxt);
990 }
Owen Taylor3473f882001-02-23 17:55:21 +0000991 } else {
992 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
993 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000994 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000995 name);
996 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000997 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000998 }
999 }
1000 } else {
1001 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1002 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1003 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001004 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001005 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001006 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001007 }
1008 xmlFree(name);
1009 }
1010}
1011
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates @buffer to that size.  On
 * allocation failure the previous block is released (it would otherwise
 * be leaked, since the enclosing function returns right away) and the
 * enclosing function returns NULL.  Only usable in functions returning
 * a pointer and owning @buffer.
 *
 * The expansion is kept as a plain brace block so that existing call
 * sites keep compiling whether or not they add a trailing semicolon.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_tmp;						\
    buffer##_size *= 2;							\
    buffer##_tmp = (xmlChar *)						\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (buffer##_tmp == NULL) {						\
	xmlGenericError(xmlGenericErrorContext, "realloc failed");	\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = buffer##_tmp;						\
}
1024
1025/**
1026 * xmlStringDecodeEntities:
1027 * @ctxt: the parser context
1028 * @str: the input string
1029 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1030 * @end: an end marker xmlChar, 0 if none
1031 * @end2: an end marker xmlChar, 0 if none
1032 * @end3: an end marker xmlChar, 0 if none
1033 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001034 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001035 *
1036 * [67] Reference ::= EntityRef | CharRef
1037 *
1038 * [69] PEReference ::= '%' Name ';'
1039 *
1040 * Returns A newly allocated string with the substitution done. The caller
1041 * must deallocate it !
1042 */
1043xmlChar *
1044xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1045 xmlChar end, xmlChar end2, xmlChar end3) {
1046 xmlChar *buffer = NULL;
1047 int buffer_size = 0;
1048
1049 xmlChar *current = NULL;
1050 xmlEntityPtr ent;
1051 int c,l;
1052 int nbchars = 0;
1053
1054 if (str == NULL)
1055 return(NULL);
1056
1057 if (ctxt->depth > 40) {
1058 ctxt->errNo = XML_ERR_ENTITY_LOOP;
1059 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1060 ctxt->sax->error(ctxt->userData,
1061 "Detected entity reference loop\n");
1062 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001063 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001064 return(NULL);
1065 }
1066
1067 /*
1068 * allocate a translation buffer.
1069 */
1070 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1071 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1072 if (buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00001073 xmlGenericError(xmlGenericErrorContext,
1074 "xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00001075 return(NULL);
1076 }
1077
1078 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001079 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001080 * we are operating on already parsed values.
1081 */
1082 c = CUR_SCHAR(str, l);
1083 while ((c != 0) && (c != end) && /* non input consuming loop */
1084 (c != end2) && (c != end3)) {
1085
1086 if (c == 0) break;
1087 if ((c == '&') && (str[1] == '#')) {
1088 int val = xmlParseStringCharRef(ctxt, &str);
1089 if (val != 0) {
1090 COPY_BUF(0,buffer,nbchars,val);
1091 }
1092 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1093 if (xmlParserDebugEntities)
1094 xmlGenericError(xmlGenericErrorContext,
1095 "String decoding Entity Reference: %.30s\n",
1096 str);
1097 ent = xmlParseStringEntityRef(ctxt, &str);
1098 if ((ent != NULL) &&
1099 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1100 if (ent->content != NULL) {
1101 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1102 } else {
1103 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1104 ctxt->sax->error(ctxt->userData,
1105 "internal error entity has no content\n");
1106 }
1107 } else if ((ent != NULL) && (ent->content != NULL)) {
1108 xmlChar *rep;
1109
1110 ctxt->depth++;
1111 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1112 0, 0, 0);
1113 ctxt->depth--;
1114 if (rep != NULL) {
1115 current = rep;
1116 while (*current != 0) { /* non input consuming loop */
1117 buffer[nbchars++] = *current++;
1118 if (nbchars >
1119 buffer_size - XML_PARSER_BUFFER_SIZE) {
1120 growBuffer(buffer);
1121 }
1122 }
1123 xmlFree(rep);
1124 }
1125 } else if (ent != NULL) {
1126 int i = xmlStrlen(ent->name);
1127 const xmlChar *cur = ent->name;
1128
1129 buffer[nbchars++] = '&';
1130 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1131 growBuffer(buffer);
1132 }
1133 for (;i > 0;i--)
1134 buffer[nbchars++] = *cur++;
1135 buffer[nbchars++] = ';';
1136 }
1137 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1138 if (xmlParserDebugEntities)
1139 xmlGenericError(xmlGenericErrorContext,
1140 "String decoding PE Reference: %.30s\n", str);
1141 ent = xmlParseStringPEReference(ctxt, &str);
1142 if (ent != NULL) {
1143 xmlChar *rep;
1144
1145 ctxt->depth++;
1146 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1147 0, 0, 0);
1148 ctxt->depth--;
1149 if (rep != NULL) {
1150 current = rep;
1151 while (*current != 0) { /* non input consuming loop */
1152 buffer[nbchars++] = *current++;
1153 if (nbchars >
1154 buffer_size - XML_PARSER_BUFFER_SIZE) {
1155 growBuffer(buffer);
1156 }
1157 }
1158 xmlFree(rep);
1159 }
1160 }
1161 } else {
1162 COPY_BUF(l,buffer,nbchars,c);
1163 str += l;
1164 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1165 growBuffer(buffer);
1166 }
1167 }
1168 c = CUR_SCHAR(str, l);
1169 }
1170 buffer[nbchars++] = 0;
1171 return(buffer);
1172}
1173
1174
1175/************************************************************************
1176 * *
1177 * Commodity functions to handle xmlChars *
1178 * *
1179 ************************************************************************/
1180
1181/**
1182 * xmlStrndup:
1183 * @cur: the input xmlChar *
1184 * @len: the len of @cur
1185 *
1186 * a strndup for array of xmlChar's
1187 *
1188 * Returns a new xmlChar * or NULL
1189 */
1190xmlChar *
1191xmlStrndup(const xmlChar *cur, int len) {
1192 xmlChar *ret;
1193
1194 if ((cur == NULL) || (len < 0)) return(NULL);
1195 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1196 if (ret == NULL) {
1197 xmlGenericError(xmlGenericErrorContext,
1198 "malloc of %ld byte failed\n",
1199 (len + 1) * (long)sizeof(xmlChar));
1200 return(NULL);
1201 }
1202 memcpy(ret, cur, len * sizeof(xmlChar));
1203 ret[len] = 0;
1204 return(ret);
1205}
1206
1207/**
1208 * xmlStrdup:
1209 * @cur: the input xmlChar *
1210 *
1211 * a strdup for array of xmlChar's. Since they are supposed to be
1212 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1213 * a termination mark of '0'.
1214 *
1215 * Returns a new xmlChar * or NULL
1216 */
1217xmlChar *
1218xmlStrdup(const xmlChar *cur) {
1219 const xmlChar *p = cur;
1220
1221 if (cur == NULL) return(NULL);
1222 while (*p != 0) p++; /* non input consuming */
1223 return(xmlStrndup(cur, p - cur));
1224}
1225
1226/**
1227 * xmlCharStrndup:
1228 * @cur: the input char *
1229 * @len: the len of @cur
1230 *
1231 * a strndup for char's to xmlChar's
1232 *
1233 * Returns a new xmlChar * or NULL
1234 */
1235
1236xmlChar *
1237xmlCharStrndup(const char *cur, int len) {
1238 int i;
1239 xmlChar *ret;
1240
1241 if ((cur == NULL) || (len < 0)) return(NULL);
1242 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1243 if (ret == NULL) {
1244 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1245 (len + 1) * (long)sizeof(xmlChar));
1246 return(NULL);
1247 }
1248 for (i = 0;i < len;i++)
1249 ret[i] = (xmlChar) cur[i];
1250 ret[len] = 0;
1251 return(ret);
1252}
1253
1254/**
1255 * xmlCharStrdup:
1256 * @cur: the input char *
Owen Taylor3473f882001-02-23 17:55:21 +00001257 *
1258 * a strdup for char's to xmlChar's
1259 *
1260 * Returns a new xmlChar * or NULL
1261 */
1262
1263xmlChar *
1264xmlCharStrdup(const char *cur) {
1265 const char *p = cur;
1266
1267 if (cur == NULL) return(NULL);
1268 while (*p != '\0') p++; /* non input consuming */
1269 return(xmlCharStrndup(cur, p - cur));
1270}
1271
1272/**
1273 * xmlStrcmp:
1274 * @str1: the first xmlChar *
1275 * @str2: the second xmlChar *
1276 *
1277 * a strcmp for xmlChar's
1278 *
1279 * Returns the integer result of the comparison
1280 */
1281
1282int
1283xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1284 register int tmp;
1285
1286 if (str1 == str2) return(0);
1287 if (str1 == NULL) return(-1);
1288 if (str2 == NULL) return(1);
1289 do {
1290 tmp = *str1++ - *str2;
1291 if (tmp != 0) return(tmp);
1292 } while (*str2++ != 0);
1293 return 0;
1294}
1295
1296/**
1297 * xmlStrEqual:
1298 * @str1: the first xmlChar *
1299 * @str2: the second xmlChar *
1300 *
1301 * Check if both string are equal of have same content
1302 * Should be a bit more readable and faster than xmlStrEqual()
1303 *
1304 * Returns 1 if they are equal, 0 if they are different
1305 */
1306
1307int
1308xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1309 if (str1 == str2) return(1);
1310 if (str1 == NULL) return(0);
1311 if (str2 == NULL) return(0);
1312 do {
1313 if (*str1++ != *str2) return(0);
1314 } while (*str2++);
1315 return(1);
1316}
1317
1318/**
1319 * xmlStrncmp:
1320 * @str1: the first xmlChar *
1321 * @str2: the second xmlChar *
1322 * @len: the max comparison length
1323 *
1324 * a strncmp for xmlChar's
1325 *
1326 * Returns the integer result of the comparison
1327 */
1328
1329int
1330xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1331 register int tmp;
1332
1333 if (len <= 0) return(0);
1334 if (str1 == str2) return(0);
1335 if (str1 == NULL) return(-1);
1336 if (str2 == NULL) return(1);
1337 do {
1338 tmp = *str1++ - *str2;
1339 if (tmp != 0 || --len == 0) return(tmp);
1340 } while (*str2++ != 0);
1341 return 0;
1342}
1343
Daniel Veillardb44025c2001-10-11 22:55:55 +00001344static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001345 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1346 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1347 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1348 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1349 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1350 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1351 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1352 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1353 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1354 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1355 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1356 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1357 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1358 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1359 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1360 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1361 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1362 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1363 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1364 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1365 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1366 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1367 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1368 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1369 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1370 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1371 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1372 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1373 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1374 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1375 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1376 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1377};
1378
1379/**
1380 * xmlStrcasecmp:
1381 * @str1: the first xmlChar *
1382 * @str2: the second xmlChar *
1383 *
1384 * a strcasecmp for xmlChar's
1385 *
1386 * Returns the integer result of the comparison
1387 */
1388
1389int
1390xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1391 register int tmp;
1392
1393 if (str1 == str2) return(0);
1394 if (str1 == NULL) return(-1);
1395 if (str2 == NULL) return(1);
1396 do {
1397 tmp = casemap[*str1++] - casemap[*str2];
1398 if (tmp != 0) return(tmp);
1399 } while (*str2++ != 0);
1400 return 0;
1401}
1402
1403/**
1404 * xmlStrncasecmp:
1405 * @str1: the first xmlChar *
1406 * @str2: the second xmlChar *
1407 * @len: the max comparison length
1408 *
1409 * a strncasecmp for xmlChar's
1410 *
1411 * Returns the integer result of the comparison
1412 */
1413
1414int
1415xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1416 register int tmp;
1417
1418 if (len <= 0) return(0);
1419 if (str1 == str2) return(0);
1420 if (str1 == NULL) return(-1);
1421 if (str2 == NULL) return(1);
1422 do {
1423 tmp = casemap[*str1++] - casemap[*str2];
1424 if (tmp != 0 || --len == 0) return(tmp);
1425 } while (*str2++ != 0);
1426 return 0;
1427}
1428
1429/**
1430 * xmlStrchr:
1431 * @str: the xmlChar * array
1432 * @val: the xmlChar to search
1433 *
1434 * a strchr for xmlChar's
1435 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001436 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001437 */
1438
1439const xmlChar *
1440xmlStrchr(const xmlChar *str, xmlChar val) {
1441 if (str == NULL) return(NULL);
1442 while (*str != 0) { /* non input consuming */
1443 if (*str == val) return((xmlChar *) str);
1444 str++;
1445 }
1446 return(NULL);
1447}
1448
1449/**
1450 * xmlStrstr:
1451 * @str: the xmlChar * array (haystack)
1452 * @val: the xmlChar to search (needle)
1453 *
1454 * a strstr for xmlChar's
1455 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001456 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001457 */
1458
1459const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001460xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001461 int n;
1462
1463 if (str == NULL) return(NULL);
1464 if (val == NULL) return(NULL);
1465 n = xmlStrlen(val);
1466
1467 if (n == 0) return(str);
1468 while (*str != 0) { /* non input consuming */
1469 if (*str == *val) {
1470 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1471 }
1472 str++;
1473 }
1474 return(NULL);
1475}
1476
1477/**
1478 * xmlStrcasestr:
1479 * @str: the xmlChar * array (haystack)
1480 * @val: the xmlChar to search (needle)
1481 *
1482 * a case-ignoring strstr for xmlChar's
1483 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001484 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001485 */
1486
1487const xmlChar *
1488xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1489 int n;
1490
1491 if (str == NULL) return(NULL);
1492 if (val == NULL) return(NULL);
1493 n = xmlStrlen(val);
1494
1495 if (n == 0) return(str);
1496 while (*str != 0) { /* non input consuming */
1497 if (casemap[*str] == casemap[*val])
1498 if (!xmlStrncasecmp(str, val, n)) return(str);
1499 str++;
1500 }
1501 return(NULL);
1502}
1503
1504/**
1505 * xmlStrsub:
1506 * @str: the xmlChar * array (haystack)
1507 * @start: the index of the first char (zero based)
1508 * @len: the length of the substring
1509 *
1510 * Extract a substring of a given string
1511 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001512 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001513 */
1514
1515xmlChar *
1516xmlStrsub(const xmlChar *str, int start, int len) {
1517 int i;
1518
1519 if (str == NULL) return(NULL);
1520 if (start < 0) return(NULL);
1521 if (len < 0) return(NULL);
1522
1523 for (i = 0;i < start;i++) {
1524 if (*str == 0) return(NULL);
1525 str++;
1526 }
1527 if (*str == 0) return(NULL);
1528 return(xmlStrndup(str, len));
1529}
1530
1531/**
1532 * xmlStrlen:
1533 * @str: the xmlChar * array
1534 *
1535 * length of a xmlChar's string
1536 *
1537 * Returns the number of xmlChar contained in the ARRAY.
1538 */
1539
1540int
1541xmlStrlen(const xmlChar *str) {
1542 int len = 0;
1543
1544 if (str == NULL) return(0);
1545 while (*str != 0) { /* non input consuming */
1546 str++;
1547 len++;
1548 }
1549 return(len);
1550}
1551
1552/**
1553 * xmlStrncat:
1554 * @cur: the original xmlChar * array
1555 * @add: the xmlChar * array added
1556 * @len: the length of @add
1557 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001558 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001559 * first bytes of @add.
1560 *
1561 * Returns a new xmlChar *, the original @cur is reallocated if needed
1562 * and should not be freed
1563 */
1564
1565xmlChar *
1566xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1567 int size;
1568 xmlChar *ret;
1569
1570 if ((add == NULL) || (len == 0))
1571 return(cur);
1572 if (cur == NULL)
1573 return(xmlStrndup(add, len));
1574
1575 size = xmlStrlen(cur);
1576 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1577 if (ret == NULL) {
1578 xmlGenericError(xmlGenericErrorContext,
1579 "xmlStrncat: realloc of %ld byte failed\n",
1580 (size + len + 1) * (long)sizeof(xmlChar));
1581 return(cur);
1582 }
1583 memcpy(&ret[size], add, len * sizeof(xmlChar));
1584 ret[size + len] = 0;
1585 return(ret);
1586}
1587
1588/**
1589 * xmlStrcat:
1590 * @cur: the original xmlChar * array
1591 * @add: the xmlChar * array added
1592 *
1593 * a strcat for array of xmlChar's. Since they are supposed to be
1594 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1595 * a termination mark of '0'.
1596 *
1597 * Returns a new xmlChar * containing the concatenated string.
1598 */
1599xmlChar *
1600xmlStrcat(xmlChar *cur, const xmlChar *add) {
1601 const xmlChar *p = add;
1602
1603 if (add == NULL) return(cur);
1604 if (cur == NULL)
1605 return(xmlStrdup(add));
1606
1607 while (*p != 0) p++; /* non input consuming */
1608 return(xmlStrncat(cur, add, p - add));
1609}
1610
1611/************************************************************************
1612 * *
1613 * Commodity functions, cleanup needed ? *
1614 * *
1615 ************************************************************************/
1616
1617/**
1618 * areBlanks:
1619 * @ctxt: an XML parser context
1620 * @str: a xmlChar *
1621 * @len: the size of @str
1622 *
1623 * Is this a sequence of blank chars that one can ignore ?
1624 *
1625 * Returns 1 if ignorable 0 otherwise.
1626 */
1627
1628static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1629 int i, ret;
1630 xmlNodePtr lastChild;
1631
Daniel Veillard05c13a22001-09-09 08:38:09 +00001632 /*
1633 * Don't spend time trying to differentiate them, the same callback is
1634 * used !
1635 */
1636 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001637 return(0);
1638
Owen Taylor3473f882001-02-23 17:55:21 +00001639 /*
1640 * Check for xml:space value.
1641 */
1642 if (*(ctxt->space) == 1)
1643 return(0);
1644
1645 /*
1646 * Check that the string is made of blanks
1647 */
1648 for (i = 0;i < len;i++)
1649 if (!(IS_BLANK(str[i]))) return(0);
1650
1651 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001652 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001653 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001654 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001655 if (ctxt->myDoc != NULL) {
1656 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1657 if (ret == 0) return(1);
1658 if (ret == 1) return(0);
1659 }
1660
1661 /*
1662 * Otherwise, heuristic :-\
1663 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001664 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001665 if ((ctxt->node->children == NULL) &&
1666 (RAW == '<') && (NXT(1) == '/')) return(0);
1667
1668 lastChild = xmlGetLastChild(ctxt->node);
1669 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001670 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1671 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001672 } else if (xmlNodeIsText(lastChild))
1673 return(0);
1674 else if ((ctxt->node->children != NULL) &&
1675 (xmlNodeIsText(ctxt->node->children)))
1676 return(0);
1677 return(1);
1678}
1679
Owen Taylor3473f882001-02-23 17:55:21 +00001680/************************************************************************
1681 * *
1682 * Extra stuff for namespace support *
1683 * Relates to http://www.w3.org/TR/WD-xml-names *
1684 * *
1685 ************************************************************************/
1686
1687/**
1688 * xmlSplitQName:
1689 * @ctxt: an XML parser context
1690 * @name: an XML parser context
1691 * @prefix: a xmlChar **
1692 *
1693 * parse an UTF8 encoded XML qualified name string
1694 *
1695 * [NS 5] QName ::= (Prefix ':')? LocalPart
1696 *
1697 * [NS 6] Prefix ::= NCName
1698 *
1699 * [NS 7] LocalPart ::= NCName
1700 *
1701 * Returns the local part, and prefix is updated
1702 * to get the Prefix if any.
1703 */
1704
1705xmlChar *
1706xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1707 xmlChar buf[XML_MAX_NAMELEN + 5];
1708 xmlChar *buffer = NULL;
1709 int len = 0;
1710 int max = XML_MAX_NAMELEN;
1711 xmlChar *ret = NULL;
1712 const xmlChar *cur = name;
1713 int c;
1714
1715 *prefix = NULL;
1716
1717#ifndef XML_XML_NAMESPACE
1718 /* xml: prefix is not really a namespace */
1719 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1720 (cur[2] == 'l') && (cur[3] == ':'))
1721 return(xmlStrdup(name));
1722#endif
1723
1724 /* nasty but valid */
1725 if (cur[0] == ':')
1726 return(xmlStrdup(name));
1727
1728 c = *cur++;
1729 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1730 buf[len++] = c;
1731 c = *cur++;
1732 }
1733 if (len >= max) {
1734 /*
1735 * Okay someone managed to make a huge name, so he's ready to pay
1736 * for the processing speed.
1737 */
1738 max = len * 2;
1739
1740 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1741 if (buffer == NULL) {
1742 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1743 ctxt->sax->error(ctxt->userData,
1744 "xmlSplitQName: out of memory\n");
1745 return(NULL);
1746 }
1747 memcpy(buffer, buf, len);
1748 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1749 if (len + 10 > max) {
1750 max *= 2;
1751 buffer = (xmlChar *) xmlRealloc(buffer,
1752 max * sizeof(xmlChar));
1753 if (buffer == NULL) {
1754 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1755 ctxt->sax->error(ctxt->userData,
1756 "xmlSplitQName: out of memory\n");
1757 return(NULL);
1758 }
1759 }
1760 buffer[len++] = c;
1761 c = *cur++;
1762 }
1763 buffer[len] = 0;
1764 }
1765
1766 if (buffer == NULL)
1767 ret = xmlStrndup(buf, len);
1768 else {
1769 ret = buffer;
1770 buffer = NULL;
1771 max = XML_MAX_NAMELEN;
1772 }
1773
1774
1775 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00001776 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001777 if (c == 0) return(ret);
1778 *prefix = ret;
1779 len = 0;
1780
Daniel Veillardbb284f42002-10-16 18:02:47 +00001781 /*
1782 * Check that the first character is proper to start
1783 * a new name
1784 */
1785 if (!(((c >= 0x61) && (c <= 0x7A)) ||
1786 ((c >= 0x41) && (c <= 0x5A)) ||
1787 (c == '_') || (c == ':'))) {
1788 int l;
1789 int first = CUR_SCHAR(cur, l);
1790
1791 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001792 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1793 (ctxt->sax->error != NULL))
Daniel Veillardbb284f42002-10-16 18:02:47 +00001794 ctxt->sax->error(ctxt->userData,
1795 "Name %s is not XML Namespace compliant\n",
1796 name);
1797 }
1798 }
1799 cur++;
1800
Owen Taylor3473f882001-02-23 17:55:21 +00001801 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1802 buf[len++] = c;
1803 c = *cur++;
1804 }
1805 if (len >= max) {
1806 /*
1807 * Okay someone managed to make a huge name, so he's ready to pay
1808 * for the processing speed.
1809 */
1810 max = len * 2;
1811
1812 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1813 if (buffer == NULL) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001814 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1815 (ctxt->sax->error != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00001816 ctxt->sax->error(ctxt->userData,
1817 "xmlSplitQName: out of memory\n");
1818 return(NULL);
1819 }
1820 memcpy(buffer, buf, len);
1821 while (c != 0) { /* tested bigname2.xml */
1822 if (len + 10 > max) {
1823 max *= 2;
1824 buffer = (xmlChar *) xmlRealloc(buffer,
1825 max * sizeof(xmlChar));
1826 if (buffer == NULL) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001827 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1828 (ctxt->sax->error != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00001829 ctxt->sax->error(ctxt->userData,
1830 "xmlSplitQName: out of memory\n");
1831 return(NULL);
1832 }
1833 }
1834 buffer[len++] = c;
1835 c = *cur++;
1836 }
1837 buffer[len] = 0;
1838 }
1839
1840 if (buffer == NULL)
1841 ret = xmlStrndup(buf, len);
1842 else {
1843 ret = buffer;
1844 }
1845 }
1846
1847 return(ret);
1848}
1849
1850/************************************************************************
1851 * *
1852 * The parser itself *
1853 * Relates to http://www.w3.org/TR/REC-xml *
1854 * *
1855 ************************************************************************/
1856
Daniel Veillard76d66f42001-05-16 21:05:17 +00001857static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001858/**
1859 * xmlParseName:
1860 * @ctxt: an XML parser context
1861 *
1862 * parse an XML name.
1863 *
1864 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1865 * CombiningChar | Extender
1866 *
1867 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1868 *
1869 * [6] Names ::= Name (S Name)*
1870 *
1871 * Returns the Name parsed or NULL
1872 */
1873
1874xmlChar *
1875xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001876 const xmlChar *in;
1877 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001878 int count = 0;
1879
1880 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001881
1882 /*
1883 * Accelerator for simple ASCII names
1884 */
1885 in = ctxt->input->cur;
1886 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1887 ((*in >= 0x41) && (*in <= 0x5A)) ||
1888 (*in == '_') || (*in == ':')) {
1889 in++;
1890 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1891 ((*in >= 0x41) && (*in <= 0x5A)) ||
1892 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001893 (*in == '_') || (*in == '-') ||
1894 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001895 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001896 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001897 count = in - ctxt->input->cur;
1898 ret = xmlStrndup(ctxt->input->cur, count);
1899 ctxt->input->cur = in;
1900 return(ret);
1901 }
1902 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001903 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001904}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001905
Daniel Veillard46de64e2002-05-29 08:21:33 +00001906/**
1907 * xmlParseNameAndCompare:
1908 * @ctxt: an XML parser context
1909 *
1910 * parse an XML name and compares for match
1911 * (specialized for endtag parsing)
1912 *
1913 *
1914 * Returns NULL for an illegal name, (xmlChar*) 1 for success
1915 * and the name for mismatch
1916 */
1917
Daniel Veillardf4862f02002-09-10 11:13:43 +00001918static xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00001919xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
1920 const xmlChar *cmp = other;
1921 const xmlChar *in;
1922 xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00001923
1924 GROW;
1925
1926 in = ctxt->input->cur;
1927 while (*in != 0 && *in == *cmp) {
1928 ++in;
1929 ++cmp;
1930 }
1931 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
1932 /* success */
1933 ctxt->input->cur = in;
1934 return (xmlChar*) 1;
1935 }
1936 /* failure (or end of input buffer), check with full function */
1937 ret = xmlParseName (ctxt);
1938 if (ret != 0 && xmlStrEqual (ret, other)) {
1939 xmlFree (ret);
1940 return (xmlChar*) 1;
1941 }
1942 return ret;
1943}
1944
Daniel Veillard76d66f42001-05-16 21:05:17 +00001945static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001946xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1947 xmlChar buf[XML_MAX_NAMELEN + 5];
1948 int len = 0, l;
1949 int c;
1950 int count = 0;
1951
1952 /*
1953 * Handler for more complex cases
1954 */
1955 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001956 c = CUR_CHAR(l);
1957 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1958 (!IS_LETTER(c) && (c != '_') &&
1959 (c != ':'))) {
1960 return(NULL);
1961 }
1962
1963 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1964 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1965 (c == '.') || (c == '-') ||
1966 (c == '_') || (c == ':') ||
1967 (IS_COMBINING(c)) ||
1968 (IS_EXTENDER(c)))) {
1969 if (count++ > 100) {
1970 count = 0;
1971 GROW;
1972 }
1973 COPY_BUF(l,buf,len,c);
1974 NEXTL(l);
1975 c = CUR_CHAR(l);
1976 if (len >= XML_MAX_NAMELEN) {
1977 /*
1978 * Okay someone managed to make a huge name, so he's ready to pay
1979 * for the processing speed.
1980 */
1981 xmlChar *buffer;
1982 int max = len * 2;
1983
1984 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1985 if (buffer == NULL) {
1986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1987 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001988 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001989 return(NULL);
1990 }
1991 memcpy(buffer, buf, len);
1992 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1993 (c == '.') || (c == '-') ||
1994 (c == '_') || (c == ':') ||
1995 (IS_COMBINING(c)) ||
1996 (IS_EXTENDER(c))) {
1997 if (count++ > 100) {
1998 count = 0;
1999 GROW;
2000 }
2001 if (len + 10 > max) {
2002 max *= 2;
2003 buffer = (xmlChar *) xmlRealloc(buffer,
2004 max * sizeof(xmlChar));
2005 if (buffer == NULL) {
2006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2007 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00002008 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002009 return(NULL);
2010 }
2011 }
2012 COPY_BUF(l,buffer,len,c);
2013 NEXTL(l);
2014 c = CUR_CHAR(l);
2015 }
2016 buffer[len] = 0;
2017 return(buffer);
2018 }
2019 }
2020 return(xmlStrndup(buf, len));
2021}
2022
2023/**
2024 * xmlParseStringName:
2025 * @ctxt: an XML parser context
2026 * @str: a pointer to the string pointer (IN/OUT)
2027 *
2028 * parse an XML name.
2029 *
2030 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2031 * CombiningChar | Extender
2032 *
2033 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2034 *
2035 * [6] Names ::= Name (S Name)*
2036 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002037 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002038 * is updated to the current location in the string.
2039 */
2040
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002041static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002042xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2043 xmlChar buf[XML_MAX_NAMELEN + 5];
2044 const xmlChar *cur = *str;
2045 int len = 0, l;
2046 int c;
2047
2048 c = CUR_SCHAR(cur, l);
2049 if (!IS_LETTER(c) && (c != '_') &&
2050 (c != ':')) {
2051 return(NULL);
2052 }
2053
2054 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2055 (c == '.') || (c == '-') ||
2056 (c == '_') || (c == ':') ||
2057 (IS_COMBINING(c)) ||
2058 (IS_EXTENDER(c))) {
2059 COPY_BUF(l,buf,len,c);
2060 cur += l;
2061 c = CUR_SCHAR(cur, l);
2062 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2063 /*
2064 * Okay someone managed to make a huge name, so he's ready to pay
2065 * for the processing speed.
2066 */
2067 xmlChar *buffer;
2068 int max = len * 2;
2069
2070 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
2071 if (buffer == NULL) {
2072 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2073 ctxt->sax->error(ctxt->userData,
2074 "xmlParseStringName: out of memory\n");
2075 return(NULL);
2076 }
2077 memcpy(buffer, buf, len);
2078 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2079 (c == '.') || (c == '-') ||
2080 (c == '_') || (c == ':') ||
2081 (IS_COMBINING(c)) ||
2082 (IS_EXTENDER(c))) {
2083 if (len + 10 > max) {
2084 max *= 2;
2085 buffer = (xmlChar *) xmlRealloc(buffer,
2086 max * sizeof(xmlChar));
2087 if (buffer == NULL) {
2088 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2089 ctxt->sax->error(ctxt->userData,
2090 "xmlParseStringName: out of memory\n");
2091 return(NULL);
2092 }
2093 }
2094 COPY_BUF(l,buffer,len,c);
2095 cur += l;
2096 c = CUR_SCHAR(cur, l);
2097 }
2098 buffer[len] = 0;
2099 *str = cur;
2100 return(buffer);
2101 }
2102 }
2103 *str = cur;
2104 return(xmlStrndup(buf, len));
2105}
2106
2107/**
2108 * xmlParseNmtoken:
2109 * @ctxt: an XML parser context
2110 *
2111 * parse an XML Nmtoken.
2112 *
2113 * [7] Nmtoken ::= (NameChar)+
2114 *
2115 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2116 *
2117 * Returns the Nmtoken parsed or NULL
2118 */
2119
2120xmlChar *
2121xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2122 xmlChar buf[XML_MAX_NAMELEN + 5];
2123 int len = 0, l;
2124 int c;
2125 int count = 0;
2126
2127 GROW;
2128 c = CUR_CHAR(l);
2129
2130 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2131 (c == '.') || (c == '-') ||
2132 (c == '_') || (c == ':') ||
2133 (IS_COMBINING(c)) ||
2134 (IS_EXTENDER(c))) {
2135 if (count++ > 100) {
2136 count = 0;
2137 GROW;
2138 }
2139 COPY_BUF(l,buf,len,c);
2140 NEXTL(l);
2141 c = CUR_CHAR(l);
2142 if (len >= XML_MAX_NAMELEN) {
2143 /*
2144 * Okay someone managed to make a huge token, so he's ready to pay
2145 * for the processing speed.
2146 */
2147 xmlChar *buffer;
2148 int max = len * 2;
2149
2150 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
2151 if (buffer == NULL) {
2152 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2153 ctxt->sax->error(ctxt->userData,
2154 "xmlParseNmtoken: out of memory\n");
2155 return(NULL);
2156 }
2157 memcpy(buffer, buf, len);
2158 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2159 (c == '.') || (c == '-') ||
2160 (c == '_') || (c == ':') ||
2161 (IS_COMBINING(c)) ||
2162 (IS_EXTENDER(c))) {
2163 if (count++ > 100) {
2164 count = 0;
2165 GROW;
2166 }
2167 if (len + 10 > max) {
2168 max *= 2;
2169 buffer = (xmlChar *) xmlRealloc(buffer,
2170 max * sizeof(xmlChar));
2171 if (buffer == NULL) {
2172 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2173 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002174 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002175 return(NULL);
2176 }
2177 }
2178 COPY_BUF(l,buffer,len,c);
2179 NEXTL(l);
2180 c = CUR_CHAR(l);
2181 }
2182 buffer[len] = 0;
2183 return(buffer);
2184 }
2185 }
2186 if (len == 0)
2187 return(NULL);
2188 return(xmlStrndup(buf, len));
2189}
2190
2191/**
2192 * xmlParseEntityValue:
2193 * @ctxt: an XML parser context
2194 * @orig: if non-NULL store a copy of the original entity value
2195 *
2196 * parse a value for ENTITY declarations
2197 *
2198 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2199 * "'" ([^%&'] | PEReference | Reference)* "'"
2200 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002201 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002202 */
2203
2204xmlChar *
2205xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2206 xmlChar *buf = NULL;
2207 int len = 0;
2208 int size = XML_PARSER_BUFFER_SIZE;
2209 int c, l;
2210 xmlChar stop;
2211 xmlChar *ret = NULL;
2212 const xmlChar *cur = NULL;
2213 xmlParserInputPtr input;
2214
2215 if (RAW == '"') stop = '"';
2216 else if (RAW == '\'') stop = '\'';
2217 else {
2218 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2219 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2220 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2221 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002222 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002223 return(NULL);
2224 }
2225 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2226 if (buf == NULL) {
2227 xmlGenericError(xmlGenericErrorContext,
2228 "malloc of %d byte failed\n", size);
2229 return(NULL);
2230 }
2231
2232 /*
2233 * The content of the entity definition is copied in a buffer.
2234 */
2235
2236 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2237 input = ctxt->input;
2238 GROW;
2239 NEXT;
2240 c = CUR_CHAR(l);
2241 /*
2242 * NOTE: 4.4.5 Included in Literal
2243 * When a parameter entity reference appears in a literal entity
2244 * value, ... a single or double quote character in the replacement
2245 * text is always treated as a normal data character and will not
2246 * terminate the literal.
2247 * In practice it means we stop the loop only when back at parsing
2248 * the initial entity and the quote is found
2249 */
2250 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2251 (ctxt->input != input))) {
2252 if (len + 5 >= size) {
2253 size *= 2;
2254 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2255 if (buf == NULL) {
2256 xmlGenericError(xmlGenericErrorContext,
2257 "realloc of %d byte failed\n", size);
2258 return(NULL);
2259 }
2260 }
2261 COPY_BUF(l,buf,len,c);
2262 NEXTL(l);
2263 /*
2264 * Pop-up of finished entities.
2265 */
2266 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2267 xmlPopInput(ctxt);
2268
2269 GROW;
2270 c = CUR_CHAR(l);
2271 if (c == 0) {
2272 GROW;
2273 c = CUR_CHAR(l);
2274 }
2275 }
2276 buf[len] = 0;
2277
2278 /*
2279 * Raise problem w.r.t. '&' and '%' being used in non-entities
2280 * reference constructs. Note Charref will be handled in
2281 * xmlStringDecodeEntities()
2282 */
2283 cur = buf;
2284 while (*cur != 0) { /* non input consuming */
2285 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2286 xmlChar *name;
2287 xmlChar tmp = *cur;
2288
2289 cur++;
2290 name = xmlParseStringName(ctxt, &cur);
2291 if ((name == NULL) || (*cur != ';')) {
2292 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2293 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2294 ctxt->sax->error(ctxt->userData,
2295 "EntityValue: '%c' forbidden except for entities references\n",
2296 tmp);
2297 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002298 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002299 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002300 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2301 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002302 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2303 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2304 ctxt->sax->error(ctxt->userData,
2305 "EntityValue: PEReferences forbidden in internal subset\n",
2306 tmp);
2307 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002308 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002309 }
2310 if (name != NULL)
2311 xmlFree(name);
2312 }
2313 cur++;
2314 }
2315
2316 /*
2317 * Then PEReference entities are substituted.
2318 */
2319 if (c != stop) {
2320 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2321 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2322 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2323 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002324 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002325 xmlFree(buf);
2326 } else {
2327 NEXT;
2328 /*
2329 * NOTE: 4.4.7 Bypassed
2330 * When a general entity reference appears in the EntityValue in
2331 * an entity declaration, it is bypassed and left as is.
2332 * so XML_SUBSTITUTE_REF is not set here.
2333 */
2334 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2335 0, 0, 0);
2336 if (orig != NULL)
2337 *orig = buf;
2338 else
2339 xmlFree(buf);
2340 }
2341
2342 return(ret);
2343}
2344
2345/**
2346 * xmlParseAttValue:
2347 * @ctxt: an XML parser context
2348 *
2349 * parse a value for an attribute
2350 * Note: the parser won't do substitution of entities here, this
2351 * will be handled later in xmlStringGetNodeList
2352 *
2353 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2354 * "'" ([^<&'] | Reference)* "'"
2355 *
2356 * 3.3.3 Attribute-Value Normalization:
2357 * Before the value of an attribute is passed to the application or
2358 * checked for validity, the XML processor must normalize it as follows:
2359 * - a character reference is processed by appending the referenced
2360 * character to the attribute value
2361 * - an entity reference is processed by recursively processing the
2362 * replacement text of the entity
2363 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2364 * appending #x20 to the normalized value, except that only a single
2365 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2366 * parsed entity or the literal entity value of an internal parsed entity
2367 * - other characters are processed by appending them to the normalized value
2368 * If the declared value is not CDATA, then the XML processor must further
2369 * process the normalized attribute value by discarding any leading and
2370 * trailing space (#x20) characters, and by replacing sequences of space
2371 * (#x20) characters by a single space (#x20) character.
2372 * All attributes for which no declaration has been read should be treated
2373 * by a non-validating parser as if declared CDATA.
2374 *
2375 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2376 */
2377
2378xmlChar *
Daniel Veillarde72c7562002-05-31 09:47:30 +00002379xmlParseAttValueComplex(xmlParserCtxtPtr ctxt);
2380
2381xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002382xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2383 xmlChar limit = 0;
Daniel Veillardf4862f02002-09-10 11:13:43 +00002384 const xmlChar *in = NULL;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002385 xmlChar *ret = NULL;
2386 SHRINK;
2387 GROW;
Daniel Veillarde645e8c2002-10-22 17:35:37 +00002388 in = (xmlChar *) CUR_PTR;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002389 if (*in != '"' && *in != '\'') {
2390 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2391 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2392 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2393 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002394 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002395 return(NULL);
2396 }
2397 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2398 limit = *in;
2399 ++in;
2400
2401 while (*in != limit && *in >= 0x20 && *in <= 0x7f &&
2402 *in != '&' && *in != '<'
2403 ) {
2404 ++in;
2405 }
2406 if (*in != limit) {
2407 return xmlParseAttValueComplex(ctxt);
2408 }
2409 ++in;
2410 ret = xmlStrndup (CUR_PTR + 1, in - CUR_PTR - 2);
2411 CUR_PTR = in;
2412 return ret;
2413}
2414
Daniel Veillard01c13b52002-12-10 15:19:08 +00002415/**
2416 * xmlParseAttValueComplex:
2417 * @ctxt: an XML parser context
2418 *
2419 * parse a value for an attribute, this is the fallback function
2420 * of xmlParseAttValue() when the attribute parsing requires handling
2421 * of non-ASCII characters.
2422 *
2423 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2424 */
Daniel Veillarde72c7562002-05-31 09:47:30 +00002425xmlChar *
2426xmlParseAttValueComplex(xmlParserCtxtPtr ctxt) {
2427 xmlChar limit = 0;
2428 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002429 int len = 0;
2430 int buf_size = 0;
2431 int c, l;
2432 xmlChar *current = NULL;
2433 xmlEntityPtr ent;
2434
2435
2436 SHRINK;
2437 if (NXT(0) == '"') {
2438 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2439 limit = '"';
2440 NEXT;
2441 } else if (NXT(0) == '\'') {
2442 limit = '\'';
2443 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2444 NEXT;
2445 } else {
2446 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2447 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2448 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2449 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002450 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002451 return(NULL);
2452 }
2453
2454 /*
2455 * allocate a translation buffer.
2456 */
2457 buf_size = XML_PARSER_BUFFER_SIZE;
2458 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2459 if (buf == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002460 xmlGenericError(xmlGenericErrorContext,
2461 "xmlParseAttValue: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002462 return(NULL);
2463 }
2464
2465 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002466 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002467 */
2468 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002469 while ((NXT(0) != limit) && /* checked */
2470 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002471 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002472 if (c == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00002473 if (NXT(1) == '#') {
2474 int val = xmlParseCharRef(ctxt);
2475 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002476 if (ctxt->replaceEntities) {
2477 if (len > buf_size - 10) {
2478 growBuffer(buf);
2479 }
2480 buf[len++] = '&';
2481 } else {
2482 /*
2483 * The reparsing will be done in xmlStringGetNodeList()
2484 * called by the attribute() function in SAX.c
2485 */
2486 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002487
Daniel Veillard319a7422001-09-11 09:27:09 +00002488 if (len > buf_size - 10) {
2489 growBuffer(buf);
2490 }
2491 current = &buffer[0];
2492 while (*current != 0) { /* non input consuming */
2493 buf[len++] = *current++;
2494 }
Owen Taylor3473f882001-02-23 17:55:21 +00002495 }
2496 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002497 if (len > buf_size - 10) {
2498 growBuffer(buf);
2499 }
Owen Taylor3473f882001-02-23 17:55:21 +00002500 len += xmlCopyChar(0, &buf[len], val);
2501 }
2502 } else {
2503 ent = xmlParseEntityRef(ctxt);
2504 if ((ent != NULL) &&
2505 (ctxt->replaceEntities != 0)) {
2506 xmlChar *rep;
2507
2508 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2509 rep = xmlStringDecodeEntities(ctxt, ent->content,
2510 XML_SUBSTITUTE_REF, 0, 0, 0);
2511 if (rep != NULL) {
2512 current = rep;
2513 while (*current != 0) { /* non input consuming */
2514 buf[len++] = *current++;
2515 if (len > buf_size - 10) {
2516 growBuffer(buf);
2517 }
2518 }
2519 xmlFree(rep);
2520 }
2521 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002522 if (len > buf_size - 10) {
2523 growBuffer(buf);
2524 }
Owen Taylor3473f882001-02-23 17:55:21 +00002525 if (ent->content != NULL)
2526 buf[len++] = ent->content[0];
2527 }
2528 } else if (ent != NULL) {
2529 int i = xmlStrlen(ent->name);
2530 const xmlChar *cur = ent->name;
2531
2532 /*
2533 * This may look absurd but is needed to detect
2534 * entities problems
2535 */
2536 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2537 (ent->content != NULL)) {
2538 xmlChar *rep;
2539 rep = xmlStringDecodeEntities(ctxt, ent->content,
2540 XML_SUBSTITUTE_REF, 0, 0, 0);
2541 if (rep != NULL)
2542 xmlFree(rep);
2543 }
2544
2545 /*
2546 * Just output the reference
2547 */
2548 buf[len++] = '&';
2549 if (len > buf_size - i - 10) {
2550 growBuffer(buf);
2551 }
2552 for (;i > 0;i--)
2553 buf[len++] = *cur++;
2554 buf[len++] = ';';
2555 }
2556 }
2557 } else {
2558 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2559 COPY_BUF(l,buf,len,0x20);
2560 if (len > buf_size - 10) {
2561 growBuffer(buf);
2562 }
2563 } else {
2564 COPY_BUF(l,buf,len,c);
2565 if (len > buf_size - 10) {
2566 growBuffer(buf);
2567 }
2568 }
2569 NEXTL(l);
2570 }
2571 GROW;
2572 c = CUR_CHAR(l);
2573 }
2574 buf[len++] = 0;
2575 if (RAW == '<') {
2576 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2577 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2578 ctxt->sax->error(ctxt->userData,
2579 "Unescaped '<' not allowed in attributes values\n");
2580 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002581 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002582 } else if (RAW != limit) {
2583 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2584 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2585 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2586 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002587 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002588 } else
2589 NEXT;
2590 return(buf);
2591}
2592
2593/**
2594 * xmlParseSystemLiteral:
2595 * @ctxt: an XML parser context
2596 *
2597 * parse an XML Literal
2598 *
2599 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2600 *
2601 * Returns the SystemLiteral parsed or NULL
2602 */
2603
2604xmlChar *
2605xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2606 xmlChar *buf = NULL;
2607 int len = 0;
2608 int size = XML_PARSER_BUFFER_SIZE;
2609 int cur, l;
2610 xmlChar stop;
2611 int state = ctxt->instate;
2612 int count = 0;
2613
2614 SHRINK;
2615 if (RAW == '"') {
2616 NEXT;
2617 stop = '"';
2618 } else if (RAW == '\'') {
2619 NEXT;
2620 stop = '\'';
2621 } else {
2622 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2623 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2624 ctxt->sax->error(ctxt->userData,
2625 "SystemLiteral \" or ' expected\n");
2626 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002627 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002628 return(NULL);
2629 }
2630
2631 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2632 if (buf == NULL) {
2633 xmlGenericError(xmlGenericErrorContext,
2634 "malloc of %d byte failed\n", size);
2635 return(NULL);
2636 }
2637 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2638 cur = CUR_CHAR(l);
2639 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2640 if (len + 5 >= size) {
2641 size *= 2;
2642 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2643 if (buf == NULL) {
2644 xmlGenericError(xmlGenericErrorContext,
2645 "realloc of %d byte failed\n", size);
2646 ctxt->instate = (xmlParserInputState) state;
2647 return(NULL);
2648 }
2649 }
2650 count++;
2651 if (count > 50) {
2652 GROW;
2653 count = 0;
2654 }
2655 COPY_BUF(l,buf,len,cur);
2656 NEXTL(l);
2657 cur = CUR_CHAR(l);
2658 if (cur == 0) {
2659 GROW;
2660 SHRINK;
2661 cur = CUR_CHAR(l);
2662 }
2663 }
2664 buf[len] = 0;
2665 ctxt->instate = (xmlParserInputState) state;
2666 if (!IS_CHAR(cur)) {
2667 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2668 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2669 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2670 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002671 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002672 } else {
2673 NEXT;
2674 }
2675 return(buf);
2676}
2677
2678/**
2679 * xmlParsePubidLiteral:
2680 * @ctxt: an XML parser context
2681 *
2682 * parse an XML public literal
2683 *
2684 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2685 *
2686 * Returns the PubidLiteral parsed or NULL.
2687 */
2688
2689xmlChar *
2690xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2691 xmlChar *buf = NULL;
2692 int len = 0;
2693 int size = XML_PARSER_BUFFER_SIZE;
2694 xmlChar cur;
2695 xmlChar stop;
2696 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002697 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00002698
2699 SHRINK;
2700 if (RAW == '"') {
2701 NEXT;
2702 stop = '"';
2703 } else if (RAW == '\'') {
2704 NEXT;
2705 stop = '\'';
2706 } else {
2707 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2708 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2709 ctxt->sax->error(ctxt->userData,
2710 "SystemLiteral \" or ' expected\n");
2711 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002712 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002713 return(NULL);
2714 }
2715 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2716 if (buf == NULL) {
2717 xmlGenericError(xmlGenericErrorContext,
2718 "malloc of %d byte failed\n", size);
2719 return(NULL);
2720 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002721 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00002722 cur = CUR;
2723 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2724 if (len + 1 >= size) {
2725 size *= 2;
2726 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2727 if (buf == NULL) {
2728 xmlGenericError(xmlGenericErrorContext,
2729 "realloc of %d byte failed\n", size);
2730 return(NULL);
2731 }
2732 }
2733 buf[len++] = cur;
2734 count++;
2735 if (count > 50) {
2736 GROW;
2737 count = 0;
2738 }
2739 NEXT;
2740 cur = CUR;
2741 if (cur == 0) {
2742 GROW;
2743 SHRINK;
2744 cur = CUR;
2745 }
2746 }
2747 buf[len] = 0;
2748 if (cur != stop) {
2749 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2750 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2751 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2752 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002753 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002754 } else {
2755 NEXT;
2756 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002757 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00002758 return(buf);
2759}
2760
Daniel Veillard48b2f892001-02-25 16:11:03 +00002761void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002762/**
2763 * xmlParseCharData:
2764 * @ctxt: an XML parser context
2765 * @cdata: int indicating whether we are within a CDATA section
2766 *
2767 * parse a CharData section.
2768 * if we are within a CDATA section ']]>' marks an end of section.
2769 *
2770 * The right angle bracket (>) may be represented using the string "&gt;",
2771 * and must, for compatibility, be escaped using "&gt;" or a character
2772 * reference when it appears in the string "]]>" in content, when that
2773 * string is not marking the end of a CDATA section.
2774 *
2775 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2776 */
2777
2778void
2779xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00002780 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002781 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002782 int line = ctxt->input->line;
2783 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002784
2785 SHRINK;
2786 GROW;
2787 /*
2788 * Accelerated common case where input don't need to be
2789 * modified before passing it to the handler.
2790 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00002791 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002792 in = ctxt->input->cur;
2793 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002794get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00002795 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
2796 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002797 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002798 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002799 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002800 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002801 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002802 ctxt->input->line++;
2803 in++;
2804 }
2805 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002806 }
2807 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002808 if ((in[1] == ']') && (in[2] == '>')) {
2809 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2810 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2811 ctxt->sax->error(ctxt->userData,
2812 "Sequence ']]>' not allowed in content\n");
2813 ctxt->input->cur = in;
2814 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002815 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002816 return;
2817 }
2818 in++;
2819 goto get_more;
2820 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002821 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002822 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002823 if (IS_BLANK(*ctxt->input->cur)) {
2824 const xmlChar *tmp = ctxt->input->cur;
2825 ctxt->input->cur = in;
2826 if (areBlanks(ctxt, tmp, nbchar)) {
2827 if (ctxt->sax->ignorableWhitespace != NULL)
2828 ctxt->sax->ignorableWhitespace(ctxt->userData,
2829 tmp, nbchar);
2830 } else {
2831 if (ctxt->sax->characters != NULL)
2832 ctxt->sax->characters(ctxt->userData,
2833 tmp, nbchar);
2834 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002835 line = ctxt->input->line;
2836 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002837 } else {
2838 if (ctxt->sax->characters != NULL)
2839 ctxt->sax->characters(ctxt->userData,
2840 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002841 line = ctxt->input->line;
2842 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002843 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002844 }
2845 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002846 if (*in == 0xD) {
2847 in++;
2848 if (*in == 0xA) {
2849 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002850 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002851 ctxt->input->line++;
2852 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002853 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002854 in--;
2855 }
2856 if (*in == '<') {
2857 return;
2858 }
2859 if (*in == '&') {
2860 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002861 }
2862 SHRINK;
2863 GROW;
2864 in = ctxt->input->cur;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002865 } while ((*in >= 0x20) && (*in <= 0x7F));
Daniel Veillard48b2f892001-02-25 16:11:03 +00002866 nbchar = 0;
2867 }
Daniel Veillard50582112001-03-26 22:52:16 +00002868 ctxt->input->line = line;
2869 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002870 xmlParseCharDataComplex(ctxt, cdata);
2871}
2872
Daniel Veillard01c13b52002-12-10 15:19:08 +00002873/**
2874 * xmlParseCharDataComplex:
2875 * @ctxt: an XML parser context
2876 * @cdata: int indicating whether we are within a CDATA section
2877 *
2878 * parse a CharData section.this is the fallback function
2879 * of xmlParseCharData() when the parsing requires handling
2880 * of non-ASCII characters.
2881 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002882void
2883xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002884 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2885 int nbchar = 0;
2886 int cur, l;
2887 int count = 0;
2888
2889 SHRINK;
2890 GROW;
2891 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002892 while ((cur != '<') && /* checked */
2893 (cur != '&') &&
2894 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00002895 if ((cur == ']') && (NXT(1) == ']') &&
2896 (NXT(2) == '>')) {
2897 if (cdata) break;
2898 else {
2899 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2900 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2901 ctxt->sax->error(ctxt->userData,
2902 "Sequence ']]>' not allowed in content\n");
2903 /* Should this be relaxed ??? I see a "must here */
2904 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002905 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002906 }
2907 }
2908 COPY_BUF(l,buf,nbchar,cur);
2909 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2910 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002911 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002912 */
2913 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2914 if (areBlanks(ctxt, buf, nbchar)) {
2915 if (ctxt->sax->ignorableWhitespace != NULL)
2916 ctxt->sax->ignorableWhitespace(ctxt->userData,
2917 buf, nbchar);
2918 } else {
2919 if (ctxt->sax->characters != NULL)
2920 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2921 }
2922 }
2923 nbchar = 0;
2924 }
2925 count++;
2926 if (count > 50) {
2927 GROW;
2928 count = 0;
2929 }
2930 NEXTL(l);
2931 cur = CUR_CHAR(l);
2932 }
2933 if (nbchar != 0) {
2934 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002935 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002936 */
2937 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2938 if (areBlanks(ctxt, buf, nbchar)) {
2939 if (ctxt->sax->ignorableWhitespace != NULL)
2940 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2941 } else {
2942 if (ctxt->sax->characters != NULL)
2943 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2944 }
2945 }
2946 }
2947}
2948
2949/**
2950 * xmlParseExternalID:
2951 * @ctxt: an XML parser context
2952 * @publicID: a xmlChar** receiving PubidLiteral
2953 * @strict: indicate whether we should restrict parsing to only
2954 * production [75], see NOTE below
2955 *
2956 * Parse an External ID or a Public ID
2957 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002958 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002959 * 'PUBLIC' S PubidLiteral S SystemLiteral
2960 *
2961 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2962 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2963 *
2964 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2965 *
2966 * Returns the function returns SystemLiteral and in the second
2967 * case publicID receives PubidLiteral, is strict is off
2968 * it is possible to return NULL and have publicID set.
2969 */
2970
2971xmlChar *
2972xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2973 xmlChar *URI = NULL;
2974
2975 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002976
2977 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002978 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2979 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2980 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2981 SKIP(6);
2982 if (!IS_BLANK(CUR)) {
2983 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2984 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2985 ctxt->sax->error(ctxt->userData,
2986 "Space required after 'SYSTEM'\n");
2987 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002988 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002989 }
2990 SKIP_BLANKS;
2991 URI = xmlParseSystemLiteral(ctxt);
2992 if (URI == NULL) {
2993 ctxt->errNo = XML_ERR_URI_REQUIRED;
2994 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2995 ctxt->sax->error(ctxt->userData,
2996 "xmlParseExternalID: SYSTEM, no URI\n");
2997 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002998 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002999 }
3000 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
3001 (NXT(2) == 'B') && (NXT(3) == 'L') &&
3002 (NXT(4) == 'I') && (NXT(5) == 'C')) {
3003 SKIP(6);
3004 if (!IS_BLANK(CUR)) {
3005 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3007 ctxt->sax->error(ctxt->userData,
3008 "Space required after 'PUBLIC'\n");
3009 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003010 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003011 }
3012 SKIP_BLANKS;
3013 *publicID = xmlParsePubidLiteral(ctxt);
3014 if (*publicID == NULL) {
3015 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
3016 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3017 ctxt->sax->error(ctxt->userData,
3018 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
3019 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003020 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003021 }
3022 if (strict) {
3023 /*
3024 * We don't handle [83] so "S SystemLiteral" is required.
3025 */
3026 if (!IS_BLANK(CUR)) {
3027 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3028 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3029 ctxt->sax->error(ctxt->userData,
3030 "Space required after the Public Identifier\n");
3031 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003032 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003033 }
3034 } else {
3035 /*
3036 * We handle [83] so we return immediately, if
3037 * "S SystemLiteral" is not detected. From a purely parsing
3038 * point of view that's a nice mess.
3039 */
3040 const xmlChar *ptr;
3041 GROW;
3042
3043 ptr = CUR_PTR;
3044 if (!IS_BLANK(*ptr)) return(NULL);
3045
3046 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3047 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3048 }
3049 SKIP_BLANKS;
3050 URI = xmlParseSystemLiteral(ctxt);
3051 if (URI == NULL) {
3052 ctxt->errNo = XML_ERR_URI_REQUIRED;
3053 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3054 ctxt->sax->error(ctxt->userData,
3055 "xmlParseExternalID: PUBLIC, no URI\n");
3056 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003057 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003058 }
3059 }
3060 return(URI);
3061}
3062
3063/**
3064 * xmlParseComment:
3065 * @ctxt: an XML parser context
3066 *
3067 * Skip an XML (SGML) comment <!-- .... -->
3068 * The spec says that "For compatibility, the string "--" (double-hyphen)
3069 * must not occur within comments. "
3070 *
3071 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3072 */
3073void
3074xmlParseComment(xmlParserCtxtPtr ctxt) {
3075 xmlChar *buf = NULL;
3076 int len;
3077 int size = XML_PARSER_BUFFER_SIZE;
3078 int q, ql;
3079 int r, rl;
3080 int cur, l;
3081 xmlParserInputState state;
3082 xmlParserInputPtr input = ctxt->input;
3083 int count = 0;
3084
3085 /*
3086 * Check that there is a comment right here.
3087 */
3088 if ((RAW != '<') || (NXT(1) != '!') ||
3089 (NXT(2) != '-') || (NXT(3) != '-')) return;
3090
3091 state = ctxt->instate;
3092 ctxt->instate = XML_PARSER_COMMENT;
3093 SHRINK;
3094 SKIP(4);
3095 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3096 if (buf == NULL) {
3097 xmlGenericError(xmlGenericErrorContext,
3098 "malloc of %d byte failed\n", size);
3099 ctxt->instate = state;
3100 return;
3101 }
3102 q = CUR_CHAR(ql);
3103 NEXTL(ql);
3104 r = CUR_CHAR(rl);
3105 NEXTL(rl);
3106 cur = CUR_CHAR(l);
3107 len = 0;
3108 while (IS_CHAR(cur) && /* checked */
3109 ((cur != '>') ||
3110 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003111 if ((r == '-') && (q == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003112 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
3113 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3114 ctxt->sax->error(ctxt->userData,
3115 "Comment must not contain '--' (double-hyphen)`\n");
3116 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003117 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003118 }
3119 if (len + 5 >= size) {
3120 size *= 2;
3121 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3122 if (buf == NULL) {
3123 xmlGenericError(xmlGenericErrorContext,
3124 "realloc of %d byte failed\n", size);
3125 ctxt->instate = state;
3126 return;
3127 }
3128 }
3129 COPY_BUF(ql,buf,len,q);
3130 q = r;
3131 ql = rl;
3132 r = cur;
3133 rl = l;
3134
3135 count++;
3136 if (count > 50) {
3137 GROW;
3138 count = 0;
3139 }
3140 NEXTL(l);
3141 cur = CUR_CHAR(l);
3142 if (cur == 0) {
3143 SHRINK;
3144 GROW;
3145 cur = CUR_CHAR(l);
3146 }
3147 }
3148 buf[len] = 0;
3149 if (!IS_CHAR(cur)) {
3150 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
3151 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3152 ctxt->sax->error(ctxt->userData,
3153 "Comment not terminated \n<!--%.50s\n", buf);
3154 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003155 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003156 xmlFree(buf);
3157 } else {
3158 if (input != ctxt->input) {
3159 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3160 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3161 ctxt->sax->error(ctxt->userData,
3162"Comment doesn't start and stop in the same entity\n");
3163 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003164 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003165 }
3166 NEXT;
3167 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3168 (!ctxt->disableSAX))
3169 ctxt->sax->comment(ctxt->userData, buf);
3170 xmlFree(buf);
3171 }
3172 ctxt->instate = state;
3173}
3174
3175/**
3176 * xmlParsePITarget:
3177 * @ctxt: an XML parser context
3178 *
3179 * parse the name of a PI
3180 *
3181 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3182 *
3183 * Returns the PITarget name or NULL
3184 */
3185
3186xmlChar *
3187xmlParsePITarget(xmlParserCtxtPtr ctxt) {
3188 xmlChar *name;
3189
3190 name = xmlParseName(ctxt);
3191 if ((name != NULL) &&
3192 ((name[0] == 'x') || (name[0] == 'X')) &&
3193 ((name[1] == 'm') || (name[1] == 'M')) &&
3194 ((name[2] == 'l') || (name[2] == 'L'))) {
3195 int i;
3196 if ((name[0] == 'x') && (name[1] == 'm') &&
3197 (name[2] == 'l') && (name[3] == 0)) {
3198 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3199 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3200 ctxt->sax->error(ctxt->userData,
3201 "XML declaration allowed only at the start of the document\n");
3202 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003203 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003204 return(name);
3205 } else if (name[3] == 0) {
3206 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3207 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3208 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
3209 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003210 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003211 return(name);
3212 }
3213 for (i = 0;;i++) {
3214 if (xmlW3CPIs[i] == NULL) break;
3215 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3216 return(name);
3217 }
3218 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3219 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3220 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003221 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003222 }
3223 }
3224 return(name);
3225}
3226
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The expected content is: catalog = "URL" or catalog = 'URL', with
 * optional blanks around each token and nothing but blanks after the
 * closing quote. A syntax error raises a warning; a value where the
 * '=' does not follow the "catalog" keyword is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *cur = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* the "catalog" keyword */
    for (;IS_BLANK(*cur);cur++) ;
    if (xmlStrncmp(cur, BAD_CAST"catalog", 7) != 0)
        goto error;
    cur += 7;

    /* the '=' sign, anything else is dropped without a warning */
    for (;IS_BLANK(*cur);cur++) ;
    if (*cur != '=')
        return;
    cur++;

    /* the opening quote */
    for (;IS_BLANK(*cur);cur++) ;
    quote = *cur;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    cur++;

    /* the URL runs up to the matching quote */
    start = cur;
    for (;(*cur != 0) && (*cur != quote);cur++) ;
    if (*cur == 0)
        goto error;
    URL = xmlStrndup(start, cur - start);
    cur++;

    /* only blanks are allowed after the closing quote */
    for (;IS_BLANK(*cur);cur++) ;
    if (*cur != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3289
Owen Taylor3473f882001-02-23 17:55:21 +00003290/**
3291 * xmlParsePI:
3292 * @ctxt: an XML parser context
3293 *
3294 * parse an XML Processing Instruction.
3295 *
3296 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3297 *
3298 * The processing is transfered to SAX once parsed.
3299 */
3300
3301void
3302xmlParsePI(xmlParserCtxtPtr ctxt) {
3303 xmlChar *buf = NULL;
3304 int len = 0;
3305 int size = XML_PARSER_BUFFER_SIZE;
3306 int cur, l;
3307 xmlChar *target;
3308 xmlParserInputState state;
3309 int count = 0;
3310
3311 if ((RAW == '<') && (NXT(1) == '?')) {
3312 xmlParserInputPtr input = ctxt->input;
3313 state = ctxt->instate;
3314 ctxt->instate = XML_PARSER_PI;
3315 /*
3316 * this is a Processing Instruction.
3317 */
3318 SKIP(2);
3319 SHRINK;
3320
3321 /*
3322 * Parse the target name and check for special support like
3323 * namespace.
3324 */
3325 target = xmlParsePITarget(ctxt);
3326 if (target != NULL) {
3327 if ((RAW == '?') && (NXT(1) == '>')) {
3328 if (input != ctxt->input) {
3329 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3331 ctxt->sax->error(ctxt->userData,
3332 "PI declaration doesn't start and stop in the same entity\n");
3333 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003334 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003335 }
3336 SKIP(2);
3337
3338 /*
3339 * SAX: PI detected.
3340 */
3341 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3342 (ctxt->sax->processingInstruction != NULL))
3343 ctxt->sax->processingInstruction(ctxt->userData,
3344 target, NULL);
3345 ctxt->instate = state;
3346 xmlFree(target);
3347 return;
3348 }
3349 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3350 if (buf == NULL) {
3351 xmlGenericError(xmlGenericErrorContext,
3352 "malloc of %d byte failed\n", size);
3353 ctxt->instate = state;
3354 return;
3355 }
3356 cur = CUR;
3357 if (!IS_BLANK(cur)) {
3358 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3359 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3360 ctxt->sax->error(ctxt->userData,
3361 "xmlParsePI: PI %s space expected\n", target);
3362 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003363 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003364 }
3365 SKIP_BLANKS;
3366 cur = CUR_CHAR(l);
3367 while (IS_CHAR(cur) && /* checked */
3368 ((cur != '?') || (NXT(1) != '>'))) {
3369 if (len + 5 >= size) {
3370 size *= 2;
3371 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3372 if (buf == NULL) {
3373 xmlGenericError(xmlGenericErrorContext,
3374 "realloc of %d byte failed\n", size);
3375 ctxt->instate = state;
3376 return;
3377 }
3378 }
3379 count++;
3380 if (count > 50) {
3381 GROW;
3382 count = 0;
3383 }
3384 COPY_BUF(l,buf,len,cur);
3385 NEXTL(l);
3386 cur = CUR_CHAR(l);
3387 if (cur == 0) {
3388 SHRINK;
3389 GROW;
3390 cur = CUR_CHAR(l);
3391 }
3392 }
3393 buf[len] = 0;
3394 if (cur != '?') {
3395 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3396 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3397 ctxt->sax->error(ctxt->userData,
3398 "xmlParsePI: PI %s never end ...\n", target);
3399 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003400 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003401 } else {
3402 if (input != ctxt->input) {
3403 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3404 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3405 ctxt->sax->error(ctxt->userData,
3406 "PI declaration doesn't start and stop in the same entity\n");
3407 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003408 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003409 }
3410 SKIP(2);
3411
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003412#ifdef LIBXML_CATALOG_ENABLED
3413 if (((state == XML_PARSER_MISC) ||
3414 (state == XML_PARSER_START)) &&
3415 (xmlStrEqual(target, XML_CATALOG_PI))) {
3416 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3417 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3418 (allow == XML_CATA_ALLOW_ALL))
3419 xmlParseCatalogPI(ctxt, buf);
3420 }
3421#endif
3422
3423
Owen Taylor3473f882001-02-23 17:55:21 +00003424 /*
3425 * SAX: PI detected.
3426 */
3427 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3428 (ctxt->sax->processingInstruction != NULL))
3429 ctxt->sax->processingInstruction(ctxt->userData,
3430 target, buf);
3431 }
3432 xmlFree(buf);
3433 xmlFree(target);
3434 } else {
3435 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3436 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3437 ctxt->sax->error(ctxt->userData,
3438 "xmlParsePI : no target name\n");
3439 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003440 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003441 }
3442 ctxt->instate = state;
3443 }
3444}
3445
3446/**
3447 * xmlParseNotationDecl:
3448 * @ctxt: an XML parser context
3449 *
3450 * parse a notation declaration
3451 *
3452 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3453 *
3454 * Hence there is actually 3 choices:
3455 * 'PUBLIC' S PubidLiteral
3456 * 'PUBLIC' S PubidLiteral S SystemLiteral
3457 * and 'SYSTEM' S SystemLiteral
3458 *
3459 * See the NOTE on xmlParseExternalID().
3460 */
3461
3462void
3463xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3464 xmlChar *name;
3465 xmlChar *Pubid;
3466 xmlChar *Systemid;
3467
3468 if ((RAW == '<') && (NXT(1) == '!') &&
3469 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3470 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3471 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3472 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3473 xmlParserInputPtr input = ctxt->input;
3474 SHRINK;
3475 SKIP(10);
3476 if (!IS_BLANK(CUR)) {
3477 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3478 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3479 ctxt->sax->error(ctxt->userData,
3480 "Space required after '<!NOTATION'\n");
3481 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003482 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003483 return;
3484 }
3485 SKIP_BLANKS;
3486
Daniel Veillard76d66f42001-05-16 21:05:17 +00003487 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003488 if (name == NULL) {
3489 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3490 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3491 ctxt->sax->error(ctxt->userData,
3492 "NOTATION: Name expected here\n");
3493 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003494 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003495 return;
3496 }
3497 if (!IS_BLANK(CUR)) {
3498 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3499 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3500 ctxt->sax->error(ctxt->userData,
3501 "Space required after the NOTATION name'\n");
3502 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003503 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003504 return;
3505 }
3506 SKIP_BLANKS;
3507
3508 /*
3509 * Parse the IDs.
3510 */
3511 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3512 SKIP_BLANKS;
3513
3514 if (RAW == '>') {
3515 if (input != ctxt->input) {
3516 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3517 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3518 ctxt->sax->error(ctxt->userData,
3519"Notation declaration doesn't start and stop in the same entity\n");
3520 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003521 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003522 }
3523 NEXT;
3524 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3525 (ctxt->sax->notationDecl != NULL))
3526 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3527 } else {
3528 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3529 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3530 ctxt->sax->error(ctxt->userData,
3531 "'>' required to close NOTATION declaration\n");
3532 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003533 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003534 }
3535 xmlFree(name);
3536 if (Systemid != NULL) xmlFree(Systemid);
3537 if (Pubid != NULL) xmlFree(Pubid);
3538 }
3539}
3540
3541/**
3542 * xmlParseEntityDecl:
3543 * @ctxt: an XML parser context
3544 *
3545 * parse <!ENTITY declarations
3546 *
3547 * [70] EntityDecl ::= GEDecl | PEDecl
3548 *
3549 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3550 *
3551 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3552 *
3553 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3554 *
3555 * [74] PEDef ::= EntityValue | ExternalID
3556 *
3557 * [76] NDataDecl ::= S 'NDATA' S Name
3558 *
3559 * [ VC: Notation Declared ]
3560 * The Name must match the declared name of a notation.
3561 */
3562
3563void
3564xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3565 xmlChar *name = NULL;
3566 xmlChar *value = NULL;
3567 xmlChar *URI = NULL, *literal = NULL;
3568 xmlChar *ndata = NULL;
3569 int isParameter = 0;
3570 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003571 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003572
3573 GROW;
3574 if ((RAW == '<') && (NXT(1) == '!') &&
3575 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3576 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3577 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3578 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003579 SHRINK;
3580 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003581 skipped = SKIP_BLANKS;
3582 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003583 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3584 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3585 ctxt->sax->error(ctxt->userData,
3586 "Space required after '<!ENTITY'\n");
3587 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003588 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003589 }
Owen Taylor3473f882001-02-23 17:55:21 +00003590
3591 if (RAW == '%') {
3592 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003593 skipped = SKIP_BLANKS;
3594 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003595 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3596 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3597 ctxt->sax->error(ctxt->userData,
3598 "Space required after '%'\n");
3599 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003600 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003601 }
Owen Taylor3473f882001-02-23 17:55:21 +00003602 isParameter = 1;
3603 }
3604
Daniel Veillard76d66f42001-05-16 21:05:17 +00003605 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003606 if (name == NULL) {
3607 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3608 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3609 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3610 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003611 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003612 return;
3613 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003614 skipped = SKIP_BLANKS;
3615 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003616 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3617 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3618 ctxt->sax->error(ctxt->userData,
3619 "Space required after the entity name\n");
3620 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003621 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003622 }
Owen Taylor3473f882001-02-23 17:55:21 +00003623
Daniel Veillardf5582f12002-06-11 10:08:16 +00003624 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003625 /*
3626 * handle the various case of definitions...
3627 */
3628 if (isParameter) {
3629 if ((RAW == '"') || (RAW == '\'')) {
3630 value = xmlParseEntityValue(ctxt, &orig);
3631 if (value) {
3632 if ((ctxt->sax != NULL) &&
3633 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3634 ctxt->sax->entityDecl(ctxt->userData, name,
3635 XML_INTERNAL_PARAMETER_ENTITY,
3636 NULL, NULL, value);
3637 }
3638 } else {
3639 URI = xmlParseExternalID(ctxt, &literal, 1);
3640 if ((URI == NULL) && (literal == NULL)) {
3641 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3642 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3643 ctxt->sax->error(ctxt->userData,
3644 "Entity value required\n");
3645 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003646 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003647 }
3648 if (URI) {
3649 xmlURIPtr uri;
3650
3651 uri = xmlParseURI((const char *) URI);
3652 if (uri == NULL) {
3653 ctxt->errNo = XML_ERR_INVALID_URI;
3654 if ((ctxt->sax != NULL) &&
3655 (!ctxt->disableSAX) &&
3656 (ctxt->sax->error != NULL))
3657 ctxt->sax->error(ctxt->userData,
3658 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003659 /*
3660 * This really ought to be a well formedness error
3661 * but the XML Core WG decided otherwise c.f. issue
3662 * E26 of the XML erratas.
3663 */
Owen Taylor3473f882001-02-23 17:55:21 +00003664 } else {
3665 if (uri->fragment != NULL) {
3666 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3667 if ((ctxt->sax != NULL) &&
3668 (!ctxt->disableSAX) &&
3669 (ctxt->sax->error != NULL))
3670 ctxt->sax->error(ctxt->userData,
3671 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003672 /*
3673 * Okay this is foolish to block those but not
3674 * invalid URIs.
3675 */
Owen Taylor3473f882001-02-23 17:55:21 +00003676 ctxt->wellFormed = 0;
3677 } else {
3678 if ((ctxt->sax != NULL) &&
3679 (!ctxt->disableSAX) &&
3680 (ctxt->sax->entityDecl != NULL))
3681 ctxt->sax->entityDecl(ctxt->userData, name,
3682 XML_EXTERNAL_PARAMETER_ENTITY,
3683 literal, URI, NULL);
3684 }
3685 xmlFreeURI(uri);
3686 }
3687 }
3688 }
3689 } else {
3690 if ((RAW == '"') || (RAW == '\'')) {
3691 value = xmlParseEntityValue(ctxt, &orig);
3692 if ((ctxt->sax != NULL) &&
3693 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3694 ctxt->sax->entityDecl(ctxt->userData, name,
3695 XML_INTERNAL_GENERAL_ENTITY,
3696 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003697 /*
3698 * For expat compatibility in SAX mode.
3699 */
3700 if ((ctxt->myDoc == NULL) ||
3701 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3702 if (ctxt->myDoc == NULL) {
3703 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3704 }
3705 if (ctxt->myDoc->intSubset == NULL)
3706 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3707 BAD_CAST "fake", NULL, NULL);
3708
3709 entityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3710 NULL, NULL, value);
3711 }
Owen Taylor3473f882001-02-23 17:55:21 +00003712 } else {
3713 URI = xmlParseExternalID(ctxt, &literal, 1);
3714 if ((URI == NULL) && (literal == NULL)) {
3715 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3716 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3717 ctxt->sax->error(ctxt->userData,
3718 "Entity value required\n");
3719 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003720 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003721 }
3722 if (URI) {
3723 xmlURIPtr uri;
3724
3725 uri = xmlParseURI((const char *)URI);
3726 if (uri == NULL) {
3727 ctxt->errNo = XML_ERR_INVALID_URI;
3728 if ((ctxt->sax != NULL) &&
3729 (!ctxt->disableSAX) &&
3730 (ctxt->sax->error != NULL))
3731 ctxt->sax->error(ctxt->userData,
3732 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003733 /*
3734 * This really ought to be a well formedness error
3735 * but the XML Core WG decided otherwise c.f. issue
3736 * E26 of the XML erratas.
3737 */
Owen Taylor3473f882001-02-23 17:55:21 +00003738 } else {
3739 if (uri->fragment != NULL) {
3740 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3741 if ((ctxt->sax != NULL) &&
3742 (!ctxt->disableSAX) &&
3743 (ctxt->sax->error != NULL))
3744 ctxt->sax->error(ctxt->userData,
3745 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003746 /*
3747 * Okay this is foolish to block those but not
3748 * invalid URIs.
3749 */
Owen Taylor3473f882001-02-23 17:55:21 +00003750 ctxt->wellFormed = 0;
3751 }
3752 xmlFreeURI(uri);
3753 }
3754 }
3755 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3756 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3757 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3758 ctxt->sax->error(ctxt->userData,
3759 "Space required before 'NDATA'\n");
3760 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003761 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003762 }
3763 SKIP_BLANKS;
3764 if ((RAW == 'N') && (NXT(1) == 'D') &&
3765 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3766 (NXT(4) == 'A')) {
3767 SKIP(5);
3768 if (!IS_BLANK(CUR)) {
3769 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3770 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3771 ctxt->sax->error(ctxt->userData,
3772 "Space required after 'NDATA'\n");
3773 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003774 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003775 }
3776 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003777 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003778 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3779 (ctxt->sax->unparsedEntityDecl != NULL))
3780 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3781 literal, URI, ndata);
3782 } else {
3783 if ((ctxt->sax != NULL) &&
3784 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3785 ctxt->sax->entityDecl(ctxt->userData, name,
3786 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3787 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003788 /*
3789 * For expat compatibility in SAX mode.
3790 * assuming the entity repalcement was asked for
3791 */
3792 if ((ctxt->replaceEntities != 0) &&
3793 ((ctxt->myDoc == NULL) ||
3794 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
3795 if (ctxt->myDoc == NULL) {
3796 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3797 }
3798
3799 if (ctxt->myDoc->intSubset == NULL)
3800 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3801 BAD_CAST "fake", NULL, NULL);
3802 entityDecl(ctxt, name,
3803 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3804 literal, URI, NULL);
3805 }
Owen Taylor3473f882001-02-23 17:55:21 +00003806 }
3807 }
3808 }
3809 SKIP_BLANKS;
3810 if (RAW != '>') {
3811 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3812 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3813 ctxt->sax->error(ctxt->userData,
3814 "xmlParseEntityDecl: entity %s not terminated\n", name);
3815 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003816 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003817 } else {
3818 if (input != ctxt->input) {
3819 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3820 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3821 ctxt->sax->error(ctxt->userData,
3822"Entity declaration doesn't start and stop in the same entity\n");
3823 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003824 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003825 }
3826 NEXT;
3827 }
3828 if (orig != NULL) {
3829 /*
3830 * Ugly mechanism to save the raw entity value.
3831 */
3832 xmlEntityPtr cur = NULL;
3833
3834 if (isParameter) {
3835 if ((ctxt->sax != NULL) &&
3836 (ctxt->sax->getParameterEntity != NULL))
3837 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3838 } else {
3839 if ((ctxt->sax != NULL) &&
3840 (ctxt->sax->getEntity != NULL))
3841 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003842 if ((cur == NULL) && (ctxt->userData==ctxt)) {
3843 cur = getEntity(ctxt, name);
3844 }
Owen Taylor3473f882001-02-23 17:55:21 +00003845 }
3846 if (cur != NULL) {
3847 if (cur->orig != NULL)
3848 xmlFree(orig);
3849 else
3850 cur->orig = orig;
3851 } else
3852 xmlFree(orig);
3853 }
3854 if (name != NULL) xmlFree(name);
3855 if (value != NULL) xmlFree(value);
3856 if (URI != NULL) xmlFree(URI);
3857 if (literal != NULL) xmlFree(literal);
3858 if (ndata != NULL) xmlFree(ndata);
3859 }
3860}
3861
3862/**
3863 * xmlParseDefaultDecl:
3864 * @ctxt: an XML parser context
3865 * @value: Receive a possible fixed default value for the attribute
3866 *
3867 * Parse an attribute default declaration
3868 *
3869 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3870 *
3871 * [ VC: Required Attribute ]
3872 * if the default declaration is the keyword #REQUIRED, then the
3873 * attribute must be specified for all elements of the type in the
3874 * attribute-list declaration.
3875 *
3876 * [ VC: Attribute Default Legal ]
3877 * The declared default value must meet the lexical constraints of
3878 * the declared attribute type c.f. xmlValidateAttributeDecl()
3879 *
3880 * [ VC: Fixed Attribute Default ]
3881 * if an attribute has a default value declared with the #FIXED
3882 * keyword, instances of that attribute must match the default value.
3883 *
3884 * [ WFC: No < in Attribute Values ]
3885 * handled in xmlParseAttValue()
3886 *
3887 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3888 * or XML_ATTRIBUTE_FIXED.
3889 */
3890
3891int
3892xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3893 int val;
3894 xmlChar *ret;
3895
3896 *value = NULL;
3897 if ((RAW == '#') && (NXT(1) == 'R') &&
3898 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3899 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3900 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3901 (NXT(8) == 'D')) {
3902 SKIP(9);
3903 return(XML_ATTRIBUTE_REQUIRED);
3904 }
3905 if ((RAW == '#') && (NXT(1) == 'I') &&
3906 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3907 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3908 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3909 SKIP(8);
3910 return(XML_ATTRIBUTE_IMPLIED);
3911 }
3912 val = XML_ATTRIBUTE_NONE;
3913 if ((RAW == '#') && (NXT(1) == 'F') &&
3914 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3915 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3916 SKIP(6);
3917 val = XML_ATTRIBUTE_FIXED;
3918 if (!IS_BLANK(CUR)) {
3919 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3920 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3921 ctxt->sax->error(ctxt->userData,
3922 "Space required after '#FIXED'\n");
3923 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003924 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003925 }
3926 SKIP_BLANKS;
3927 }
3928 ret = xmlParseAttValue(ctxt);
3929 ctxt->instate = XML_PARSER_DTD;
3930 if (ret == NULL) {
3931 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3932 ctxt->sax->error(ctxt->userData,
3933 "Attribute default value declaration error\n");
3934 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003935 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003936 } else
3937 *value = ret;
3938 return(val);
3939}
3940
3941/**
3942 * xmlParseNotationType:
3943 * @ctxt: an XML parser context
3944 *
3945 * parse an Notation attribute type.
3946 *
3947 * Note: the leading 'NOTATION' S part has already being parsed...
3948 *
3949 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3950 *
3951 * [ VC: Notation Attributes ]
3952 * Values of this type must match one of the notation names included
3953 * in the declaration; all notation names in the declaration must be declared.
3954 *
3955 * Returns: the notation attribute tree built while parsing
3956 */
3957
3958xmlEnumerationPtr
3959xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3960 xmlChar *name;
3961 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3962
3963 if (RAW != '(') {
3964 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3965 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3966 ctxt->sax->error(ctxt->userData,
3967 "'(' required to start 'NOTATION'\n");
3968 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003969 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003970 return(NULL);
3971 }
3972 SHRINK;
3973 do {
3974 NEXT;
3975 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003976 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003977 if (name == NULL) {
3978 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3979 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3980 ctxt->sax->error(ctxt->userData,
3981 "Name expected in NOTATION declaration\n");
3982 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003983 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003984 return(ret);
3985 }
3986 cur = xmlCreateEnumeration(name);
3987 xmlFree(name);
3988 if (cur == NULL) return(ret);
3989 if (last == NULL) ret = last = cur;
3990 else {
3991 last->next = cur;
3992 last = cur;
3993 }
3994 SKIP_BLANKS;
3995 } while (RAW == '|');
3996 if (RAW != ')') {
3997 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3998 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3999 ctxt->sax->error(ctxt->userData,
4000 "')' required to finish NOTATION declaration\n");
4001 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004002 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004003 if ((last != NULL) && (last != ret))
4004 xmlFreeEnumeration(last);
4005 return(ret);
4006 }
4007 NEXT;
4008 return(ret);
4009}
4010
4011/**
4012 * xmlParseEnumerationType:
4013 * @ctxt: an XML parser context
4014 *
4015 * parse an Enumeration attribute type.
4016 *
4017 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4018 *
4019 * [ VC: Enumeration ]
4020 * Values of this type must match one of the Nmtoken tokens in
4021 * the declaration
4022 *
4023 * Returns: the enumeration attribute tree built while parsing
4024 */
4025
4026xmlEnumerationPtr
4027xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4028 xmlChar *name;
4029 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4030
4031 if (RAW != '(') {
4032 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
4033 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4034 ctxt->sax->error(ctxt->userData,
4035 "'(' required to start ATTLIST enumeration\n");
4036 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004037 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004038 return(NULL);
4039 }
4040 SHRINK;
4041 do {
4042 NEXT;
4043 SKIP_BLANKS;
4044 name = xmlParseNmtoken(ctxt);
4045 if (name == NULL) {
4046 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
4047 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4048 ctxt->sax->error(ctxt->userData,
4049 "NmToken expected in ATTLIST enumeration\n");
4050 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004051 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004052 return(ret);
4053 }
4054 cur = xmlCreateEnumeration(name);
4055 xmlFree(name);
4056 if (cur == NULL) return(ret);
4057 if (last == NULL) ret = last = cur;
4058 else {
4059 last->next = cur;
4060 last = cur;
4061 }
4062 SKIP_BLANKS;
4063 } while (RAW == '|');
4064 if (RAW != ')') {
4065 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
4066 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4067 ctxt->sax->error(ctxt->userData,
4068 "')' required to finish ATTLIST enumeration\n");
4069 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004070 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004071 return(ret);
4072 }
4073 NEXT;
4074 return(ret);
4075}
4076
4077/**
4078 * xmlParseEnumeratedType:
4079 * @ctxt: an XML parser context
4080 * @tree: the enumeration tree built while parsing
4081 *
4082 * parse an Enumerated attribute type.
4083 *
4084 * [57] EnumeratedType ::= NotationType | Enumeration
4085 *
4086 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4087 *
4088 *
4089 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4090 */
4091
4092int
4093xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4094 if ((RAW == 'N') && (NXT(1) == 'O') &&
4095 (NXT(2) == 'T') && (NXT(3) == 'A') &&
4096 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4097 (NXT(6) == 'O') && (NXT(7) == 'N')) {
4098 SKIP(8);
4099 if (!IS_BLANK(CUR)) {
4100 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4101 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4102 ctxt->sax->error(ctxt->userData,
4103 "Space required after 'NOTATION'\n");
4104 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004105 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004106 return(0);
4107 }
4108 SKIP_BLANKS;
4109 *tree = xmlParseNotationType(ctxt);
4110 if (*tree == NULL) return(0);
4111 return(XML_ATTRIBUTE_NOTATION);
4112 }
4113 *tree = xmlParseEnumerationType(ctxt);
4114 if (*tree == NULL) return(0);
4115 return(XML_ATTRIBUTE_ENUMERATION);
4116}
4117
4118/**
4119 * xmlParseAttributeType:
4120 * @ctxt: an XML parser context
4121 * @tree: the enumeration tree built while parsing
4122 *
4123 * parse the Attribute list def for an element
4124 *
4125 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4126 *
4127 * [55] StringType ::= 'CDATA'
4128 *
4129 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4130 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4131 *
4132 * Validity constraints for attribute values syntax are checked in
4133 * xmlValidateAttributeValue()
4134 *
4135 * [ VC: ID ]
4136 * Values of type ID must match the Name production. A name must not
4137 * appear more than once in an XML document as a value of this type;
4138 * i.e., ID values must uniquely identify the elements which bear them.
4139 *
4140 * [ VC: One ID per Element Type ]
4141 * No element type may have more than one ID attribute specified.
4142 *
4143 * [ VC: ID Attribute Default ]
4144 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4145 *
4146 * [ VC: IDREF ]
4147 * Values of type IDREF must match the Name production, and values
4148 * of type IDREFS must match Names; each IDREF Name must match the value
4149 * of an ID attribute on some element in the XML document; i.e. IDREF
4150 * values must match the value of some ID attribute.
4151 *
4152 * [ VC: Entity Name ]
4153 * Values of type ENTITY must match the Name production, values
4154 * of type ENTITIES must match Names; each Entity Name must match the
4155 * name of an unparsed entity declared in the DTD.
4156 *
4157 * [ VC: Name Token ]
4158 * Values of type NMTOKEN must match the Nmtoken production; values
4159 * of type NMTOKENS must match Nmtokens.
4160 *
4161 * Returns the attribute type
4162 */
4163int
4164xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4165 SHRINK;
4166 if ((RAW == 'C') && (NXT(1) == 'D') &&
4167 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4168 (NXT(4) == 'A')) {
4169 SKIP(5);
4170 return(XML_ATTRIBUTE_CDATA);
4171 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4172 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4173 (NXT(4) == 'F') && (NXT(5) == 'S')) {
4174 SKIP(6);
4175 return(XML_ATTRIBUTE_IDREFS);
4176 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4177 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4178 (NXT(4) == 'F')) {
4179 SKIP(5);
4180 return(XML_ATTRIBUTE_IDREF);
4181 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4182 SKIP(2);
4183 return(XML_ATTRIBUTE_ID);
4184 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4185 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4186 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4187 SKIP(6);
4188 return(XML_ATTRIBUTE_ENTITY);
4189 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4190 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4191 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4192 (NXT(6) == 'E') && (NXT(7) == 'S')) {
4193 SKIP(8);
4194 return(XML_ATTRIBUTE_ENTITIES);
4195 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4196 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4197 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4198 (NXT(6) == 'N') && (NXT(7) == 'S')) {
4199 SKIP(8);
4200 return(XML_ATTRIBUTE_NMTOKENS);
4201 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4202 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4203 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4204 (NXT(6) == 'N')) {
4205 SKIP(7);
4206 return(XML_ATTRIBUTE_NMTOKEN);
4207 }
4208 return(xmlParseEnumeratedType(ctxt, tree));
4209}
4210
4211/**
4212 * xmlParseAttributeListDecl:
4213 * @ctxt: an XML parser context
4214 *
4215 * : parse the Attribute list def for an element
4216 *
4217 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4218 *
4219 * [53] AttDef ::= S Name S AttType S DefaultDecl
4220 *
4221 */
4222void
4223xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
4224 xmlChar *elemName;
4225 xmlChar *attrName;
4226 xmlEnumerationPtr tree;
4227
4228 if ((RAW == '<') && (NXT(1) == '!') &&
4229 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4230 (NXT(4) == 'T') && (NXT(5) == 'L') &&
4231 (NXT(6) == 'I') && (NXT(7) == 'S') &&
4232 (NXT(8) == 'T')) {
4233 xmlParserInputPtr input = ctxt->input;
4234
4235 SKIP(9);
4236 if (!IS_BLANK(CUR)) {
4237 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4238 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4239 ctxt->sax->error(ctxt->userData,
4240 "Space required after '<!ATTLIST'\n");
4241 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004242 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004243 }
4244 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004245 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004246 if (elemName == NULL) {
4247 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4248 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4249 ctxt->sax->error(ctxt->userData,
4250 "ATTLIST: no name for Element\n");
4251 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004252 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004253 return;
4254 }
4255 SKIP_BLANKS;
4256 GROW;
4257 while (RAW != '>') {
4258 const xmlChar *check = CUR_PTR;
4259 int type;
4260 int def;
4261 xmlChar *defaultValue = NULL;
4262
4263 GROW;
4264 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004265 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004266 if (attrName == NULL) {
4267 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4268 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4269 ctxt->sax->error(ctxt->userData,
4270 "ATTLIST: no name for Attribute\n");
4271 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004272 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004273 break;
4274 }
4275 GROW;
4276 if (!IS_BLANK(CUR)) {
4277 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4278 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4279 ctxt->sax->error(ctxt->userData,
4280 "Space required after the attribute name\n");
4281 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004282 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004283 if (attrName != NULL)
4284 xmlFree(attrName);
4285 if (defaultValue != NULL)
4286 xmlFree(defaultValue);
4287 break;
4288 }
4289 SKIP_BLANKS;
4290
4291 type = xmlParseAttributeType(ctxt, &tree);
4292 if (type <= 0) {
4293 if (attrName != NULL)
4294 xmlFree(attrName);
4295 if (defaultValue != NULL)
4296 xmlFree(defaultValue);
4297 break;
4298 }
4299
4300 GROW;
4301 if (!IS_BLANK(CUR)) {
4302 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4303 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4304 ctxt->sax->error(ctxt->userData,
4305 "Space required after the attribute type\n");
4306 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004307 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004308 if (attrName != NULL)
4309 xmlFree(attrName);
4310 if (defaultValue != NULL)
4311 xmlFree(defaultValue);
4312 if (tree != NULL)
4313 xmlFreeEnumeration(tree);
4314 break;
4315 }
4316 SKIP_BLANKS;
4317
4318 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4319 if (def <= 0) {
4320 if (attrName != NULL)
4321 xmlFree(attrName);
4322 if (defaultValue != NULL)
4323 xmlFree(defaultValue);
4324 if (tree != NULL)
4325 xmlFreeEnumeration(tree);
4326 break;
4327 }
4328
4329 GROW;
4330 if (RAW != '>') {
4331 if (!IS_BLANK(CUR)) {
4332 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4333 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4334 ctxt->sax->error(ctxt->userData,
4335 "Space required after the attribute default value\n");
4336 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004337 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004338 if (attrName != NULL)
4339 xmlFree(attrName);
4340 if (defaultValue != NULL)
4341 xmlFree(defaultValue);
4342 if (tree != NULL)
4343 xmlFreeEnumeration(tree);
4344 break;
4345 }
4346 SKIP_BLANKS;
4347 }
4348 if (check == CUR_PTR) {
4349 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4350 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4351 ctxt->sax->error(ctxt->userData,
4352 "xmlParseAttributeListDecl: detected internal error\n");
4353 if (attrName != NULL)
4354 xmlFree(attrName);
4355 if (defaultValue != NULL)
4356 xmlFree(defaultValue);
4357 if (tree != NULL)
4358 xmlFreeEnumeration(tree);
4359 break;
4360 }
4361 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4362 (ctxt->sax->attributeDecl != NULL))
4363 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4364 type, def, defaultValue, tree);
4365 if (attrName != NULL)
4366 xmlFree(attrName);
4367 if (defaultValue != NULL)
4368 xmlFree(defaultValue);
4369 GROW;
4370 }
4371 if (RAW == '>') {
4372 if (input != ctxt->input) {
4373 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4374 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4375 ctxt->sax->error(ctxt->userData,
4376"Attribute list declaration doesn't start and stop in the same entity\n");
4377 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004378 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004379 }
4380 NEXT;
4381 }
4382
4383 xmlFree(elemName);
4384 }
4385}
4386
4387/**
4388 * xmlParseElementMixedContentDecl:
4389 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004390 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004391 *
4392 * parse the declaration for a Mixed Element content
4393 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4394 *
4395 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4396 * '(' S? '#PCDATA' S? ')'
4397 *
4398 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4399 *
4400 * [ VC: No Duplicate Types ]
4401 * The same name must not appear more than once in a single
4402 * mixed-content declaration.
4403 *
4404 * returns: the list of the xmlElementContentPtr describing the element choices
4405 */
4406xmlElementContentPtr
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004407xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004408 xmlElementContentPtr ret = NULL, cur = NULL, n;
4409 xmlChar *elem = NULL;
4410
4411 GROW;
4412 if ((RAW == '#') && (NXT(1) == 'P') &&
4413 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4414 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4415 (NXT(6) == 'A')) {
4416 SKIP(7);
4417 SKIP_BLANKS;
4418 SHRINK;
4419 if (RAW == ')') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004420 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4421 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4422 if (ctxt->vctxt.error != NULL)
4423 ctxt->vctxt.error(ctxt->vctxt.userData,
4424"Element content declaration doesn't start and stop in the same entity\n");
4425 ctxt->valid = 0;
4426 }
Owen Taylor3473f882001-02-23 17:55:21 +00004427 NEXT;
4428 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4429 if (RAW == '*') {
4430 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4431 NEXT;
4432 }
4433 return(ret);
4434 }
4435 if ((RAW == '(') || (RAW == '|')) {
4436 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4437 if (ret == NULL) return(NULL);
4438 }
4439 while (RAW == '|') {
4440 NEXT;
4441 if (elem == NULL) {
4442 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4443 if (ret == NULL) return(NULL);
4444 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004445 if (cur != NULL)
4446 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004447 cur = ret;
4448 } else {
4449 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4450 if (n == NULL) return(NULL);
4451 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004452 if (n->c1 != NULL)
4453 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004454 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004455 if (n != NULL)
4456 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004457 cur = n;
4458 xmlFree(elem);
4459 }
4460 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004461 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004462 if (elem == NULL) {
4463 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4464 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4465 ctxt->sax->error(ctxt->userData,
4466 "xmlParseElementMixedContentDecl : Name expected\n");
4467 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004468 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004469 xmlFreeElementContent(cur);
4470 return(NULL);
4471 }
4472 SKIP_BLANKS;
4473 GROW;
4474 }
4475 if ((RAW == ')') && (NXT(1) == '*')) {
4476 if (elem != NULL) {
4477 cur->c2 = xmlNewElementContent(elem,
4478 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004479 if (cur->c2 != NULL)
4480 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004481 xmlFree(elem);
4482 }
4483 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004484 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4485 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4486 if (ctxt->vctxt.error != NULL)
4487 ctxt->vctxt.error(ctxt->vctxt.userData,
4488"Element content declaration doesn't start and stop in the same entity\n");
4489 ctxt->valid = 0;
4490 }
Owen Taylor3473f882001-02-23 17:55:21 +00004491 SKIP(2);
4492 } else {
4493 if (elem != NULL) xmlFree(elem);
4494 xmlFreeElementContent(ret);
4495 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4496 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4497 ctxt->sax->error(ctxt->userData,
4498 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4499 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004500 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004501 return(NULL);
4502 }
4503
4504 } else {
4505 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4506 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4507 ctxt->sax->error(ctxt->userData,
4508 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4509 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004510 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004511 }
4512 return(ret);
4513}
4514
4515/**
4516 * xmlParseElementChildrenContentDecl:
4517 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004518 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004519 *
4520 * parse the declaration for an Element children content model
4521 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4522 *
4523 *
4524 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4525 *
4526 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4527 *
4528 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4529 *
4530 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4531 *
4532 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4533 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004534 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004535 * opening or closing parentheses in a choice, seq, or Mixed
4536 * construct is contained in the replacement text for a parameter
4537 * entity, both must be contained in the same replacement text. For
4538 * interoperability, if a parameter-entity reference appears in a
4539 * choice, seq, or Mixed construct, its replacement text should not
4540 * be empty, and neither the first nor last non-blank character of
4541 * the replacement text should be a connector (| or ,).
4542 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004543 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004544 * hierarchy.
4545 */
4546xmlElementContentPtr
Owen Taylor3473f882001-02-23 17:55:21 +00004547xmlParseElementChildrenContentDecl
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004548(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004549 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4550 xmlChar *elem;
4551 xmlChar type = 0;
4552
4553 SKIP_BLANKS;
4554 GROW;
4555 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004556 xmlParserInputPtr input = ctxt->input;
4557
Owen Taylor3473f882001-02-23 17:55:21 +00004558 /* Recurse on first child */
4559 NEXT;
4560 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004561 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004562 SKIP_BLANKS;
4563 GROW;
4564 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004565 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004566 if (elem == NULL) {
4567 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4568 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4569 ctxt->sax->error(ctxt->userData,
4570 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4571 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004572 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004573 return(NULL);
4574 }
4575 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4576 GROW;
4577 if (RAW == '?') {
4578 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4579 NEXT;
4580 } else if (RAW == '*') {
4581 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4582 NEXT;
4583 } else if (RAW == '+') {
4584 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4585 NEXT;
4586 } else {
4587 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4588 }
4589 xmlFree(elem);
4590 GROW;
4591 }
4592 SKIP_BLANKS;
4593 SHRINK;
4594 while (RAW != ')') {
4595 /*
4596 * Each loop we parse one separator and one element.
4597 */
4598 if (RAW == ',') {
4599 if (type == 0) type = CUR;
4600
4601 /*
4602 * Detect "Name | Name , Name" error
4603 */
4604 else if (type != CUR) {
4605 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4606 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4607 ctxt->sax->error(ctxt->userData,
4608 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4609 type);
4610 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004611 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004612 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004613 xmlFreeElementContent(last);
4614 if (ret != NULL)
4615 xmlFreeElementContent(ret);
4616 return(NULL);
4617 }
4618 NEXT;
4619
4620 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4621 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004622 if ((last != NULL) && (last != ret))
4623 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004624 xmlFreeElementContent(ret);
4625 return(NULL);
4626 }
4627 if (last == NULL) {
4628 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004629 if (ret != NULL)
4630 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004631 ret = cur = op;
4632 } else {
4633 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004634 if (op != NULL)
4635 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004636 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004637 if (last != NULL)
4638 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004639 cur =op;
4640 last = NULL;
4641 }
4642 } else if (RAW == '|') {
4643 if (type == 0) type = CUR;
4644
4645 /*
4646 * Detect "Name , Name | Name" error
4647 */
4648 else if (type != CUR) {
4649 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4650 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4651 ctxt->sax->error(ctxt->userData,
4652 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4653 type);
4654 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004655 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004656 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004657 xmlFreeElementContent(last);
4658 if (ret != NULL)
4659 xmlFreeElementContent(ret);
4660 return(NULL);
4661 }
4662 NEXT;
4663
4664 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4665 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004666 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004667 xmlFreeElementContent(last);
4668 if (ret != NULL)
4669 xmlFreeElementContent(ret);
4670 return(NULL);
4671 }
4672 if (last == NULL) {
4673 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004674 if (ret != NULL)
4675 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004676 ret = cur = op;
4677 } else {
4678 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004679 if (op != NULL)
4680 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004681 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004682 if (last != NULL)
4683 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004684 cur =op;
4685 last = NULL;
4686 }
4687 } else {
4688 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4689 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4690 ctxt->sax->error(ctxt->userData,
4691 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4692 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004693 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004694 if (ret != NULL)
4695 xmlFreeElementContent(ret);
4696 return(NULL);
4697 }
4698 GROW;
4699 SKIP_BLANKS;
4700 GROW;
4701 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004702 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004703 /* Recurse on second child */
4704 NEXT;
4705 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004706 last = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004707 SKIP_BLANKS;
4708 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004709 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004710 if (elem == NULL) {
4711 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4712 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4713 ctxt->sax->error(ctxt->userData,
4714 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4715 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004716 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004717 if (ret != NULL)
4718 xmlFreeElementContent(ret);
4719 return(NULL);
4720 }
4721 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4722 xmlFree(elem);
4723 if (RAW == '?') {
4724 last->ocur = XML_ELEMENT_CONTENT_OPT;
4725 NEXT;
4726 } else if (RAW == '*') {
4727 last->ocur = XML_ELEMENT_CONTENT_MULT;
4728 NEXT;
4729 } else if (RAW == '+') {
4730 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4731 NEXT;
4732 } else {
4733 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4734 }
4735 }
4736 SKIP_BLANKS;
4737 GROW;
4738 }
4739 if ((cur != NULL) && (last != NULL)) {
4740 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004741 if (last != NULL)
4742 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004743 }
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004744 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4745 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4746 if (ctxt->vctxt.error != NULL)
4747 ctxt->vctxt.error(ctxt->vctxt.userData,
4748"Element content declaration doesn't start and stop in the same entity\n");
4749 ctxt->valid = 0;
4750 }
Owen Taylor3473f882001-02-23 17:55:21 +00004751 NEXT;
4752 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004753 if (ret != NULL)
4754 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004755 NEXT;
4756 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004757 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004758 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004759 cur = ret;
4760 /*
4761 * Some normalization:
4762 * (a | b* | c?)* == (a | b | c)*
4763 */
4764 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4765 if ((cur->c1 != NULL) &&
4766 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4767 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4768 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4769 if ((cur->c2 != NULL) &&
4770 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4771 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4772 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4773 cur = cur->c2;
4774 }
4775 }
Owen Taylor3473f882001-02-23 17:55:21 +00004776 NEXT;
4777 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004778 if (ret != NULL) {
4779 int found = 0;
4780
Daniel Veillarde470df72001-04-18 21:41:07 +00004781 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004782 /*
4783 * Some normalization:
4784 * (a | b*)+ == (a | b)*
4785 * (a | b?)+ == (a | b)*
4786 */
4787 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4788 if ((cur->c1 != NULL) &&
4789 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4790 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4791 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4792 found = 1;
4793 }
4794 if ((cur->c2 != NULL) &&
4795 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4796 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4797 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4798 found = 1;
4799 }
4800 cur = cur->c2;
4801 }
4802 if (found)
4803 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4804 }
Owen Taylor3473f882001-02-23 17:55:21 +00004805 NEXT;
4806 }
4807 return(ret);
4808}
4809
4810/**
4811 * xmlParseElementContentDecl:
4812 * @ctxt: an XML parser context
4813 * @name: the name of the element being defined.
4814 * @result: the Element Content pointer will be stored here if any
4815 *
4816 * parse the declaration for an Element content either Mixed or Children,
4817 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4818 *
4819 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4820 *
4821 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4822 */
4823
4824int
4825xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4826 xmlElementContentPtr *result) {
4827
4828 xmlElementContentPtr tree = NULL;
4829 xmlParserInputPtr input = ctxt->input;
4830 int res;
4831
4832 *result = NULL;
4833
4834 if (RAW != '(') {
4835 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4836 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4837 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004838 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004839 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004840 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004841 return(-1);
4842 }
4843 NEXT;
4844 GROW;
4845 SKIP_BLANKS;
4846 if ((RAW == '#') && (NXT(1) == 'P') &&
4847 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4848 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4849 (NXT(6) == 'A')) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004850 tree = xmlParseElementMixedContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004851 res = XML_ELEMENT_TYPE_MIXED;
4852 } else {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004853 tree = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004854 res = XML_ELEMENT_TYPE_ELEMENT;
4855 }
Owen Taylor3473f882001-02-23 17:55:21 +00004856 SKIP_BLANKS;
4857 *result = tree;
4858 return(res);
4859}
4860
4861/**
4862 * xmlParseElementDecl:
4863 * @ctxt: an XML parser context
4864 *
4865 * parse an Element declaration.
4866 *
4867 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4868 *
4869 * [ VC: Unique Element Type Declaration ]
4870 * No element type may be declared more than once
4871 *
4872 * Returns the type of the element, or -1 in case of error
4873 */
4874int
4875xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4876 xmlChar *name;
4877 int ret = -1;
4878 xmlElementContentPtr content = NULL;
4879
4880 GROW;
4881 if ((RAW == '<') && (NXT(1) == '!') &&
4882 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4883 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4884 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4885 (NXT(8) == 'T')) {
4886 xmlParserInputPtr input = ctxt->input;
4887
4888 SKIP(9);
4889 if (!IS_BLANK(CUR)) {
4890 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4891 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4892 ctxt->sax->error(ctxt->userData,
4893 "Space required after 'ELEMENT'\n");
4894 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004895 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004896 }
4897 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004898 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004899 if (name == NULL) {
4900 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4901 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4902 ctxt->sax->error(ctxt->userData,
4903 "xmlParseElementDecl: no name for Element\n");
4904 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004905 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004906 return(-1);
4907 }
4908 while ((RAW == 0) && (ctxt->inputNr > 1))
4909 xmlPopInput(ctxt);
4910 if (!IS_BLANK(CUR)) {
4911 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4912 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4913 ctxt->sax->error(ctxt->userData,
4914 "Space required after the element name\n");
4915 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004916 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004917 }
4918 SKIP_BLANKS;
4919 if ((RAW == 'E') && (NXT(1) == 'M') &&
4920 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4921 (NXT(4) == 'Y')) {
4922 SKIP(5);
4923 /*
4924 * Element must always be empty.
4925 */
4926 ret = XML_ELEMENT_TYPE_EMPTY;
4927 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4928 (NXT(2) == 'Y')) {
4929 SKIP(3);
4930 /*
4931 * Element is a generic container.
4932 */
4933 ret = XML_ELEMENT_TYPE_ANY;
4934 } else if (RAW == '(') {
4935 ret = xmlParseElementContentDecl(ctxt, name, &content);
4936 } else {
4937 /*
4938 * [ WFC: PEs in Internal Subset ] error handling.
4939 */
4940 if ((RAW == '%') && (ctxt->external == 0) &&
4941 (ctxt->inputNr == 1)) {
4942 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4943 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4944 ctxt->sax->error(ctxt->userData,
4945 "PEReference: forbidden within markup decl in internal subset\n");
4946 } else {
4947 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4948 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4949 ctxt->sax->error(ctxt->userData,
4950 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4951 }
4952 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004953 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004954 if (name != NULL) xmlFree(name);
4955 return(-1);
4956 }
4957
4958 SKIP_BLANKS;
4959 /*
4960 * Pop-up of finished entities.
4961 */
4962 while ((RAW == 0) && (ctxt->inputNr > 1))
4963 xmlPopInput(ctxt);
4964 SKIP_BLANKS;
4965
4966 if (RAW != '>') {
4967 ctxt->errNo = XML_ERR_GT_REQUIRED;
4968 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4969 ctxt->sax->error(ctxt->userData,
4970 "xmlParseElementDecl: expected '>' at the end\n");
4971 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004972 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004973 } else {
4974 if (input != ctxt->input) {
4975 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4976 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4977 ctxt->sax->error(ctxt->userData,
4978"Element declaration doesn't start and stop in the same entity\n");
4979 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004980 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004981 }
4982
4983 NEXT;
4984 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4985 (ctxt->sax->elementDecl != NULL))
4986 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4987 content);
4988 }
4989 if (content != NULL) {
4990 xmlFreeElementContent(content);
4991 }
4992 if (name != NULL) {
4993 xmlFree(name);
4994 }
4995 }
4996 return(ret);
4997}
4998
4999/**
Owen Taylor3473f882001-02-23 17:55:21 +00005000 * xmlParseConditionalSections
5001 * @ctxt: an XML parser context
5002 *
5003 * [61] conditionalSect ::= includeSect | ignoreSect
5004 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5005 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5006 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5007 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5008 */
5009
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005010static void
Owen Taylor3473f882001-02-23 17:55:21 +00005011xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5012 SKIP(3);
5013 SKIP_BLANKS;
5014 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
5015 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
5016 (NXT(6) == 'E')) {
5017 SKIP(7);
5018 SKIP_BLANKS;
5019 if (RAW != '[') {
5020 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5021 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5022 ctxt->sax->error(ctxt->userData,
5023 "XML conditional section '[' expected\n");
5024 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005025 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005026 } else {
5027 NEXT;
5028 }
5029 if (xmlParserDebugEntities) {
5030 if ((ctxt->input != NULL) && (ctxt->input->filename))
5031 xmlGenericError(xmlGenericErrorContext,
5032 "%s(%d): ", ctxt->input->filename,
5033 ctxt->input->line);
5034 xmlGenericError(xmlGenericErrorContext,
5035 "Entering INCLUDE Conditional Section\n");
5036 }
5037
5038 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5039 (NXT(2) != '>'))) {
5040 const xmlChar *check = CUR_PTR;
5041 int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005042
5043 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5044 xmlParseConditionalSections(ctxt);
5045 } else if (IS_BLANK(CUR)) {
5046 NEXT;
5047 } else if (RAW == '%') {
5048 xmlParsePEReference(ctxt);
5049 } else
5050 xmlParseMarkupDecl(ctxt);
5051
5052 /*
5053 * Pop-up of finished entities.
5054 */
5055 while ((RAW == 0) && (ctxt->inputNr > 1))
5056 xmlPopInput(ctxt);
5057
Daniel Veillardfdc91562002-07-01 21:52:03 +00005058 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005059 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5060 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5061 ctxt->sax->error(ctxt->userData,
5062 "Content error in the external subset\n");
5063 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005064 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005065 break;
5066 }
5067 }
5068 if (xmlParserDebugEntities) {
5069 if ((ctxt->input != NULL) && (ctxt->input->filename))
5070 xmlGenericError(xmlGenericErrorContext,
5071 "%s(%d): ", ctxt->input->filename,
5072 ctxt->input->line);
5073 xmlGenericError(xmlGenericErrorContext,
5074 "Leaving INCLUDE Conditional Section\n");
5075 }
5076
5077 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
5078 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
5079 int state;
5080 int instate;
5081 int depth = 0;
5082
5083 SKIP(6);
5084 SKIP_BLANKS;
5085 if (RAW != '[') {
5086 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5087 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5088 ctxt->sax->error(ctxt->userData,
5089 "XML conditional section '[' expected\n");
5090 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005091 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005092 } else {
5093 NEXT;
5094 }
5095 if (xmlParserDebugEntities) {
5096 if ((ctxt->input != NULL) && (ctxt->input->filename))
5097 xmlGenericError(xmlGenericErrorContext,
5098 "%s(%d): ", ctxt->input->filename,
5099 ctxt->input->line);
5100 xmlGenericError(xmlGenericErrorContext,
5101 "Entering IGNORE Conditional Section\n");
5102 }
5103
5104 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005105 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005106 * But disable SAX event generating DTD building in the meantime
5107 */
5108 state = ctxt->disableSAX;
5109 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005110 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005111 ctxt->instate = XML_PARSER_IGNORE;
5112
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005113 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005114 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5115 depth++;
5116 SKIP(3);
5117 continue;
5118 }
5119 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5120 if (--depth >= 0) SKIP(3);
5121 continue;
5122 }
5123 NEXT;
5124 continue;
5125 }
5126
5127 ctxt->disableSAX = state;
5128 ctxt->instate = instate;
5129
5130 if (xmlParserDebugEntities) {
5131 if ((ctxt->input != NULL) && (ctxt->input->filename))
5132 xmlGenericError(xmlGenericErrorContext,
5133 "%s(%d): ", ctxt->input->filename,
5134 ctxt->input->line);
5135 xmlGenericError(xmlGenericErrorContext,
5136 "Leaving IGNORE Conditional Section\n");
5137 }
5138
5139 } else {
5140 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5141 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5142 ctxt->sax->error(ctxt->userData,
5143 "XML conditional section INCLUDE or IGNORE keyword expected\n");
5144 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005145 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005146 }
5147
5148 if (RAW == 0)
5149 SHRINK;
5150
5151 if (RAW == 0) {
5152 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
5153 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5154 ctxt->sax->error(ctxt->userData,
5155 "XML conditional section not closed\n");
5156 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005157 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005158 } else {
5159 SKIP(3);
5160 }
5161}
5162
5163/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005164 * xmlParseMarkupDecl:
5165 * @ctxt: an XML parser context
5166 *
5167 * parse Markup declarations
5168 *
5169 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5170 * NotationDecl | PI | Comment
5171 *
5172 * [ VC: Proper Declaration/PE Nesting ]
5173 * Parameter-entity replacement text must be properly nested with
5174 * markup declarations. That is to say, if either the first character
5175 * or the last character of a markup declaration (markupdecl above) is
5176 * contained in the replacement text for a parameter-entity reference,
5177 * both must be contained in the same replacement text.
5178 *
5179 * [ WFC: PEs in Internal Subset ]
5180 * In the internal DTD subset, parameter-entity references can occur
5181 * only where markup declarations can occur, not within markup declarations.
5182 * (This does not apply to references that occur in external parameter
5183 * entities or to the external subset.)
5184 */
5185void
5186xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5187 GROW;
5188 xmlParseElementDecl(ctxt);
5189 xmlParseAttributeListDecl(ctxt);
5190 xmlParseEntityDecl(ctxt);
5191 xmlParseNotationDecl(ctxt);
5192 xmlParsePI(ctxt);
5193 xmlParseComment(ctxt);
5194 /*
5195 * This is only for internal subset. On external entities,
5196 * the replacement is done before parsing stage
5197 */
5198 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5199 xmlParsePEReference(ctxt);
5200
5201 /*
5202 * Conditional sections are allowed from entities included
5203 * by PE References in the internal subset.
5204 */
5205 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5206 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5207 xmlParseConditionalSections(ctxt);
5208 }
5209 }
5210
5211 ctxt->instate = XML_PARSER_DTD;
5212}
5213
5214/**
5215 * xmlParseTextDecl:
5216 * @ctxt: an XML parser context
5217 *
5218 * parse an XML declaration header for external entities
5219 *
5220 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5221 *
5222 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5223 */
5224
5225void
5226xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5227 xmlChar *version;
5228
5229 /*
5230 * We know that '<?xml' is here.
5231 */
5232 if ((RAW == '<') && (NXT(1) == '?') &&
5233 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5234 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5235 SKIP(5);
5236 } else {
5237 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
5238 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5239 ctxt->sax->error(ctxt->userData,
5240 "Text declaration '<?xml' required\n");
5241 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005242 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005243
5244 return;
5245 }
5246
5247 if (!IS_BLANK(CUR)) {
5248 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5249 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5250 ctxt->sax->error(ctxt->userData,
5251 "Space needed after '<?xml'\n");
5252 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005253 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005254 }
5255 SKIP_BLANKS;
5256
5257 /*
5258 * We may have the VersionInfo here.
5259 */
5260 version = xmlParseVersionInfo(ctxt);
5261 if (version == NULL)
5262 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005263 else {
5264 if (!IS_BLANK(CUR)) {
5265 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5266 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5267 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5268 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005269 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard401c2112002-01-07 16:54:10 +00005270 }
5271 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005272 ctxt->input->version = version;
5273
5274 /*
5275 * We must have the encoding declaration
5276 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005277 xmlParseEncodingDecl(ctxt);
5278 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5279 /*
5280 * The XML REC instructs us to stop parsing right here
5281 */
5282 return;
5283 }
5284
5285 SKIP_BLANKS;
5286 if ((RAW == '?') && (NXT(1) == '>')) {
5287 SKIP(2);
5288 } else if (RAW == '>') {
5289 /* Deprecated old WD ... */
5290 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5291 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5292 ctxt->sax->error(ctxt->userData,
5293 "XML declaration must end-up with '?>'\n");
5294 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005295 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005296 NEXT;
5297 } else {
5298 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5299 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5300 ctxt->sax->error(ctxt->userData,
5301 "parsing XML declaration: '?>' expected\n");
5302 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005303 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005304 MOVETO_ENDTAG(CUR_PTR);
5305 NEXT;
5306 }
5307}
5308
5309/**
Owen Taylor3473f882001-02-23 17:55:21 +00005310 * xmlParseExternalSubset:
5311 * @ctxt: an XML parser context
5312 * @ExternalID: the external identifier
5313 * @SystemID: the system identifier (or URL)
5314 *
5315 * parse Markup declarations from an external subset
5316 *
5317 * [30] extSubset ::= textDecl? extSubsetDecl
5318 *
5319 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5320 */
5321void
5322xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5323 const xmlChar *SystemID) {
5324 GROW;
5325 if ((RAW == '<') && (NXT(1) == '?') &&
5326 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5327 (NXT(4) == 'l')) {
5328 xmlParseTextDecl(ctxt);
5329 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5330 /*
5331 * The XML REC instructs us to stop parsing right here
5332 */
5333 ctxt->instate = XML_PARSER_EOF;
5334 return;
5335 }
5336 }
5337 if (ctxt->myDoc == NULL) {
5338 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5339 }
5340 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5341 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5342
5343 ctxt->instate = XML_PARSER_DTD;
5344 ctxt->external = 1;
5345 while (((RAW == '<') && (NXT(1) == '?')) ||
5346 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005347 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005348 const xmlChar *check = CUR_PTR;
5349 int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005350
5351 GROW;
5352 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5353 xmlParseConditionalSections(ctxt);
5354 } else if (IS_BLANK(CUR)) {
5355 NEXT;
5356 } else if (RAW == '%') {
5357 xmlParsePEReference(ctxt);
5358 } else
5359 xmlParseMarkupDecl(ctxt);
5360
5361 /*
5362 * Pop-up of finished entities.
5363 */
5364 while ((RAW == 0) && (ctxt->inputNr > 1))
5365 xmlPopInput(ctxt);
5366
Daniel Veillardfdc91562002-07-01 21:52:03 +00005367 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005368 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5369 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5370 ctxt->sax->error(ctxt->userData,
5371 "Content error in the external subset\n");
5372 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005373 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005374 break;
5375 }
5376 }
5377
5378 if (RAW != 0) {
5379 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5380 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5381 ctxt->sax->error(ctxt->userData,
5382 "Extra content at the end of the document\n");
5383 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005384 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005385 }
5386
5387}
5388
5389/**
5390 * xmlParseReference:
5391 * @ctxt: an XML parser context
5392 *
5393 * parse and handle entity references in content, depending on the SAX
5394 * interface, this may end-up in a call to character() if this is a
5395 * CharRef, a predefined entity, if there is no reference() callback.
5396 * or if the parser was asked to switch to that mode.
5397 *
5398 * [67] Reference ::= EntityRef | CharRef
5399 */
5400void
5401xmlParseReference(xmlParserCtxtPtr ctxt) {
5402 xmlEntityPtr ent;
5403 xmlChar *val;
5404 if (RAW != '&') return;
5405
5406 if (NXT(1) == '#') {
5407 int i = 0;
5408 xmlChar out[10];
5409 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005410 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005411
5412 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5413 /*
5414 * So we are using non-UTF-8 buffers
5415 * Check that the char fit on 8bits, if not
5416 * generate a CharRef.
5417 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005418 if (value <= 0xFF) {
5419 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005420 out[1] = 0;
5421 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5422 (!ctxt->disableSAX))
5423 ctxt->sax->characters(ctxt->userData, out, 1);
5424 } else {
5425 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005426 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005427 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005428 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005429 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5430 (!ctxt->disableSAX))
5431 ctxt->sax->reference(ctxt->userData, out);
5432 }
5433 } else {
5434 /*
5435 * Just encode the value in UTF-8
5436 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005437 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005438 out[i] = 0;
5439 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5440 (!ctxt->disableSAX))
5441 ctxt->sax->characters(ctxt->userData, out, i);
5442 }
5443 } else {
5444 ent = xmlParseEntityRef(ctxt);
5445 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005446 if (!ctxt->wellFormed)
5447 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005448 if ((ent->name != NULL) &&
5449 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5450 xmlNodePtr list = NULL;
5451 int ret;
5452
5453
5454 /*
5455 * The first reference to the entity trigger a parsing phase
5456 * where the ent->children is filled with the result from
5457 * the parsing.
5458 */
5459 if (ent->children == NULL) {
5460 xmlChar *value;
5461 value = ent->content;
5462
5463 /*
5464 * Check that this entity is well formed
5465 */
5466 if ((value != NULL) &&
5467 (value[1] == 0) && (value[0] == '<') &&
5468 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5469 /*
5470 * DONE: get definite answer on this !!!
5471 * Lots of entity decls are used to declare a single
5472 * char
5473 * <!ENTITY lt "<">
5474 * Which seems to be valid since
5475 * 2.4: The ampersand character (&) and the left angle
5476 * bracket (<) may appear in their literal form only
5477 * when used ... They are also legal within the literal
5478 * entity value of an internal entity declaration;i
5479 * see "4.3.2 Well-Formed Parsed Entities".
5480 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5481 * Looking at the OASIS test suite and James Clark
5482 * tests, this is broken. However the XML REC uses
5483 * it. Is the XML REC not well-formed ????
5484 * This is a hack to avoid this problem
5485 *
5486 * ANSWER: since lt gt amp .. are already defined,
5487 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005488 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005489 * is lousy but acceptable.
5490 */
5491 list = xmlNewDocText(ctxt->myDoc, value);
5492 if (list != NULL) {
5493 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5494 (ent->children == NULL)) {
5495 ent->children = list;
5496 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005497 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005498 list->parent = (xmlNodePtr) ent;
5499 } else {
5500 xmlFreeNodeList(list);
5501 }
5502 } else if (list != NULL) {
5503 xmlFreeNodeList(list);
5504 }
5505 } else {
5506 /*
5507 * 4.3.2: An internal general parsed entity is well-formed
5508 * if its replacement text matches the production labeled
5509 * content.
5510 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005511
5512 void *user_data;
5513 /*
5514 * This is a bit hackish but this seems the best
5515 * way to make sure both SAX and DOM entity support
5516 * behaves okay.
5517 */
5518 if (ctxt->userData == ctxt)
5519 user_data = NULL;
5520 else
5521 user_data = ctxt->userData;
5522
Owen Taylor3473f882001-02-23 17:55:21 +00005523 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5524 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005525 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5526 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005527 ctxt->depth--;
5528 } else if (ent->etype ==
5529 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5530 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005531 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005532 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005533 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005534 ctxt->depth--;
5535 } else {
5536 ret = -1;
5537 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5538 ctxt->sax->error(ctxt->userData,
5539 "Internal: invalid entity type\n");
5540 }
5541 if (ret == XML_ERR_ENTITY_LOOP) {
5542 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5543 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5544 ctxt->sax->error(ctxt->userData,
5545 "Detected entity reference loop\n");
5546 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005547 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005548 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005549 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005550 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5551 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005552 (ent->children == NULL)) {
5553 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005554 if (ctxt->replaceEntities) {
5555 /*
5556 * Prune it directly in the generated document
5557 * except for single text nodes.
5558 */
5559 if ((list->type == XML_TEXT_NODE) &&
5560 (list->next == NULL)) {
5561 list->parent = (xmlNodePtr) ent;
5562 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005563 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005564 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005565 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005566 while (list != NULL) {
5567 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005568 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005569 if (list->next == NULL)
5570 ent->last = list;
5571 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005572 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005573 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005574 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5575 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005576 }
5577 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005578 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005579 while (list != NULL) {
5580 list->parent = (xmlNodePtr) ent;
5581 if (list->next == NULL)
5582 ent->last = list;
5583 list = list->next;
5584 }
Owen Taylor3473f882001-02-23 17:55:21 +00005585 }
5586 } else {
5587 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005588 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005589 }
5590 } else if (ret > 0) {
5591 ctxt->errNo = ret;
5592 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5593 ctxt->sax->error(ctxt->userData,
5594 "Entity value required\n");
5595 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005596 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005597 } else if (list != NULL) {
5598 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005599 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005600 }
5601 }
5602 }
5603 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5604 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5605 /*
5606 * Create a node.
5607 */
5608 ctxt->sax->reference(ctxt->userData, ent->name);
5609 return;
5610 } else if (ctxt->replaceEntities) {
5611 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5612 /*
5613 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005614 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005615 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005616 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005617 if (list == NULL) {
Daniel Veillard8107a222002-01-13 14:10:10 +00005618 xmlNodePtr new = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005619 cur = ent->children;
5620 while (cur != NULL) {
5621 new = xmlCopyNode(cur, 1);
Daniel Veillard8f872442003-01-09 23:19:02 +00005622 if (new != NULL) {
5623 new->_private = cur->_private;
5624 if (firstChild == NULL){
5625 firstChild = new;
5626 }
5627 xmlAddChild(ctxt->node, new);
Daniel Veillard8107a222002-01-13 14:10:10 +00005628 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005629 if (cur == ent->last)
5630 break;
5631 cur = cur->next;
5632 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005633 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5634 xmlAddEntityReference(ent, firstChild, new);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005635 } else {
5636 /*
5637 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005638 * node with a possible previous text one which
5639 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005640 */
5641 if (ent->children->type == XML_TEXT_NODE)
5642 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5643 if ((ent->last != ent->children) &&
5644 (ent->last->type == XML_TEXT_NODE))
5645 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5646 xmlAddChildList(ctxt->node, ent->children);
5647 }
5648
Owen Taylor3473f882001-02-23 17:55:21 +00005649 /*
5650 * This is to avoid a nasty side effect, see
5651 * characters() in SAX.c
5652 */
5653 ctxt->nodemem = 0;
5654 ctxt->nodelen = 0;
5655 return;
5656 } else {
5657 /*
5658 * Probably running in SAX mode
5659 */
5660 xmlParserInputPtr input;
5661
5662 input = xmlNewEntityInputStream(ctxt, ent);
5663 xmlPushInput(ctxt, input);
5664 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5665 (RAW == '<') && (NXT(1) == '?') &&
5666 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5667 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5668 xmlParseTextDecl(ctxt);
5669 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5670 /*
5671 * The XML REC instructs us to stop parsing right here
5672 */
5673 ctxt->instate = XML_PARSER_EOF;
5674 return;
5675 }
5676 if (input->standalone == 1) {
5677 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5678 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5679 ctxt->sax->error(ctxt->userData,
5680 "external parsed entities cannot be standalone\n");
5681 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005682 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005683 }
5684 }
5685 return;
5686 }
5687 }
5688 } else {
5689 val = ent->content;
5690 if (val == NULL) return;
5691 /*
5692 * inline the entity.
5693 */
5694 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5695 (!ctxt->disableSAX))
5696 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5697 }
5698 }
5699}
5700
5701/**
5702 * xmlParseEntityRef:
5703 * @ctxt: an XML parser context
5704 *
5705 * parse ENTITY references declarations
5706 *
5707 * [68] EntityRef ::= '&' Name ';'
5708 *
5709 * [ WFC: Entity Declared ]
5710 * In a document without any DTD, a document with only an internal DTD
5711 * subset which contains no parameter entity references, or a document
5712 * with "standalone='yes'", the Name given in the entity reference
5713 * must match that in an entity declaration, except that well-formed
5714 * documents need not declare any of the following entities: amp, lt,
5715 * gt, apos, quot. The declaration of a parameter entity must precede
5716 * any reference to it. Similarly, the declaration of a general entity
5717 * must precede any reference to it which appears in a default value in an
5718 * attribute-list declaration. Note that if entities are declared in the
5719 * external subset or in external parameter entities, a non-validating
5720 * processor is not obligated to read and process their declarations;
5721 * for such documents, the rule that an entity must be declared is a
5722 * well-formedness constraint only if standalone='yes'.
5723 *
5724 * [ WFC: Parsed Entity ]
5725 * An entity reference must not contain the name of an unparsed entity
5726 *
5727 * Returns the xmlEntityPtr if found, or NULL otherwise.
5728 */
5729xmlEntityPtr
5730xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5731 xmlChar *name;
5732 xmlEntityPtr ent = NULL;
5733
5734 GROW;
5735
5736 if (RAW == '&') {
5737 NEXT;
5738 name = xmlParseName(ctxt);
5739 if (name == NULL) {
5740 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5741 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5742 ctxt->sax->error(ctxt->userData,
5743 "xmlParseEntityRef: no name\n");
5744 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005745 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005746 } else {
5747 if (RAW == ';') {
5748 NEXT;
5749 /*
5750 * Ask first SAX for entity resolution, otherwise try the
5751 * predefined set.
5752 */
5753 if (ctxt->sax != NULL) {
5754 if (ctxt->sax->getEntity != NULL)
5755 ent = ctxt->sax->getEntity(ctxt->userData, name);
5756 if (ent == NULL)
5757 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005758 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5759 ent = getEntity(ctxt, name);
5760 }
Owen Taylor3473f882001-02-23 17:55:21 +00005761 }
5762 /*
5763 * [ WFC: Entity Declared ]
5764 * In a document without any DTD, a document with only an
5765 * internal DTD subset which contains no parameter entity
5766 * references, or a document with "standalone='yes'", the
5767 * Name given in the entity reference must match that in an
5768 * entity declaration, except that well-formed documents
5769 * need not declare any of the following entities: amp, lt,
5770 * gt, apos, quot.
5771 * The declaration of a parameter entity must precede any
5772 * reference to it.
5773 * Similarly, the declaration of a general entity must
5774 * precede any reference to it which appears in a default
5775 * value in an attribute-list declaration. Note that if
5776 * entities are declared in the external subset or in
5777 * external parameter entities, a non-validating processor
5778 * is not obligated to read and process their declarations;
5779 * for such documents, the rule that an entity must be
5780 * declared is a well-formedness constraint only if
5781 * standalone='yes'.
5782 */
5783 if (ent == NULL) {
5784 if ((ctxt->standalone == 1) ||
5785 ((ctxt->hasExternalSubset == 0) &&
5786 (ctxt->hasPErefs == 0))) {
5787 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5788 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5789 ctxt->sax->error(ctxt->userData,
5790 "Entity '%s' not defined\n", name);
5791 ctxt->wellFormed = 0;
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005792 ctxt->valid = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005793 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005794 } else {
5795 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005796 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005797 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005798 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005799 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005800 }
5801 }
5802
5803 /*
5804 * [ WFC: Parsed Entity ]
5805 * An entity reference must not contain the name of an
5806 * unparsed entity
5807 */
5808 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5809 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5810 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5811 ctxt->sax->error(ctxt->userData,
5812 "Entity reference to unparsed entity %s\n", name);
5813 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005814 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005815 }
5816
5817 /*
5818 * [ WFC: No External Entity References ]
5819 * Attribute values cannot contain direct or indirect
5820 * entity references to external entities.
5821 */
5822 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5823 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5824 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5825 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5826 ctxt->sax->error(ctxt->userData,
5827 "Attribute references external entity '%s'\n", name);
5828 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005829 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005830 }
5831 /*
5832 * [ WFC: No < in Attribute Values ]
5833 * The replacement text of any entity referred to directly or
5834 * indirectly in an attribute value (other than "&lt;") must
5835 * not contain a <.
5836 */
5837 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5838 (ent != NULL) &&
5839 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5840 (ent->content != NULL) &&
5841 (xmlStrchr(ent->content, '<'))) {
5842 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5843 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5844 ctxt->sax->error(ctxt->userData,
5845 "'<' in entity '%s' is not allowed in attributes values\n", name);
5846 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005847 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005848 }
5849
5850 /*
5851 * Internal check, no parameter entities here ...
5852 */
5853 else {
5854 switch (ent->etype) {
5855 case XML_INTERNAL_PARAMETER_ENTITY:
5856 case XML_EXTERNAL_PARAMETER_ENTITY:
5857 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5858 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5859 ctxt->sax->error(ctxt->userData,
5860 "Attempt to reference the parameter entity '%s'\n", name);
5861 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005862 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005863 break;
5864 default:
5865 break;
5866 }
5867 }
5868
5869 /*
5870 * [ WFC: No Recursion ]
5871 * A parsed entity must not contain a recursive reference
5872 * to itself, either directly or indirectly.
5873 * Done somewhere else
5874 */
5875
5876 } else {
5877 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5878 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5879 ctxt->sax->error(ctxt->userData,
5880 "xmlParseEntityRef: expecting ';'\n");
5881 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005882 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005883 }
5884 xmlFree(name);
5885 }
5886 }
5887 return(ent);
5888}
5889
5890/**
5891 * xmlParseStringEntityRef:
5892 * @ctxt: an XML parser context
5893 * @str: a pointer to an index in the string
5894 *
5895 * parse ENTITY references declarations, but this version parses it from
5896 * a string value.
5897 *
5898 * [68] EntityRef ::= '&' Name ';'
5899 *
5900 * [ WFC: Entity Declared ]
5901 * In a document without any DTD, a document with only an internal DTD
5902 * subset which contains no parameter entity references, or a document
5903 * with "standalone='yes'", the Name given in the entity reference
5904 * must match that in an entity declaration, except that well-formed
5905 * documents need not declare any of the following entities: amp, lt,
5906 * gt, apos, quot. The declaration of a parameter entity must precede
5907 * any reference to it. Similarly, the declaration of a general entity
5908 * must precede any reference to it which appears in a default value in an
5909 * attribute-list declaration. Note that if entities are declared in the
5910 * external subset or in external parameter entities, a non-validating
5911 * processor is not obligated to read and process their declarations;
5912 * for such documents, the rule that an entity must be declared is a
5913 * well-formedness constraint only if standalone='yes'.
5914 *
5915 * [ WFC: Parsed Entity ]
5916 * An entity reference must not contain the name of an unparsed entity
5917 *
5918 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5919 * is updated to the current location in the string.
5920 */
5921xmlEntityPtr
5922xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5923 xmlChar *name;
5924 const xmlChar *ptr;
5925 xmlChar cur;
5926 xmlEntityPtr ent = NULL;
5927
5928 if ((str == NULL) || (*str == NULL))
5929 return(NULL);
5930 ptr = *str;
5931 cur = *ptr;
5932 if (cur == '&') {
5933 ptr++;
5934 cur = *ptr;
5935 name = xmlParseStringName(ctxt, &ptr);
5936 if (name == NULL) {
5937 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5938 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5939 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005940 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005941 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005942 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005943 } else {
5944 if (*ptr == ';') {
5945 ptr++;
5946 /*
5947 * Ask first SAX for entity resolution, otherwise try the
5948 * predefined set.
5949 */
5950 if (ctxt->sax != NULL) {
5951 if (ctxt->sax->getEntity != NULL)
5952 ent = ctxt->sax->getEntity(ctxt->userData, name);
5953 if (ent == NULL)
5954 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005955 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5956 ent = getEntity(ctxt, name);
5957 }
Owen Taylor3473f882001-02-23 17:55:21 +00005958 }
5959 /*
5960 * [ WFC: Entity Declared ]
5961 * In a document without any DTD, a document with only an
5962 * internal DTD subset which contains no parameter entity
5963 * references, or a document with "standalone='yes'", the
5964 * Name given in the entity reference must match that in an
5965 * entity declaration, except that well-formed documents
5966 * need not declare any of the following entities: amp, lt,
5967 * gt, apos, quot.
5968 * The declaration of a parameter entity must precede any
5969 * reference to it.
5970 * Similarly, the declaration of a general entity must
5971 * precede any reference to it which appears in a default
5972 * value in an attribute-list declaration. Note that if
5973 * entities are declared in the external subset or in
5974 * external parameter entities, a non-validating processor
5975 * is not obligated to read and process their declarations;
5976 * for such documents, the rule that an entity must be
5977 * declared is a well-formedness constraint only if
5978 * standalone='yes'.
5979 */
5980 if (ent == NULL) {
5981 if ((ctxt->standalone == 1) ||
5982 ((ctxt->hasExternalSubset == 0) &&
5983 (ctxt->hasPErefs == 0))) {
5984 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5985 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5986 ctxt->sax->error(ctxt->userData,
5987 "Entity '%s' not defined\n", name);
5988 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005989 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005990 } else {
5991 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5992 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5993 ctxt->sax->warning(ctxt->userData,
5994 "Entity '%s' not defined\n", name);
5995 }
5996 }
5997
5998 /*
5999 * [ WFC: Parsed Entity ]
6000 * An entity reference must not contain the name of an
6001 * unparsed entity
6002 */
6003 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
6004 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
6005 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6006 ctxt->sax->error(ctxt->userData,
6007 "Entity reference to unparsed entity %s\n", name);
6008 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006009 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006010 }
6011
6012 /*
6013 * [ WFC: No External Entity References ]
6014 * Attribute values cannot contain direct or indirect
6015 * entity references to external entities.
6016 */
6017 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6018 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
6019 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
6020 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6021 ctxt->sax->error(ctxt->userData,
6022 "Attribute references external entity '%s'\n", name);
6023 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006024 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006025 }
6026 /*
6027 * [ WFC: No < in Attribute Values ]
6028 * The replacement text of any entity referred to directly or
6029 * indirectly in an attribute value (other than "&lt;") must
6030 * not contain a <.
6031 */
6032 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6033 (ent != NULL) &&
6034 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6035 (ent->content != NULL) &&
6036 (xmlStrchr(ent->content, '<'))) {
6037 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6038 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6039 ctxt->sax->error(ctxt->userData,
6040 "'<' in entity '%s' is not allowed in attributes values\n", name);
6041 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006042 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006043 }
6044
6045 /*
6046 * Internal check, no parameter entities here ...
6047 */
6048 else {
6049 switch (ent->etype) {
6050 case XML_INTERNAL_PARAMETER_ENTITY:
6051 case XML_EXTERNAL_PARAMETER_ENTITY:
6052 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6053 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6054 ctxt->sax->error(ctxt->userData,
6055 "Attempt to reference the parameter entity '%s'\n", name);
6056 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006057 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006058 break;
6059 default:
6060 break;
6061 }
6062 }
6063
6064 /*
6065 * [ WFC: No Recursion ]
6066 * A parsed entity must not contain a recursive reference
6067 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006068 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006069 */
6070
6071 } else {
6072 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6073 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6074 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00006075 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006076 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006077 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006078 }
6079 xmlFree(name);
6080 }
6081 }
6082 *str = ptr;
6083 return(ent);
6084}
6085
6086/**
6087 * xmlParsePEReference:
6088 * @ctxt: an XML parser context
6089 *
6090 * parse PEReference declarations
6091 * The entity content is handled directly by pushing it's content as
6092 * a new input stream.
6093 *
6094 * [69] PEReference ::= '%' Name ';'
6095 *
6096 * [ WFC: No Recursion ]
6097 * A parsed entity must not contain a recursive
6098 * reference to itself, either directly or indirectly.
6099 *
6100 * [ WFC: Entity Declared ]
6101 * In a document without any DTD, a document with only an internal DTD
6102 * subset which contains no parameter entity references, or a document
6103 * with "standalone='yes'", ... ... The declaration of a parameter
6104 * entity must precede any reference to it...
6105 *
6106 * [ VC: Entity Declared ]
6107 * In a document with an external subset or external parameter entities
6108 * with "standalone='no'", ... ... The declaration of a parameter entity
6109 * must precede any reference to it...
6110 *
6111 * [ WFC: In DTD ]
6112 * Parameter-entity references may only appear in the DTD.
6113 * NOTE: misleading but this is handled.
6114 */
6115void
6116xmlParsePEReference(xmlParserCtxtPtr ctxt) {
6117 xmlChar *name;
6118 xmlEntityPtr entity = NULL;
6119 xmlParserInputPtr input;
6120
6121 if (RAW == '%') {
6122 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006123 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006124 if (name == NULL) {
6125 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6126 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6127 ctxt->sax->error(ctxt->userData,
6128 "xmlParsePEReference: no name\n");
6129 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006130 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006131 } else {
6132 if (RAW == ';') {
6133 NEXT;
6134 if ((ctxt->sax != NULL) &&
6135 (ctxt->sax->getParameterEntity != NULL))
6136 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6137 name);
6138 if (entity == NULL) {
6139 /*
6140 * [ WFC: Entity Declared ]
6141 * In a document without any DTD, a document with only an
6142 * internal DTD subset which contains no parameter entity
6143 * references, or a document with "standalone='yes'", ...
6144 * ... The declaration of a parameter entity must precede
6145 * any reference to it...
6146 */
6147 if ((ctxt->standalone == 1) ||
6148 ((ctxt->hasExternalSubset == 0) &&
6149 (ctxt->hasPErefs == 0))) {
6150 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6151 if ((!ctxt->disableSAX) &&
6152 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6153 ctxt->sax->error(ctxt->userData,
6154 "PEReference: %%%s; not found\n", name);
6155 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006156 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006157 } else {
6158 /*
6159 * [ VC: Entity Declared ]
6160 * In a document with an external subset or external
6161 * parameter entities with "standalone='no'", ...
6162 * ... The declaration of a parameter entity must precede
6163 * any reference to it...
6164 */
6165 if ((!ctxt->disableSAX) &&
6166 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6167 ctxt->sax->warning(ctxt->userData,
6168 "PEReference: %%%s; not found\n", name);
6169 ctxt->valid = 0;
6170 }
6171 } else {
6172 /*
6173 * Internal checking in case the entity quest barfed
6174 */
6175 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6176 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6177 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6178 ctxt->sax->warning(ctxt->userData,
6179 "Internal: %%%s; is not a parameter entity\n", name);
Daniel Veillardf5582f12002-06-11 10:08:16 +00006180 } else if (ctxt->input->free != deallocblankswrapper) {
6181 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
6182 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00006183 } else {
6184 /*
6185 * TODO !!!
6186 * handle the extra spaces added before and after
6187 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6188 */
6189 input = xmlNewEntityInputStream(ctxt, entity);
6190 xmlPushInput(ctxt, input);
6191 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6192 (RAW == '<') && (NXT(1) == '?') &&
6193 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6194 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6195 xmlParseTextDecl(ctxt);
6196 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6197 /*
6198 * The XML REC instructs us to stop parsing
6199 * right here
6200 */
6201 ctxt->instate = XML_PARSER_EOF;
6202 xmlFree(name);
6203 return;
6204 }
6205 }
Owen Taylor3473f882001-02-23 17:55:21 +00006206 }
6207 }
6208 ctxt->hasPErefs = 1;
6209 } else {
6210 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6211 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6212 ctxt->sax->error(ctxt->userData,
6213 "xmlParsePEReference: expecting ';'\n");
6214 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006215 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006216 }
6217 xmlFree(name);
6218 }
6219 }
6220}
6221
6222/**
6223 * xmlParseStringPEReference:
6224 * @ctxt: an XML parser context
6225 * @str: a pointer to an index in the string
6226 *
6227 * parse PEReference declarations
6228 *
6229 * [69] PEReference ::= '%' Name ';'
6230 *
6231 * [ WFC: No Recursion ]
6232 * A parsed entity must not contain a recursive
6233 * reference to itself, either directly or indirectly.
6234 *
6235 * [ WFC: Entity Declared ]
6236 * In a document without any DTD, a document with only an internal DTD
6237 * subset which contains no parameter entity references, or a document
6238 * with "standalone='yes'", ... ... The declaration of a parameter
6239 * entity must precede any reference to it...
6240 *
6241 * [ VC: Entity Declared ]
6242 * In a document with an external subset or external parameter entities
6243 * with "standalone='no'", ... ... The declaration of a parameter entity
6244 * must precede any reference to it...
6245 *
6246 * [ WFC: In DTD ]
6247 * Parameter-entity references may only appear in the DTD.
6248 * NOTE: misleading but this is handled.
6249 *
6250 * Returns the string of the entity content.
6251 * str is updated to the current value of the index
6252 */
6253xmlEntityPtr
6254xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6255 const xmlChar *ptr;
6256 xmlChar cur;
6257 xmlChar *name;
6258 xmlEntityPtr entity = NULL;
6259
6260 if ((str == NULL) || (*str == NULL)) return(NULL);
6261 ptr = *str;
6262 cur = *ptr;
6263 if (cur == '%') {
6264 ptr++;
6265 cur = *ptr;
6266 name = xmlParseStringName(ctxt, &ptr);
6267 if (name == NULL) {
6268 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6269 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6270 ctxt->sax->error(ctxt->userData,
6271 "xmlParseStringPEReference: no name\n");
6272 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006273 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006274 } else {
6275 cur = *ptr;
6276 if (cur == ';') {
6277 ptr++;
6278 cur = *ptr;
6279 if ((ctxt->sax != NULL) &&
6280 (ctxt->sax->getParameterEntity != NULL))
6281 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6282 name);
6283 if (entity == NULL) {
6284 /*
6285 * [ WFC: Entity Declared ]
6286 * In a document without any DTD, a document with only an
6287 * internal DTD subset which contains no parameter entity
6288 * references, or a document with "standalone='yes'", ...
6289 * ... The declaration of a parameter entity must precede
6290 * any reference to it...
6291 */
6292 if ((ctxt->standalone == 1) ||
6293 ((ctxt->hasExternalSubset == 0) &&
6294 (ctxt->hasPErefs == 0))) {
6295 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6296 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6297 ctxt->sax->error(ctxt->userData,
6298 "PEReference: %%%s; not found\n", name);
6299 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006300 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006301 } else {
6302 /*
6303 * [ VC: Entity Declared ]
6304 * In a document with an external subset or external
6305 * parameter entities with "standalone='no'", ...
6306 * ... The declaration of a parameter entity must
6307 * precede any reference to it...
6308 */
6309 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6310 ctxt->sax->warning(ctxt->userData,
6311 "PEReference: %%%s; not found\n", name);
6312 ctxt->valid = 0;
6313 }
6314 } else {
6315 /*
6316 * Internal checking in case the entity quest barfed
6317 */
6318 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6319 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6320 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6321 ctxt->sax->warning(ctxt->userData,
6322 "Internal: %%%s; is not a parameter entity\n", name);
6323 }
6324 }
6325 ctxt->hasPErefs = 1;
6326 } else {
6327 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6328 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6329 ctxt->sax->error(ctxt->userData,
6330 "xmlParseStringPEReference: expecting ';'\n");
6331 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006332 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006333 }
6334 xmlFree(name);
6335 }
6336 }
6337 *str = ptr;
6338 return(entity);
6339}
6340
6341/**
6342 * xmlParseDocTypeDecl:
6343 * @ctxt: an XML parser context
6344 *
6345 * parse a DOCTYPE declaration
6346 *
6347 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6348 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6349 *
6350 * [ VC: Root Element Type ]
6351 * The Name in the document type declaration must match the element
6352 * type of the root element.
6353 */
6354
6355void
6356xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6357 xmlChar *name = NULL;
6358 xmlChar *ExternalID = NULL;
6359 xmlChar *URI = NULL;
6360
6361 /*
6362 * We know that '<!DOCTYPE' has been detected.
6363 */
6364 SKIP(9);
6365
6366 SKIP_BLANKS;
6367
6368 /*
6369 * Parse the DOCTYPE name.
6370 */
6371 name = xmlParseName(ctxt);
6372 if (name == NULL) {
6373 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6374 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6375 ctxt->sax->error(ctxt->userData,
6376 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6377 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006378 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006379 }
6380 ctxt->intSubName = name;
6381
6382 SKIP_BLANKS;
6383
6384 /*
6385 * Check for SystemID and ExternalID
6386 */
6387 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6388
6389 if ((URI != NULL) || (ExternalID != NULL)) {
6390 ctxt->hasExternalSubset = 1;
6391 }
6392 ctxt->extSubURI = URI;
6393 ctxt->extSubSystem = ExternalID;
6394
6395 SKIP_BLANKS;
6396
6397 /*
6398 * Create and update the internal subset.
6399 */
6400 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6401 (!ctxt->disableSAX))
6402 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6403
6404 /*
6405 * Is there any internal subset declarations ?
6406 * they are handled separately in xmlParseInternalSubset()
6407 */
6408 if (RAW == '[')
6409 return;
6410
6411 /*
6412 * We should be at the end of the DOCTYPE declaration.
6413 */
6414 if (RAW != '>') {
6415 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6416 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006417 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006418 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006419 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006420 }
6421 NEXT;
6422}
6423
6424/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006425 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006426 * @ctxt: an XML parser context
6427 *
6428 * parse the internal subset declaration
6429 *
6430 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6431 */
6432
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006433static void
Owen Taylor3473f882001-02-23 17:55:21 +00006434xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6435 /*
6436 * Is there any DTD definition ?
6437 */
6438 if (RAW == '[') {
6439 ctxt->instate = XML_PARSER_DTD;
6440 NEXT;
6441 /*
6442 * Parse the succession of Markup declarations and
6443 * PEReferences.
6444 * Subsequence (markupdecl | PEReference | S)*
6445 */
6446 while (RAW != ']') {
6447 const xmlChar *check = CUR_PTR;
6448 int cons = ctxt->input->consumed;
6449
6450 SKIP_BLANKS;
6451 xmlParseMarkupDecl(ctxt);
6452 xmlParsePEReference(ctxt);
6453
6454 /*
6455 * Pop-up of finished entities.
6456 */
6457 while ((RAW == 0) && (ctxt->inputNr > 1))
6458 xmlPopInput(ctxt);
6459
6460 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6461 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6462 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6463 ctxt->sax->error(ctxt->userData,
6464 "xmlParseInternalSubset: error detected in Markup declaration\n");
6465 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006466 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006467 break;
6468 }
6469 }
6470 if (RAW == ']') {
6471 NEXT;
6472 SKIP_BLANKS;
6473 }
6474 }
6475
6476 /*
6477 * We should be at the end of the DOCTYPE declaration.
6478 */
6479 if (RAW != '>') {
6480 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6481 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006482 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006483 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006484 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006485 }
6486 NEXT;
6487}
6488
6489/**
6490 * xmlParseAttribute:
6491 * @ctxt: an XML parser context
6492 * @value: a xmlChar ** used to store the value of the attribute
6493 *
6494 * parse an attribute
6495 *
6496 * [41] Attribute ::= Name Eq AttValue
6497 *
6498 * [ WFC: No External Entity References ]
6499 * Attribute values cannot contain direct or indirect entity references
6500 * to external entities.
6501 *
6502 * [ WFC: No < in Attribute Values ]
6503 * The replacement text of any entity referred to directly or indirectly in
6504 * an attribute value (other than "&lt;") must not contain a <.
6505 *
6506 * [ VC: Attribute Value Type ]
6507 * The attribute must have been declared; the value must be of the type
6508 * declared for it.
6509 *
6510 * [25] Eq ::= S? '=' S?
6511 *
6512 * With namespace:
6513 *
6514 * [NS 11] Attribute ::= QName Eq AttValue
6515 *
6516 * Also the case QName == xmlns:??? is handled independently as a namespace
6517 * definition.
6518 *
6519 * Returns the attribute name, and the value in *value.
6520 */
6521
6522xmlChar *
6523xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6524 xmlChar *name, *val;
6525
6526 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006527 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006528 name = xmlParseName(ctxt);
6529 if (name == NULL) {
6530 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6531 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6532 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6533 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006534 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006535 return(NULL);
6536 }
6537
6538 /*
6539 * read the value
6540 */
6541 SKIP_BLANKS;
6542 if (RAW == '=') {
6543 NEXT;
6544 SKIP_BLANKS;
6545 val = xmlParseAttValue(ctxt);
6546 ctxt->instate = XML_PARSER_CONTENT;
6547 } else {
6548 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6549 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6550 ctxt->sax->error(ctxt->userData,
6551 "Specification mandate value for attribute %s\n", name);
6552 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006553 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006554 xmlFree(name);
6555 return(NULL);
6556 }
6557
6558 /*
6559 * Check that xml:lang conforms to the specification
6560 * No more registered as an error, just generate a warning now
6561 * since this was deprecated in XML second edition
6562 */
6563 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6564 if (!xmlCheckLanguageID(val)) {
6565 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6566 ctxt->sax->warning(ctxt->userData,
6567 "Malformed value for xml:lang : %s\n", val);
6568 }
6569 }
6570
6571 /*
6572 * Check that xml:space conforms to the specification
6573 */
6574 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6575 if (xmlStrEqual(val, BAD_CAST "default"))
6576 *(ctxt->space) = 0;
6577 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6578 *(ctxt->space) = 1;
6579 else {
6580 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6581 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6582 ctxt->sax->error(ctxt->userData,
6583"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6584 val);
6585 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006586 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006587 }
6588 }
6589
6590 *value = val;
6591 return(name);
6592}
6593
6594/**
6595 * xmlParseStartTag:
6596 * @ctxt: an XML parser context
6597 *
6598 * parse a start of tag either for rule element or
6599 * EmptyElement. In both case we don't parse the tag closing chars.
6600 *
6601 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6602 *
6603 * [ WFC: Unique Att Spec ]
6604 * No attribute name may appear more than once in the same start-tag or
6605 * empty-element tag.
6606 *
6607 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6608 *
6609 * [ WFC: Unique Att Spec ]
6610 * No attribute name may appear more than once in the same start-tag or
6611 * empty-element tag.
6612 *
6613 * With namespace:
6614 *
6615 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6616 *
6617 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6618 *
6619 * Returns the element name parsed
6620 */
6621
6622xmlChar *
6623xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6624 xmlChar *name;
6625 xmlChar *attname;
6626 xmlChar *attvalue;
6627 const xmlChar **atts = NULL;
6628 int nbatts = 0;
6629 int maxatts = 0;
6630 int i;
6631
6632 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006633 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006634
6635 name = xmlParseName(ctxt);
6636 if (name == NULL) {
6637 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6638 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6639 ctxt->sax->error(ctxt->userData,
6640 "xmlParseStartTag: invalid element name\n");
6641 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006642 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006643 return(NULL);
6644 }
6645
6646 /*
6647 * Now parse the attributes, it ends up with the ending
6648 *
6649 * (S Attribute)* S?
6650 */
6651 SKIP_BLANKS;
6652 GROW;
6653
Daniel Veillard21a0f912001-02-25 19:54:14 +00006654 while ((RAW != '>') &&
6655 ((RAW != '/') || (NXT(1) != '>')) &&
6656 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006657 const xmlChar *q = CUR_PTR;
6658 int cons = ctxt->input->consumed;
6659
6660 attname = xmlParseAttribute(ctxt, &attvalue);
6661 if ((attname != NULL) && (attvalue != NULL)) {
6662 /*
6663 * [ WFC: Unique Att Spec ]
6664 * No attribute name may appear more than once in the same
6665 * start-tag or empty-element tag.
6666 */
6667 for (i = 0; i < nbatts;i += 2) {
6668 if (xmlStrEqual(atts[i], attname)) {
6669 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6670 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6671 ctxt->sax->error(ctxt->userData,
6672 "Attribute %s redefined\n",
6673 attname);
6674 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006675 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006676 xmlFree(attname);
6677 xmlFree(attvalue);
6678 goto failed;
6679 }
6680 }
6681
6682 /*
6683 * Add the pair to atts
6684 */
6685 if (atts == NULL) {
6686 maxatts = 10;
6687 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6688 if (atts == NULL) {
6689 xmlGenericError(xmlGenericErrorContext,
6690 "malloc of %ld byte failed\n",
6691 maxatts * (long)sizeof(xmlChar *));
6692 return(NULL);
6693 }
6694 } else if (nbatts + 4 > maxatts) {
6695 maxatts *= 2;
6696 atts = (const xmlChar **) xmlRealloc((void *) atts,
6697 maxatts * sizeof(xmlChar *));
6698 if (atts == NULL) {
6699 xmlGenericError(xmlGenericErrorContext,
6700 "realloc of %ld byte failed\n",
6701 maxatts * (long)sizeof(xmlChar *));
6702 return(NULL);
6703 }
6704 }
6705 atts[nbatts++] = attname;
6706 atts[nbatts++] = attvalue;
6707 atts[nbatts] = NULL;
6708 atts[nbatts + 1] = NULL;
6709 } else {
6710 if (attname != NULL)
6711 xmlFree(attname);
6712 if (attvalue != NULL)
6713 xmlFree(attvalue);
6714 }
6715
6716failed:
6717
Daniel Veillard3772de32002-12-17 10:31:45 +00006718 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006719 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6720 break;
6721 if (!IS_BLANK(RAW)) {
6722 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6723 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6724 ctxt->sax->error(ctxt->userData,
6725 "attributes construct error\n");
6726 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006727 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006728 }
6729 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006730 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6731 (attname == NULL) && (attvalue == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006732 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6733 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6734 ctxt->sax->error(ctxt->userData,
6735 "xmlParseStartTag: problem parsing attributes\n");
6736 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006737 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006738 break;
6739 }
6740 GROW;
6741 }
6742
6743 /*
6744 * SAX: Start of Element !
6745 */
6746 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6747 (!ctxt->disableSAX))
6748 ctxt->sax->startElement(ctxt->userData, name, atts);
6749
6750 if (atts != NULL) {
6751 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6752 xmlFree((void *) atts);
6753 }
6754 return(name);
6755}
6756
6757/**
6758 * xmlParseEndTag:
6759 * @ctxt: an XML parser context
6760 *
6761 * parse an end of tag
6762 *
6763 * [42] ETag ::= '</' Name S? '>'
6764 *
6765 * With namespace
6766 *
6767 * [NS 9] ETag ::= '</' QName S? '>'
6768 */
6769
6770void
6771xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6772 xmlChar *name;
6773 xmlChar *oldname;
6774
6775 GROW;
6776 if ((RAW != '<') || (NXT(1) != '/')) {
6777 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6778 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6779 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6780 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006781 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006782 return;
6783 }
6784 SKIP(2);
6785
Daniel Veillard46de64e2002-05-29 08:21:33 +00006786 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006787
6788 /*
6789 * We should definitely be at the ending "S? '>'" part
6790 */
6791 GROW;
6792 SKIP_BLANKS;
6793 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6794 ctxt->errNo = XML_ERR_GT_REQUIRED;
6795 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6796 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6797 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006798 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006799 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006800 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006801
6802 /*
6803 * [ WFC: Element Type Match ]
6804 * The Name in an element's end-tag must match the element type in the
6805 * start-tag.
6806 *
6807 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006808 if (name != (xmlChar*)1) {
Owen Taylor3473f882001-02-23 17:55:21 +00006809 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6810 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00006811 if (name != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00006812 ctxt->sax->error(ctxt->userData,
6813 "Opening and ending tag mismatch: %s and %s\n",
6814 ctxt->name, name);
Daniel Veillard46de64e2002-05-29 08:21:33 +00006815 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006816 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006817 "Ending tag error for: %s\n", ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006818 }
6819
6820 }
6821 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006822 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6823#if 0
6824 else {
6825 /*
6826 * Recover in case of one missing close
6827 */
6828 if ((ctxt->nameNr > 2) &&
6829 (xmlStrEqual(ctxt->nameTab[ctxt->nameNr -2], name))) {
6830 namePop(ctxt);
6831 spacePop(ctxt);
6832 }
6833 }
6834#endif
6835 if (name != NULL)
6836 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00006837 }
6838
6839 /*
6840 * SAX: End of Tag
6841 */
6842 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6843 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006844 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006845
Owen Taylor3473f882001-02-23 17:55:21 +00006846 oldname = namePop(ctxt);
6847 spacePop(ctxt);
6848 if (oldname != NULL) {
6849#ifdef DEBUG_STACK
6850 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6851#endif
6852 xmlFree(oldname);
6853 }
6854 return;
6855}
6856
6857/**
6858 * xmlParseCDSect:
6859 * @ctxt: an XML parser context
6860 *
6861 * Parse escaped pure raw content.
6862 *
6863 * [18] CDSect ::= CDStart CData CDEnd
6864 *
6865 * [19] CDStart ::= '<![CDATA['
6866 *
6867 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6868 *
6869 * [21] CDEnd ::= ']]>'
6870 */
6871void
6872xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6873 xmlChar *buf = NULL;
6874 int len = 0;
6875 int size = XML_PARSER_BUFFER_SIZE;
6876 int r, rl;
6877 int s, sl;
6878 int cur, l;
6879 int count = 0;
6880
6881 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6882 (NXT(2) == '[') && (NXT(3) == 'C') &&
6883 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6884 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6885 (NXT(8) == '[')) {
6886 SKIP(9);
6887 } else
6888 return;
6889
6890 ctxt->instate = XML_PARSER_CDATA_SECTION;
6891 r = CUR_CHAR(rl);
6892 if (!IS_CHAR(r)) {
6893 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6894 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6895 ctxt->sax->error(ctxt->userData,
6896 "CData section not finished\n");
6897 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006898 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006899 ctxt->instate = XML_PARSER_CONTENT;
6900 return;
6901 }
6902 NEXTL(rl);
6903 s = CUR_CHAR(sl);
6904 if (!IS_CHAR(s)) {
6905 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6906 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6907 ctxt->sax->error(ctxt->userData,
6908 "CData section not finished\n");
6909 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006910 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006911 ctxt->instate = XML_PARSER_CONTENT;
6912 return;
6913 }
6914 NEXTL(sl);
6915 cur = CUR_CHAR(l);
6916 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6917 if (buf == NULL) {
6918 xmlGenericError(xmlGenericErrorContext,
6919 "malloc of %d byte failed\n", size);
6920 return;
6921 }
6922 while (IS_CHAR(cur) &&
6923 ((r != ']') || (s != ']') || (cur != '>'))) {
6924 if (len + 5 >= size) {
6925 size *= 2;
6926 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6927 if (buf == NULL) {
6928 xmlGenericError(xmlGenericErrorContext,
6929 "realloc of %d byte failed\n", size);
6930 return;
6931 }
6932 }
6933 COPY_BUF(rl,buf,len,r);
6934 r = s;
6935 rl = sl;
6936 s = cur;
6937 sl = l;
6938 count++;
6939 if (count > 50) {
6940 GROW;
6941 count = 0;
6942 }
6943 NEXTL(l);
6944 cur = CUR_CHAR(l);
6945 }
6946 buf[len] = 0;
6947 ctxt->instate = XML_PARSER_CONTENT;
6948 if (cur != '>') {
6949 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6950 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6951 ctxt->sax->error(ctxt->userData,
6952 "CData section not finished\n%.50s\n", buf);
6953 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006954 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006955 xmlFree(buf);
6956 return;
6957 }
6958 NEXTL(l);
6959
6960 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006961 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00006962 */
6963 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6964 if (ctxt->sax->cdataBlock != NULL)
6965 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006966 else if (ctxt->sax->characters != NULL)
6967 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006968 }
6969 xmlFree(buf);
6970}
6971
6972/**
6973 * xmlParseContent:
6974 * @ctxt: an XML parser context
6975 *
6976 * Parse a content:
6977 *
6978 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6979 */
6980
6981void
6982xmlParseContent(xmlParserCtxtPtr ctxt) {
6983 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00006984 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00006985 ((RAW != '<') || (NXT(1) != '/'))) {
6986 const xmlChar *test = CUR_PTR;
6987 int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006988 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006989
6990 /*
Owen Taylor3473f882001-02-23 17:55:21 +00006991 * First case : a Processing Instruction.
6992 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00006993 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006994 xmlParsePI(ctxt);
6995 }
6996
6997 /*
6998 * Second case : a CDSection
6999 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007000 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00007001 (NXT(2) == '[') && (NXT(3) == 'C') &&
7002 (NXT(4) == 'D') && (NXT(5) == 'A') &&
7003 (NXT(6) == 'T') && (NXT(7) == 'A') &&
7004 (NXT(8) == '[')) {
7005 xmlParseCDSect(ctxt);
7006 }
7007
7008 /*
7009 * Third case : a comment
7010 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007011 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00007012 (NXT(2) == '-') && (NXT(3) == '-')) {
7013 xmlParseComment(ctxt);
7014 ctxt->instate = XML_PARSER_CONTENT;
7015 }
7016
7017 /*
7018 * Fourth case : a sub-element.
7019 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007020 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00007021 xmlParseElement(ctxt);
7022 }
7023
7024 /*
7025 * Fifth case : a reference. If if has not been resolved,
7026 * parsing returns it's Name, create the node
7027 */
7028
Daniel Veillard21a0f912001-02-25 19:54:14 +00007029 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00007030 xmlParseReference(ctxt);
7031 }
7032
7033 /*
7034 * Last case, text. Note that References are handled directly.
7035 */
7036 else {
7037 xmlParseCharData(ctxt, 0);
7038 }
7039
7040 GROW;
7041 /*
7042 * Pop-up of finished entities.
7043 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00007044 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00007045 xmlPopInput(ctxt);
7046 SHRINK;
7047
Daniel Veillardfdc91562002-07-01 21:52:03 +00007048 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007049 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
7050 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7051 ctxt->sax->error(ctxt->userData,
7052 "detected an error in element content\n");
7053 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007054 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007055 ctxt->instate = XML_PARSER_EOF;
7056 break;
7057 }
7058 }
7059}
7060
7061/**
7062 * xmlParseElement:
7063 * @ctxt: an XML parser context
7064 *
7065 * parse an XML element, this is highly recursive
7066 *
7067 * [39] element ::= EmptyElemTag | STag content ETag
7068 *
7069 * [ WFC: Element Type Match ]
7070 * The Name in an element's end-tag must match the element type in the
7071 * start-tag.
7072 *
7073 * [ VC: Element Valid ]
7074 * An element is valid if there is a declaration matching elementdecl
7075 * where the Name matches the element type and one of the following holds:
7076 * - The declaration matches EMPTY and the element has no content.
7077 * - The declaration matches children and the sequence of child elements
7078 * belongs to the language generated by the regular expression in the
7079 * content model, with optional white space (characters matching the
7080 * nonterminal S) between each pair of child elements.
7081 * - The declaration matches Mixed and the content consists of character
7082 * data and child elements whose types match names in the content model.
7083 * - The declaration matches ANY, and the types of any child elements have
7084 * been declared.
7085 */
7086
7087void
7088xmlParseElement(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00007089 xmlChar *name;
7090 xmlChar *oldname;
7091 xmlParserNodeInfo node_info;
7092 xmlNodePtr ret;
7093
7094 /* Capture start position */
7095 if (ctxt->record_info) {
7096 node_info.begin_pos = ctxt->input->consumed +
7097 (CUR_PTR - ctxt->input->base);
7098 node_info.begin_line = ctxt->input->line;
7099 }
7100
7101 if (ctxt->spaceNr == 0)
7102 spacePush(ctxt, -1);
7103 else
7104 spacePush(ctxt, *ctxt->space);
7105
7106 name = xmlParseStartTag(ctxt);
7107 if (name == NULL) {
7108 spacePop(ctxt);
7109 return;
7110 }
7111 namePush(ctxt, name);
7112 ret = ctxt->node;
7113
7114 /*
7115 * [ VC: Root Element Type ]
7116 * The Name in the document type declaration must match the element
7117 * type of the root element.
7118 */
7119 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7120 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7121 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7122
7123 /*
7124 * Check for an Empty Element.
7125 */
7126 if ((RAW == '/') && (NXT(1) == '>')) {
7127 SKIP(2);
7128 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7129 (!ctxt->disableSAX))
7130 ctxt->sax->endElement(ctxt->userData, name);
7131 oldname = namePop(ctxt);
7132 spacePop(ctxt);
7133 if (oldname != NULL) {
7134#ifdef DEBUG_STACK
7135 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7136#endif
7137 xmlFree(oldname);
7138 }
7139 if ( ret != NULL && ctxt->record_info ) {
7140 node_info.end_pos = ctxt->input->consumed +
7141 (CUR_PTR - ctxt->input->base);
7142 node_info.end_line = ctxt->input->line;
7143 node_info.node = ret;
7144 xmlParserAddNodeInfo(ctxt, &node_info);
7145 }
7146 return;
7147 }
7148 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00007149 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007150 } else {
7151 ctxt->errNo = XML_ERR_GT_REQUIRED;
7152 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7153 ctxt->sax->error(ctxt->userData,
Daniel Veillard38d80e22002-06-11 07:24:56 +00007154 "Couldn't find end of Start Tag %s\n",
7155 name);
Owen Taylor3473f882001-02-23 17:55:21 +00007156 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007157 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007158
7159 /*
7160 * end of parsing of this node.
7161 */
7162 nodePop(ctxt);
7163 oldname = namePop(ctxt);
7164 spacePop(ctxt);
7165 if (oldname != NULL) {
7166#ifdef DEBUG_STACK
7167 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7168#endif
7169 xmlFree(oldname);
7170 }
7171
7172 /*
7173 * Capture end position and add node
7174 */
7175 if ( ret != NULL && ctxt->record_info ) {
7176 node_info.end_pos = ctxt->input->consumed +
7177 (CUR_PTR - ctxt->input->base);
7178 node_info.end_line = ctxt->input->line;
7179 node_info.node = ret;
7180 xmlParserAddNodeInfo(ctxt, &node_info);
7181 }
7182 return;
7183 }
7184
7185 /*
7186 * Parse the content of the element:
7187 */
7188 xmlParseContent(ctxt);
7189 if (!IS_CHAR(RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00007190 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00007191 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7192 ctxt->sax->error(ctxt->userData,
Daniel Veillard38d80e22002-06-11 07:24:56 +00007193 "Premature end of data in tag %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007194 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007195 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007196
7197 /*
7198 * end of parsing of this node.
7199 */
7200 nodePop(ctxt);
7201 oldname = namePop(ctxt);
7202 spacePop(ctxt);
7203 if (oldname != NULL) {
7204#ifdef DEBUG_STACK
7205 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7206#endif
7207 xmlFree(oldname);
7208 }
7209 return;
7210 }
7211
7212 /*
7213 * parse the end of tag: '</' should be here.
7214 */
7215 xmlParseEndTag(ctxt);
7216
7217 /*
7218 * Capture end position and add node
7219 */
7220 if ( ret != NULL && ctxt->record_info ) {
7221 node_info.end_pos = ctxt->input->consumed +
7222 (CUR_PTR - ctxt->input->base);
7223 node_info.end_line = ctxt->input->line;
7224 node_info.node = ret;
7225 xmlParserAddNodeInfo(ctxt, &node_info);
7226 }
7227}
7228
7229/**
7230 * xmlParseVersionNum:
7231 * @ctxt: an XML parser context
7232 *
7233 * parse the XML version value.
7234 *
7235 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7236 *
7237 * Returns the string giving the XML version number, or NULL
7238 */
7239xmlChar *
7240xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7241 xmlChar *buf = NULL;
7242 int len = 0;
7243 int size = 10;
7244 xmlChar cur;
7245
7246 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7247 if (buf == NULL) {
7248 xmlGenericError(xmlGenericErrorContext,
7249 "malloc of %d byte failed\n", size);
7250 return(NULL);
7251 }
7252 cur = CUR;
7253 while (((cur >= 'a') && (cur <= 'z')) ||
7254 ((cur >= 'A') && (cur <= 'Z')) ||
7255 ((cur >= '0') && (cur <= '9')) ||
7256 (cur == '_') || (cur == '.') ||
7257 (cur == ':') || (cur == '-')) {
7258 if (len + 1 >= size) {
7259 size *= 2;
7260 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7261 if (buf == NULL) {
7262 xmlGenericError(xmlGenericErrorContext,
7263 "realloc of %d byte failed\n", size);
7264 return(NULL);
7265 }
7266 }
7267 buf[len++] = cur;
7268 NEXT;
7269 cur=CUR;
7270 }
7271 buf[len] = 0;
7272 return(buf);
7273}
7274
7275/**
7276 * xmlParseVersionInfo:
7277 * @ctxt: an XML parser context
7278 *
7279 * parse the XML version.
7280 *
7281 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7282 *
7283 * [25] Eq ::= S? '=' S?
7284 *
7285 * Returns the version string, e.g. "1.0"
7286 */
7287
7288xmlChar *
7289xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7290 xmlChar *version = NULL;
7291 const xmlChar *q;
7292
7293 if ((RAW == 'v') && (NXT(1) == 'e') &&
7294 (NXT(2) == 'r') && (NXT(3) == 's') &&
7295 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7296 (NXT(6) == 'n')) {
7297 SKIP(7);
7298 SKIP_BLANKS;
7299 if (RAW != '=') {
7300 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7301 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7302 ctxt->sax->error(ctxt->userData,
7303 "xmlParseVersionInfo : expected '='\n");
7304 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007305 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007306 return(NULL);
7307 }
7308 NEXT;
7309 SKIP_BLANKS;
7310 if (RAW == '"') {
7311 NEXT;
7312 q = CUR_PTR;
7313 version = xmlParseVersionNum(ctxt);
7314 if (RAW != '"') {
7315 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7316 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7317 ctxt->sax->error(ctxt->userData,
7318 "String not closed\n%.50s\n", q);
7319 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007320 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007321 } else
7322 NEXT;
7323 } else if (RAW == '\''){
7324 NEXT;
7325 q = CUR_PTR;
7326 version = xmlParseVersionNum(ctxt);
7327 if (RAW != '\'') {
7328 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7329 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7330 ctxt->sax->error(ctxt->userData,
7331 "String not closed\n%.50s\n", q);
7332 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007333 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007334 } else
7335 NEXT;
7336 } else {
7337 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7338 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7339 ctxt->sax->error(ctxt->userData,
7340 "xmlParseVersionInfo : expected ' or \"\n");
7341 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007342 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007343 }
7344 }
7345 return(version);
7346}
7347
7348/**
7349 * xmlParseEncName:
7350 * @ctxt: an XML parser context
7351 *
7352 * parse the XML encoding name
7353 *
7354 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7355 *
7356 * Returns the encoding name value or NULL
7357 */
7358xmlChar *
7359xmlParseEncName(xmlParserCtxtPtr ctxt) {
7360 xmlChar *buf = NULL;
7361 int len = 0;
7362 int size = 10;
7363 xmlChar cur;
7364
7365 cur = CUR;
7366 if (((cur >= 'a') && (cur <= 'z')) ||
7367 ((cur >= 'A') && (cur <= 'Z'))) {
7368 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7369 if (buf == NULL) {
7370 xmlGenericError(xmlGenericErrorContext,
7371 "malloc of %d byte failed\n", size);
7372 return(NULL);
7373 }
7374
7375 buf[len++] = cur;
7376 NEXT;
7377 cur = CUR;
7378 while (((cur >= 'a') && (cur <= 'z')) ||
7379 ((cur >= 'A') && (cur <= 'Z')) ||
7380 ((cur >= '0') && (cur <= '9')) ||
7381 (cur == '.') || (cur == '_') ||
7382 (cur == '-')) {
7383 if (len + 1 >= size) {
7384 size *= 2;
7385 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7386 if (buf == NULL) {
7387 xmlGenericError(xmlGenericErrorContext,
7388 "realloc of %d byte failed\n", size);
7389 return(NULL);
7390 }
7391 }
7392 buf[len++] = cur;
7393 NEXT;
7394 cur = CUR;
7395 if (cur == 0) {
7396 SHRINK;
7397 GROW;
7398 cur = CUR;
7399 }
7400 }
7401 buf[len] = 0;
7402 } else {
7403 ctxt->errNo = XML_ERR_ENCODING_NAME;
7404 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7405 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7406 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007407 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007408 }
7409 return(buf);
7410}
7411
7412/**
7413 * xmlParseEncodingDecl:
7414 * @ctxt: an XML parser context
7415 *
7416 * parse the XML encoding declaration
7417 *
7418 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7419 *
7420 * this setups the conversion filters.
7421 *
7422 * Returns the encoding value or NULL
7423 */
7424
7425xmlChar *
7426xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7427 xmlChar *encoding = NULL;
7428 const xmlChar *q;
7429
7430 SKIP_BLANKS;
7431 if ((RAW == 'e') && (NXT(1) == 'n') &&
7432 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7433 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7434 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7435 SKIP(8);
7436 SKIP_BLANKS;
7437 if (RAW != '=') {
7438 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7439 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7440 ctxt->sax->error(ctxt->userData,
7441 "xmlParseEncodingDecl : expected '='\n");
7442 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007443 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007444 return(NULL);
7445 }
7446 NEXT;
7447 SKIP_BLANKS;
7448 if (RAW == '"') {
7449 NEXT;
7450 q = CUR_PTR;
7451 encoding = xmlParseEncName(ctxt);
7452 if (RAW != '"') {
7453 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7454 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7455 ctxt->sax->error(ctxt->userData,
7456 "String not closed\n%.50s\n", q);
7457 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007458 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007459 } else
7460 NEXT;
7461 } else if (RAW == '\''){
7462 NEXT;
7463 q = CUR_PTR;
7464 encoding = xmlParseEncName(ctxt);
7465 if (RAW != '\'') {
7466 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7467 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7468 ctxt->sax->error(ctxt->userData,
7469 "String not closed\n%.50s\n", q);
7470 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007471 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007472 } else
7473 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00007474 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007475 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7476 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7477 ctxt->sax->error(ctxt->userData,
7478 "xmlParseEncodingDecl : expected ' or \"\n");
7479 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007480 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007481 }
7482 if (encoding != NULL) {
7483 xmlCharEncoding enc;
7484 xmlCharEncodingHandlerPtr handler;
7485
7486 if (ctxt->input->encoding != NULL)
7487 xmlFree((xmlChar *) ctxt->input->encoding);
7488 ctxt->input->encoding = encoding;
7489
7490 enc = xmlParseCharEncoding((const char *) encoding);
7491 /*
7492 * registered set of known encodings
7493 */
7494 if (enc != XML_CHAR_ENCODING_ERROR) {
7495 xmlSwitchEncoding(ctxt, enc);
7496 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
Daniel Veillard46d6c442002-04-09 16:10:39 +00007497 ctxt->input->encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007498 xmlFree(encoding);
7499 return(NULL);
7500 }
7501 } else {
7502 /*
7503 * fallback for unknown encodings
7504 */
7505 handler = xmlFindCharEncodingHandler((const char *) encoding);
7506 if (handler != NULL) {
7507 xmlSwitchToEncoding(ctxt, handler);
7508 } else {
7509 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7510 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7511 ctxt->sax->error(ctxt->userData,
7512 "Unsupported encoding %s\n", encoding);
7513 return(NULL);
7514 }
7515 }
7516 }
7517 }
7518 return(encoding);
7519}
7520
7521/**
7522 * xmlParseSDDecl:
7523 * @ctxt: an XML parser context
7524 *
7525 * parse the XML standalone declaration
7526 *
7527 * [32] SDDecl ::= S 'standalone' Eq
7528 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7529 *
7530 * [ VC: Standalone Document Declaration ]
7531 * TODO The standalone document declaration must have the value "no"
7532 * if any external markup declarations contain declarations of:
7533 * - attributes with default values, if elements to which these
7534 * attributes apply appear in the document without specifications
7535 * of values for these attributes, or
7536 * - entities (other than amp, lt, gt, apos, quot), if references
7537 * to those entities appear in the document, or
7538 * - attributes with values subject to normalization, where the
7539 * attribute appears in the document with a value which will change
7540 * as a result of normalization, or
7541 * - element types with element content, if white space occurs directly
7542 * within any instance of those types.
7543 *
7544 * Returns 1 if standalone, 0 otherwise
7545 */
7546
7547int
7548xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7549 int standalone = -1;
7550
7551 SKIP_BLANKS;
7552 if ((RAW == 's') && (NXT(1) == 't') &&
7553 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7554 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7555 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7556 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7557 SKIP(10);
7558 SKIP_BLANKS;
7559 if (RAW != '=') {
7560 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7561 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7562 ctxt->sax->error(ctxt->userData,
7563 "XML standalone declaration : expected '='\n");
7564 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007565 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007566 return(standalone);
7567 }
7568 NEXT;
7569 SKIP_BLANKS;
7570 if (RAW == '\''){
7571 NEXT;
7572 if ((RAW == 'n') && (NXT(1) == 'o')) {
7573 standalone = 0;
7574 SKIP(2);
7575 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7576 (NXT(2) == 's')) {
7577 standalone = 1;
7578 SKIP(3);
7579 } else {
7580 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7581 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7582 ctxt->sax->error(ctxt->userData,
7583 "standalone accepts only 'yes' or 'no'\n");
7584 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007585 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007586 }
7587 if (RAW != '\'') {
7588 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7589 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7590 ctxt->sax->error(ctxt->userData, "String not closed\n");
7591 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007592 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007593 } else
7594 NEXT;
7595 } else if (RAW == '"'){
7596 NEXT;
7597 if ((RAW == 'n') && (NXT(1) == 'o')) {
7598 standalone = 0;
7599 SKIP(2);
7600 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7601 (NXT(2) == 's')) {
7602 standalone = 1;
7603 SKIP(3);
7604 } else {
7605 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7606 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7607 ctxt->sax->error(ctxt->userData,
7608 "standalone accepts only 'yes' or 'no'\n");
7609 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007610 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007611 }
7612 if (RAW != '"') {
7613 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7614 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7615 ctxt->sax->error(ctxt->userData, "String not closed\n");
7616 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007617 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007618 } else
7619 NEXT;
7620 } else {
7621 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7622 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7623 ctxt->sax->error(ctxt->userData,
7624 "Standalone value not found\n");
7625 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007626 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007627 }
7628 }
7629 return(standalone);
7630}
7631
7632/**
7633 * xmlParseXMLDecl:
7634 * @ctxt: an XML parser context
7635 *
7636 * parse an XML declaration header
7637 *
7638 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7639 */
7640
7641void
7642xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7643 xmlChar *version;
7644
7645 /*
7646 * We know that '<?xml' is here.
7647 */
7648 SKIP(5);
7649
7650 if (!IS_BLANK(RAW)) {
7651 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7652 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7653 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7654 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007655 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007656 }
7657 SKIP_BLANKS;
7658
7659 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007660 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007661 */
7662 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007663 if (version == NULL) {
7664 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7665 ctxt->sax->error(ctxt->userData,
7666 "Malformed declaration expecting version\n");
7667 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007668 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard19840942001-11-29 16:11:38 +00007669 } else {
7670 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7671 /*
7672 * TODO: Blueberry should be detected here
7673 */
7674 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7675 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7676 version);
7677 }
7678 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007679 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007680 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007681 }
Owen Taylor3473f882001-02-23 17:55:21 +00007682
7683 /*
7684 * We may have the encoding declaration
7685 */
7686 if (!IS_BLANK(RAW)) {
7687 if ((RAW == '?') && (NXT(1) == '>')) {
7688 SKIP(2);
7689 return;
7690 }
7691 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7692 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7693 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7694 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007695 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007696 }
7697 xmlParseEncodingDecl(ctxt);
7698 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7699 /*
7700 * The XML REC instructs us to stop parsing right here
7701 */
7702 return;
7703 }
7704
7705 /*
7706 * We may have the standalone status.
7707 */
7708 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7709 if ((RAW == '?') && (NXT(1) == '>')) {
7710 SKIP(2);
7711 return;
7712 }
7713 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7715 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7716 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007717 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007718 }
7719 SKIP_BLANKS;
7720 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7721
7722 SKIP_BLANKS;
7723 if ((RAW == '?') && (NXT(1) == '>')) {
7724 SKIP(2);
7725 } else if (RAW == '>') {
7726 /* Deprecated old WD ... */
7727 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7728 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7729 ctxt->sax->error(ctxt->userData,
7730 "XML declaration must end-up with '?>'\n");
7731 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007732 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007733 NEXT;
7734 } else {
7735 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7736 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7737 ctxt->sax->error(ctxt->userData,
7738 "parsing XML declaration: '?>' expected\n");
7739 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007740 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007741 MOVETO_ENDTAG(CUR_PTR);
7742 NEXT;
7743 }
7744}
7745
7746/**
7747 * xmlParseMisc:
7748 * @ctxt: an XML parser context
7749 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007750 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007751 *
7752 * [27] Misc ::= Comment | PI | S
7753 */
7754
7755void
7756xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00007757 while (((RAW == '<') && (NXT(1) == '?')) ||
7758 ((RAW == '<') && (NXT(1) == '!') &&
7759 (NXT(2) == '-') && (NXT(3) == '-')) ||
7760 IS_BLANK(CUR)) {
7761 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007762 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00007763 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007764 NEXT;
7765 } else
7766 xmlParseComment(ctxt);
7767 }
7768}
7769
7770/**
7771 * xmlParseDocument:
7772 * @ctxt: an XML parser context
7773 *
7774 * parse an XML document (and build a tree if using the standard SAX
7775 * interface).
7776 *
7777 * [1] document ::= prolog element Misc*
7778 *
7779 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7780 *
7781 * Returns 0, -1 in case of error. the parser context is augmented
7782 * as a result of the parsing.
7783 */
7784
7785int
7786xmlParseDocument(xmlParserCtxtPtr ctxt) {
7787 xmlChar start[4];
7788 xmlCharEncoding enc;
7789
7790 xmlInitParser();
7791
7792 GROW;
7793
7794 /*
7795 * SAX: beginning of the document processing.
7796 */
7797 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7798 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7799
Daniel Veillard50f34372001-08-03 12:06:36 +00007800 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007801 /*
7802 * Get the 4 first bytes and decode the charset
7803 * if enc != XML_CHAR_ENCODING_NONE
7804 * plug some encoding conversion routines.
7805 */
7806 start[0] = RAW;
7807 start[1] = NXT(1);
7808 start[2] = NXT(2);
7809 start[3] = NXT(3);
7810 enc = xmlDetectCharEncoding(start, 4);
7811 if (enc != XML_CHAR_ENCODING_NONE) {
7812 xmlSwitchEncoding(ctxt, enc);
7813 }
Owen Taylor3473f882001-02-23 17:55:21 +00007814 }
7815
7816
7817 if (CUR == 0) {
7818 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7819 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7820 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7821 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007822 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007823 }
7824
7825 /*
7826 * Check for the XMLDecl in the Prolog.
7827 */
7828 GROW;
7829 if ((RAW == '<') && (NXT(1) == '?') &&
7830 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7831 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7832
7833 /*
7834 * Note that we will switch encoding on the fly.
7835 */
7836 xmlParseXMLDecl(ctxt);
7837 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7838 /*
7839 * The XML REC instructs us to stop parsing right here
7840 */
7841 return(-1);
7842 }
7843 ctxt->standalone = ctxt->input->standalone;
7844 SKIP_BLANKS;
7845 } else {
7846 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7847 }
7848 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7849 ctxt->sax->startDocument(ctxt->userData);
7850
7851 /*
7852 * The Misc part of the Prolog
7853 */
7854 GROW;
7855 xmlParseMisc(ctxt);
7856
7857 /*
7858 * Then possibly doc type declaration(s) and more Misc
7859 * (doctypedecl Misc*)?
7860 */
7861 GROW;
7862 if ((RAW == '<') && (NXT(1) == '!') &&
7863 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7864 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7865 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7866 (NXT(8) == 'E')) {
7867
7868 ctxt->inSubset = 1;
7869 xmlParseDocTypeDecl(ctxt);
7870 if (RAW == '[') {
7871 ctxt->instate = XML_PARSER_DTD;
7872 xmlParseInternalSubset(ctxt);
7873 }
7874
7875 /*
7876 * Create and update the external subset.
7877 */
7878 ctxt->inSubset = 2;
7879 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7880 (!ctxt->disableSAX))
7881 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7882 ctxt->extSubSystem, ctxt->extSubURI);
7883 ctxt->inSubset = 0;
7884
7885
7886 ctxt->instate = XML_PARSER_PROLOG;
7887 xmlParseMisc(ctxt);
7888 }
7889
7890 /*
7891 * Time to start parsing the tree itself
7892 */
7893 GROW;
7894 if (RAW != '<') {
7895 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7896 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7897 ctxt->sax->error(ctxt->userData,
7898 "Start tag expected, '<' not found\n");
7899 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007900 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007901 ctxt->instate = XML_PARSER_EOF;
7902 } else {
7903 ctxt->instate = XML_PARSER_CONTENT;
7904 xmlParseElement(ctxt);
7905 ctxt->instate = XML_PARSER_EPILOG;
7906
7907
7908 /*
7909 * The Misc part at the end
7910 */
7911 xmlParseMisc(ctxt);
7912
Daniel Veillard561b7f82002-03-20 21:55:57 +00007913 if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00007914 ctxt->errNo = XML_ERR_DOCUMENT_END;
7915 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7916 ctxt->sax->error(ctxt->userData,
7917 "Extra content at the end of the document\n");
7918 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007919 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007920 }
7921 ctxt->instate = XML_PARSER_EOF;
7922 }
7923
7924 /*
7925 * SAX: end of the document processing.
7926 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007927 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007928 ctxt->sax->endDocument(ctxt->userData);
7929
Daniel Veillard5997aca2002-03-18 18:36:20 +00007930 /*
7931 * Remove locally kept entity definitions if the tree was not built
7932 */
7933 if ((ctxt->myDoc != NULL) &&
7934 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
7935 xmlFreeDoc(ctxt->myDoc);
7936 ctxt->myDoc = NULL;
7937 }
7938
Daniel Veillardc7612992002-02-17 22:47:37 +00007939 if (! ctxt->wellFormed) {
7940 ctxt->valid = 0;
7941 return(-1);
7942 }
Owen Taylor3473f882001-02-23 17:55:21 +00007943 return(0);
7944}
7945
7946/**
7947 * xmlParseExtParsedEnt:
7948 * @ctxt: an XML parser context
7949 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007950 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00007951 * An external general parsed entity is well-formed if it matches the
7952 * production labeled extParsedEnt.
7953 *
7954 * [78] extParsedEnt ::= TextDecl? content
7955 *
7956 * Returns 0, -1 in case of error. the parser context is augmented
7957 * as a result of the parsing.
7958 */
7959
7960int
7961xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7962 xmlChar start[4];
7963 xmlCharEncoding enc;
7964
7965 xmlDefaultSAXHandlerInit();
7966
7967 GROW;
7968
7969 /*
7970 * SAX: beginning of the document processing.
7971 */
7972 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7973 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7974
7975 /*
7976 * Get the 4 first bytes and decode the charset
7977 * if enc != XML_CHAR_ENCODING_NONE
7978 * plug some encoding conversion routines.
7979 */
7980 start[0] = RAW;
7981 start[1] = NXT(1);
7982 start[2] = NXT(2);
7983 start[3] = NXT(3);
7984 enc = xmlDetectCharEncoding(start, 4);
7985 if (enc != XML_CHAR_ENCODING_NONE) {
7986 xmlSwitchEncoding(ctxt, enc);
7987 }
7988
7989
7990 if (CUR == 0) {
7991 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7992 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7993 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7994 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007995 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007996 }
7997
7998 /*
7999 * Check for the XMLDecl in the Prolog.
8000 */
8001 GROW;
8002 if ((RAW == '<') && (NXT(1) == '?') &&
8003 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8004 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8005
8006 /*
8007 * Note that we will switch encoding on the fly.
8008 */
8009 xmlParseXMLDecl(ctxt);
8010 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8011 /*
8012 * The XML REC instructs us to stop parsing right here
8013 */
8014 return(-1);
8015 }
8016 SKIP_BLANKS;
8017 } else {
8018 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8019 }
8020 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8021 ctxt->sax->startDocument(ctxt->userData);
8022
8023 /*
8024 * Doing validity checking on chunk doesn't make sense
8025 */
8026 ctxt->instate = XML_PARSER_CONTENT;
8027 ctxt->validate = 0;
8028 ctxt->loadsubset = 0;
8029 ctxt->depth = 0;
8030
8031 xmlParseContent(ctxt);
8032
8033 if ((RAW == '<') && (NXT(1) == '/')) {
8034 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8035 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8036 ctxt->sax->error(ctxt->userData,
8037 "chunk is not well balanced\n");
8038 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008039 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008040 } else if (RAW != 0) {
8041 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8042 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8043 ctxt->sax->error(ctxt->userData,
8044 "extra content at the end of well balanced chunk\n");
8045 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008046 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008047 }
8048
8049 /*
8050 * SAX: end of the document processing.
8051 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008052 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008053 ctxt->sax->endDocument(ctxt->userData);
8054
8055 if (! ctxt->wellFormed) return(-1);
8056 return(0);
8057}
8058
8059/************************************************************************
8060 * *
8061 * Progressive parsing interfaces *
8062 * *
8063 ************************************************************************/
8064
8065/**
8066 * xmlParseLookupSequence:
8067 * @ctxt: an XML parser context
8068 * @first: the first char to lookup
8069 * @next: the next char to lookup or zero
8070 * @third: the next char to lookup or zero
8071 *
8072 * Try to find if a sequence (first, next, third) or just (first next) or
8073 * (first) is available in the input stream.
8074 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8075 * to avoid rescanning sequences of bytes, it DOES change the state of the
8076 * parser, do not use liberally.
8077 *
8078 * Returns the index to the current parsing point if the full sequence
8079 * is available, -1 otherwise.
8080 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008081static int
Owen Taylor3473f882001-02-23 17:55:21 +00008082xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8083 xmlChar next, xmlChar third) {
8084 int base, len;
8085 xmlParserInputPtr in;
8086 const xmlChar *buf;
8087
8088 in = ctxt->input;
8089 if (in == NULL) return(-1);
8090 base = in->cur - in->base;
8091 if (base < 0) return(-1);
8092 if (ctxt->checkIndex > base)
8093 base = ctxt->checkIndex;
8094 if (in->buf == NULL) {
8095 buf = in->base;
8096 len = in->length;
8097 } else {
8098 buf = in->buf->buffer->content;
8099 len = in->buf->buffer->use;
8100 }
8101 /* take into account the sequence length */
8102 if (third) len -= 2;
8103 else if (next) len --;
8104 for (;base < len;base++) {
8105 if (buf[base] == first) {
8106 if (third != 0) {
8107 if ((buf[base + 1] != next) ||
8108 (buf[base + 2] != third)) continue;
8109 } else if (next != 0) {
8110 if (buf[base + 1] != next) continue;
8111 }
8112 ctxt->checkIndex = 0;
8113#ifdef DEBUG_PUSH
8114 if (next == 0)
8115 xmlGenericError(xmlGenericErrorContext,
8116 "PP: lookup '%c' found at %d\n",
8117 first, base);
8118 else if (third == 0)
8119 xmlGenericError(xmlGenericErrorContext,
8120 "PP: lookup '%c%c' found at %d\n",
8121 first, next, base);
8122 else
8123 xmlGenericError(xmlGenericErrorContext,
8124 "PP: lookup '%c%c%c' found at %d\n",
8125 first, next, third, base);
8126#endif
8127 return(base - (in->cur - in->base));
8128 }
8129 }
8130 ctxt->checkIndex = base;
8131#ifdef DEBUG_PUSH
8132 if (next == 0)
8133 xmlGenericError(xmlGenericErrorContext,
8134 "PP: lookup '%c' failed\n", first);
8135 else if (third == 0)
8136 xmlGenericError(xmlGenericErrorContext,
8137 "PP: lookup '%c%c' failed\n", first, next);
8138 else
8139 xmlGenericError(xmlGenericErrorContext,
8140 "PP: lookup '%c%c%c' failed\n", first, next, third);
8141#endif
8142 return(-1);
8143}
8144
8145/**
8146 * xmlParseTryOrFinish:
8147 * @ctxt: an XML parser context
8148 * @terminate: last chunk indicator
8149 *
8150 * Try to progress on parsing
8151 *
8152 * Returns zero if no parsing was possible
8153 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008154static int
Owen Taylor3473f882001-02-23 17:55:21 +00008155xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8156 int ret = 0;
8157 int avail;
8158 xmlChar cur, next;
8159
8160#ifdef DEBUG_PUSH
8161 switch (ctxt->instate) {
8162 case XML_PARSER_EOF:
8163 xmlGenericError(xmlGenericErrorContext,
8164 "PP: try EOF\n"); break;
8165 case XML_PARSER_START:
8166 xmlGenericError(xmlGenericErrorContext,
8167 "PP: try START\n"); break;
8168 case XML_PARSER_MISC:
8169 xmlGenericError(xmlGenericErrorContext,
8170 "PP: try MISC\n");break;
8171 case XML_PARSER_COMMENT:
8172 xmlGenericError(xmlGenericErrorContext,
8173 "PP: try COMMENT\n");break;
8174 case XML_PARSER_PROLOG:
8175 xmlGenericError(xmlGenericErrorContext,
8176 "PP: try PROLOG\n");break;
8177 case XML_PARSER_START_TAG:
8178 xmlGenericError(xmlGenericErrorContext,
8179 "PP: try START_TAG\n");break;
8180 case XML_PARSER_CONTENT:
8181 xmlGenericError(xmlGenericErrorContext,
8182 "PP: try CONTENT\n");break;
8183 case XML_PARSER_CDATA_SECTION:
8184 xmlGenericError(xmlGenericErrorContext,
8185 "PP: try CDATA_SECTION\n");break;
8186 case XML_PARSER_END_TAG:
8187 xmlGenericError(xmlGenericErrorContext,
8188 "PP: try END_TAG\n");break;
8189 case XML_PARSER_ENTITY_DECL:
8190 xmlGenericError(xmlGenericErrorContext,
8191 "PP: try ENTITY_DECL\n");break;
8192 case XML_PARSER_ENTITY_VALUE:
8193 xmlGenericError(xmlGenericErrorContext,
8194 "PP: try ENTITY_VALUE\n");break;
8195 case XML_PARSER_ATTRIBUTE_VALUE:
8196 xmlGenericError(xmlGenericErrorContext,
8197 "PP: try ATTRIBUTE_VALUE\n");break;
8198 case XML_PARSER_DTD:
8199 xmlGenericError(xmlGenericErrorContext,
8200 "PP: try DTD\n");break;
8201 case XML_PARSER_EPILOG:
8202 xmlGenericError(xmlGenericErrorContext,
8203 "PP: try EPILOG\n");break;
8204 case XML_PARSER_PI:
8205 xmlGenericError(xmlGenericErrorContext,
8206 "PP: try PI\n");break;
8207 case XML_PARSER_IGNORE:
8208 xmlGenericError(xmlGenericErrorContext,
8209 "PP: try IGNORE\n");break;
8210 }
8211#endif
8212
8213 while (1) {
Aleksey Sanine48a3182002-05-09 18:20:01 +00008214 SHRINK;
8215
Owen Taylor3473f882001-02-23 17:55:21 +00008216 /*
8217 * Pop-up of finished entities.
8218 */
8219 while ((RAW == 0) && (ctxt->inputNr > 1))
8220 xmlPopInput(ctxt);
8221
8222 if (ctxt->input ==NULL) break;
8223 if (ctxt->input->buf == NULL)
8224 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00008225 else {
8226 /*
8227 * If we are operating on converted input, try to flush
8228 * remaining chars to avoid them stalling in the non-converted
8229 * buffer.
8230 */
8231 if ((ctxt->input->buf->raw != NULL) &&
8232 (ctxt->input->buf->raw->use > 0)) {
8233 int base = ctxt->input->base -
8234 ctxt->input->buf->buffer->content;
8235 int current = ctxt->input->cur - ctxt->input->base;
8236
8237 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8238 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8239 ctxt->input->cur = ctxt->input->base + current;
8240 ctxt->input->end =
8241 &ctxt->input->buf->buffer->content[
8242 ctxt->input->buf->buffer->use];
8243 }
8244 avail = ctxt->input->buf->buffer->use -
8245 (ctxt->input->cur - ctxt->input->base);
8246 }
Owen Taylor3473f882001-02-23 17:55:21 +00008247 if (avail < 1)
8248 goto done;
8249 switch (ctxt->instate) {
8250 case XML_PARSER_EOF:
8251 /*
8252 * Document parsing is done !
8253 */
8254 goto done;
8255 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008256 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8257 xmlChar start[4];
8258 xmlCharEncoding enc;
8259
8260 /*
8261 * Very first chars read from the document flow.
8262 */
8263 if (avail < 4)
8264 goto done;
8265
8266 /*
8267 * Get the 4 first bytes and decode the charset
8268 * if enc != XML_CHAR_ENCODING_NONE
8269 * plug some encoding conversion routines.
8270 */
8271 start[0] = RAW;
8272 start[1] = NXT(1);
8273 start[2] = NXT(2);
8274 start[3] = NXT(3);
8275 enc = xmlDetectCharEncoding(start, 4);
8276 if (enc != XML_CHAR_ENCODING_NONE) {
8277 xmlSwitchEncoding(ctxt, enc);
8278 }
8279 break;
8280 }
Owen Taylor3473f882001-02-23 17:55:21 +00008281
8282 cur = ctxt->input->cur[0];
8283 next = ctxt->input->cur[1];
8284 if (cur == 0) {
8285 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8286 ctxt->sax->setDocumentLocator(ctxt->userData,
8287 &xmlDefaultSAXLocator);
8288 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8289 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8290 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8291 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008292 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008293 ctxt->instate = XML_PARSER_EOF;
8294#ifdef DEBUG_PUSH
8295 xmlGenericError(xmlGenericErrorContext,
8296 "PP: entering EOF\n");
8297#endif
8298 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8299 ctxt->sax->endDocument(ctxt->userData);
8300 goto done;
8301 }
8302 if ((cur == '<') && (next == '?')) {
8303 /* PI or XML decl */
8304 if (avail < 5) return(ret);
8305 if ((!terminate) &&
8306 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8307 return(ret);
8308 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8309 ctxt->sax->setDocumentLocator(ctxt->userData,
8310 &xmlDefaultSAXLocator);
8311 if ((ctxt->input->cur[2] == 'x') &&
8312 (ctxt->input->cur[3] == 'm') &&
8313 (ctxt->input->cur[4] == 'l') &&
8314 (IS_BLANK(ctxt->input->cur[5]))) {
8315 ret += 5;
8316#ifdef DEBUG_PUSH
8317 xmlGenericError(xmlGenericErrorContext,
8318 "PP: Parsing XML Decl\n");
8319#endif
8320 xmlParseXMLDecl(ctxt);
8321 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8322 /*
8323 * The XML REC instructs us to stop parsing right
8324 * here
8325 */
8326 ctxt->instate = XML_PARSER_EOF;
8327 return(0);
8328 }
8329 ctxt->standalone = ctxt->input->standalone;
8330 if ((ctxt->encoding == NULL) &&
8331 (ctxt->input->encoding != NULL))
8332 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8333 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8334 (!ctxt->disableSAX))
8335 ctxt->sax->startDocument(ctxt->userData);
8336 ctxt->instate = XML_PARSER_MISC;
8337#ifdef DEBUG_PUSH
8338 xmlGenericError(xmlGenericErrorContext,
8339 "PP: entering MISC\n");
8340#endif
8341 } else {
8342 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8343 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8344 (!ctxt->disableSAX))
8345 ctxt->sax->startDocument(ctxt->userData);
8346 ctxt->instate = XML_PARSER_MISC;
8347#ifdef DEBUG_PUSH
8348 xmlGenericError(xmlGenericErrorContext,
8349 "PP: entering MISC\n");
8350#endif
8351 }
8352 } else {
8353 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8354 ctxt->sax->setDocumentLocator(ctxt->userData,
8355 &xmlDefaultSAXLocator);
8356 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8357 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8358 (!ctxt->disableSAX))
8359 ctxt->sax->startDocument(ctxt->userData);
8360 ctxt->instate = XML_PARSER_MISC;
8361#ifdef DEBUG_PUSH
8362 xmlGenericError(xmlGenericErrorContext,
8363 "PP: entering MISC\n");
8364#endif
8365 }
8366 break;
8367 case XML_PARSER_MISC:
8368 SKIP_BLANKS;
8369 if (ctxt->input->buf == NULL)
8370 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8371 else
8372 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8373 if (avail < 2)
8374 goto done;
8375 cur = ctxt->input->cur[0];
8376 next = ctxt->input->cur[1];
8377 if ((cur == '<') && (next == '?')) {
8378 if ((!terminate) &&
8379 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8380 goto done;
8381#ifdef DEBUG_PUSH
8382 xmlGenericError(xmlGenericErrorContext,
8383 "PP: Parsing PI\n");
8384#endif
8385 xmlParsePI(ctxt);
8386 } else if ((cur == '<') && (next == '!') &&
8387 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8388 if ((!terminate) &&
8389 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8390 goto done;
8391#ifdef DEBUG_PUSH
8392 xmlGenericError(xmlGenericErrorContext,
8393 "PP: Parsing Comment\n");
8394#endif
8395 xmlParseComment(ctxt);
8396 ctxt->instate = XML_PARSER_MISC;
8397 } else if ((cur == '<') && (next == '!') &&
8398 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8399 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8400 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8401 (ctxt->input->cur[8] == 'E')) {
8402 if ((!terminate) &&
8403 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8404 goto done;
8405#ifdef DEBUG_PUSH
8406 xmlGenericError(xmlGenericErrorContext,
8407 "PP: Parsing internal subset\n");
8408#endif
8409 ctxt->inSubset = 1;
8410 xmlParseDocTypeDecl(ctxt);
8411 if (RAW == '[') {
8412 ctxt->instate = XML_PARSER_DTD;
8413#ifdef DEBUG_PUSH
8414 xmlGenericError(xmlGenericErrorContext,
8415 "PP: entering DTD\n");
8416#endif
8417 } else {
8418 /*
8419 * Create and update the external subset.
8420 */
8421 ctxt->inSubset = 2;
8422 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8423 (ctxt->sax->externalSubset != NULL))
8424 ctxt->sax->externalSubset(ctxt->userData,
8425 ctxt->intSubName, ctxt->extSubSystem,
8426 ctxt->extSubURI);
8427 ctxt->inSubset = 0;
8428 ctxt->instate = XML_PARSER_PROLOG;
8429#ifdef DEBUG_PUSH
8430 xmlGenericError(xmlGenericErrorContext,
8431 "PP: entering PROLOG\n");
8432#endif
8433 }
8434 } else if ((cur == '<') && (next == '!') &&
8435 (avail < 9)) {
8436 goto done;
8437 } else {
8438 ctxt->instate = XML_PARSER_START_TAG;
8439#ifdef DEBUG_PUSH
8440 xmlGenericError(xmlGenericErrorContext,
8441 "PP: entering START_TAG\n");
8442#endif
8443 }
8444 break;
8445 case XML_PARSER_IGNORE:
8446 xmlGenericError(xmlGenericErrorContext,
8447 "PP: internal error, state == IGNORE");
8448 ctxt->instate = XML_PARSER_DTD;
8449#ifdef DEBUG_PUSH
8450 xmlGenericError(xmlGenericErrorContext,
8451 "PP: entering DTD\n");
8452#endif
8453 break;
8454 case XML_PARSER_PROLOG:
8455 SKIP_BLANKS;
8456 if (ctxt->input->buf == NULL)
8457 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8458 else
8459 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8460 if (avail < 2)
8461 goto done;
8462 cur = ctxt->input->cur[0];
8463 next = ctxt->input->cur[1];
8464 if ((cur == '<') && (next == '?')) {
8465 if ((!terminate) &&
8466 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8467 goto done;
8468#ifdef DEBUG_PUSH
8469 xmlGenericError(xmlGenericErrorContext,
8470 "PP: Parsing PI\n");
8471#endif
8472 xmlParsePI(ctxt);
8473 } else if ((cur == '<') && (next == '!') &&
8474 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8475 if ((!terminate) &&
8476 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8477 goto done;
8478#ifdef DEBUG_PUSH
8479 xmlGenericError(xmlGenericErrorContext,
8480 "PP: Parsing Comment\n");
8481#endif
8482 xmlParseComment(ctxt);
8483 ctxt->instate = XML_PARSER_PROLOG;
8484 } else if ((cur == '<') && (next == '!') &&
8485 (avail < 4)) {
8486 goto done;
8487 } else {
8488 ctxt->instate = XML_PARSER_START_TAG;
8489#ifdef DEBUG_PUSH
8490 xmlGenericError(xmlGenericErrorContext,
8491 "PP: entering START_TAG\n");
8492#endif
8493 }
8494 break;
8495 case XML_PARSER_EPILOG:
8496 SKIP_BLANKS;
8497 if (ctxt->input->buf == NULL)
8498 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8499 else
8500 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8501 if (avail < 2)
8502 goto done;
8503 cur = ctxt->input->cur[0];
8504 next = ctxt->input->cur[1];
8505 if ((cur == '<') && (next == '?')) {
8506 if ((!terminate) &&
8507 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8508 goto done;
8509#ifdef DEBUG_PUSH
8510 xmlGenericError(xmlGenericErrorContext,
8511 "PP: Parsing PI\n");
8512#endif
8513 xmlParsePI(ctxt);
8514 ctxt->instate = XML_PARSER_EPILOG;
8515 } else if ((cur == '<') && (next == '!') &&
8516 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8517 if ((!terminate) &&
8518 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8519 goto done;
8520#ifdef DEBUG_PUSH
8521 xmlGenericError(xmlGenericErrorContext,
8522 "PP: Parsing Comment\n");
8523#endif
8524 xmlParseComment(ctxt);
8525 ctxt->instate = XML_PARSER_EPILOG;
8526 } else if ((cur == '<') && (next == '!') &&
8527 (avail < 4)) {
8528 goto done;
8529 } else {
8530 ctxt->errNo = XML_ERR_DOCUMENT_END;
8531 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8532 ctxt->sax->error(ctxt->userData,
8533 "Extra content at the end of the document\n");
8534 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008535 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008536 ctxt->instate = XML_PARSER_EOF;
8537#ifdef DEBUG_PUSH
8538 xmlGenericError(xmlGenericErrorContext,
8539 "PP: entering EOF\n");
8540#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008541 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008542 ctxt->sax->endDocument(ctxt->userData);
8543 goto done;
8544 }
8545 break;
8546 case XML_PARSER_START_TAG: {
8547 xmlChar *name, *oldname;
8548
8549 if ((avail < 2) && (ctxt->inputNr == 1))
8550 goto done;
8551 cur = ctxt->input->cur[0];
8552 if (cur != '<') {
8553 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8554 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8555 ctxt->sax->error(ctxt->userData,
8556 "Start tag expect, '<' not found\n");
8557 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008558 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008559 ctxt->instate = XML_PARSER_EOF;
8560#ifdef DEBUG_PUSH
8561 xmlGenericError(xmlGenericErrorContext,
8562 "PP: entering EOF\n");
8563#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008564 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008565 ctxt->sax->endDocument(ctxt->userData);
8566 goto done;
8567 }
8568 if ((!terminate) &&
8569 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8570 goto done;
8571 if (ctxt->spaceNr == 0)
8572 spacePush(ctxt, -1);
8573 else
8574 spacePush(ctxt, *ctxt->space);
8575 name = xmlParseStartTag(ctxt);
8576 if (name == NULL) {
8577 spacePop(ctxt);
8578 ctxt->instate = XML_PARSER_EOF;
8579#ifdef DEBUG_PUSH
8580 xmlGenericError(xmlGenericErrorContext,
8581 "PP: entering EOF\n");
8582#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008583 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008584 ctxt->sax->endDocument(ctxt->userData);
8585 goto done;
8586 }
8587 namePush(ctxt, xmlStrdup(name));
8588
8589 /*
8590 * [ VC: Root Element Type ]
8591 * The Name in the document type declaration must match
8592 * the element type of the root element.
8593 */
8594 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8595 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8596 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8597
8598 /*
8599 * Check for an Empty Element.
8600 */
8601 if ((RAW == '/') && (NXT(1) == '>')) {
8602 SKIP(2);
8603 if ((ctxt->sax != NULL) &&
8604 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8605 ctxt->sax->endElement(ctxt->userData, name);
8606 xmlFree(name);
8607 oldname = namePop(ctxt);
8608 spacePop(ctxt);
8609 if (oldname != NULL) {
8610#ifdef DEBUG_STACK
8611 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8612#endif
8613 xmlFree(oldname);
8614 }
8615 if (ctxt->name == NULL) {
8616 ctxt->instate = XML_PARSER_EPILOG;
8617#ifdef DEBUG_PUSH
8618 xmlGenericError(xmlGenericErrorContext,
8619 "PP: entering EPILOG\n");
8620#endif
8621 } else {
8622 ctxt->instate = XML_PARSER_CONTENT;
8623#ifdef DEBUG_PUSH
8624 xmlGenericError(xmlGenericErrorContext,
8625 "PP: entering CONTENT\n");
8626#endif
8627 }
8628 break;
8629 }
8630 if (RAW == '>') {
8631 NEXT;
8632 } else {
8633 ctxt->errNo = XML_ERR_GT_REQUIRED;
8634 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8635 ctxt->sax->error(ctxt->userData,
8636 "Couldn't find end of Start Tag %s\n",
8637 name);
8638 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008639 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008640
8641 /*
8642 * end of parsing of this node.
8643 */
8644 nodePop(ctxt);
8645 oldname = namePop(ctxt);
8646 spacePop(ctxt);
8647 if (oldname != NULL) {
8648#ifdef DEBUG_STACK
8649 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8650#endif
8651 xmlFree(oldname);
8652 }
8653 }
8654 xmlFree(name);
8655 ctxt->instate = XML_PARSER_CONTENT;
8656#ifdef DEBUG_PUSH
8657 xmlGenericError(xmlGenericErrorContext,
8658 "PP: entering CONTENT\n");
8659#endif
8660 break;
8661 }
8662 case XML_PARSER_CONTENT: {
8663 const xmlChar *test;
8664 int cons;
Owen Taylor3473f882001-02-23 17:55:21 +00008665 if ((avail < 2) && (ctxt->inputNr == 1))
8666 goto done;
8667 cur = ctxt->input->cur[0];
8668 next = ctxt->input->cur[1];
8669
8670 test = CUR_PTR;
8671 cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008672 if ((cur == '<') && (next == '?')) {
8673 if ((!terminate) &&
8674 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8675 goto done;
8676#ifdef DEBUG_PUSH
8677 xmlGenericError(xmlGenericErrorContext,
8678 "PP: Parsing PI\n");
8679#endif
8680 xmlParsePI(ctxt);
8681 } else if ((cur == '<') && (next == '!') &&
8682 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8683 if ((!terminate) &&
8684 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8685 goto done;
8686#ifdef DEBUG_PUSH
8687 xmlGenericError(xmlGenericErrorContext,
8688 "PP: Parsing Comment\n");
8689#endif
8690 xmlParseComment(ctxt);
8691 ctxt->instate = XML_PARSER_CONTENT;
8692 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8693 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8694 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8695 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8696 (ctxt->input->cur[8] == '[')) {
8697 SKIP(9);
8698 ctxt->instate = XML_PARSER_CDATA_SECTION;
8699#ifdef DEBUG_PUSH
8700 xmlGenericError(xmlGenericErrorContext,
8701 "PP: entering CDATA_SECTION\n");
8702#endif
8703 break;
8704 } else if ((cur == '<') && (next == '!') &&
8705 (avail < 9)) {
8706 goto done;
8707 } else if ((cur == '<') && (next == '/')) {
8708 ctxt->instate = XML_PARSER_END_TAG;
8709#ifdef DEBUG_PUSH
8710 xmlGenericError(xmlGenericErrorContext,
8711 "PP: entering END_TAG\n");
8712#endif
8713 break;
8714 } else if (cur == '<') {
8715 ctxt->instate = XML_PARSER_START_TAG;
8716#ifdef DEBUG_PUSH
8717 xmlGenericError(xmlGenericErrorContext,
8718 "PP: entering START_TAG\n");
8719#endif
8720 break;
8721 } else if (cur == '&') {
8722 if ((!terminate) &&
8723 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8724 goto done;
8725#ifdef DEBUG_PUSH
8726 xmlGenericError(xmlGenericErrorContext,
8727 "PP: Parsing Reference\n");
8728#endif
8729 xmlParseReference(ctxt);
8730 } else {
8731 /* TODO Avoid the extra copy, handle directly !!! */
8732 /*
8733 * Goal of the following test is:
8734 * - minimize calls to the SAX 'character' callback
8735 * when they are mergeable
8736 * - handle an problem for isBlank when we only parse
8737 * a sequence of blank chars and the next one is
8738 * not available to check against '<' presence.
8739 * - tries to homogenize the differences in SAX
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008740 * callbacks between the push and pull versions
Owen Taylor3473f882001-02-23 17:55:21 +00008741 * of the parser.
8742 */
8743 if ((ctxt->inputNr == 1) &&
8744 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8745 if ((!terminate) &&
8746 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8747 goto done;
8748 }
8749 ctxt->checkIndex = 0;
8750#ifdef DEBUG_PUSH
8751 xmlGenericError(xmlGenericErrorContext,
8752 "PP: Parsing char data\n");
8753#endif
8754 xmlParseCharData(ctxt, 0);
8755 }
8756 /*
8757 * Pop-up of finished entities.
8758 */
8759 while ((RAW == 0) && (ctxt->inputNr > 1))
8760 xmlPopInput(ctxt);
Daniel Veillardfdc91562002-07-01 21:52:03 +00008761 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008762 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8763 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8764 ctxt->sax->error(ctxt->userData,
8765 "detected an error in element content\n");
8766 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008767 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008768 ctxt->instate = XML_PARSER_EOF;
8769 break;
8770 }
8771 break;
8772 }
8773 case XML_PARSER_CDATA_SECTION: {
8774 /*
8775 * The Push mode need to have the SAX callback for
8776 * cdataBlock merge back contiguous callbacks.
8777 */
8778 int base;
8779
8780 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8781 if (base < 0) {
8782 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8783 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8784 if (ctxt->sax->cdataBlock != NULL)
8785 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8786 XML_PARSER_BIG_BUFFER_SIZE);
8787 }
8788 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8789 ctxt->checkIndex = 0;
8790 }
8791 goto done;
8792 } else {
8793 if ((ctxt->sax != NULL) && (base > 0) &&
8794 (!ctxt->disableSAX)) {
8795 if (ctxt->sax->cdataBlock != NULL)
8796 ctxt->sax->cdataBlock(ctxt->userData,
8797 ctxt->input->cur, base);
8798 }
8799 SKIP(base + 3);
8800 ctxt->checkIndex = 0;
8801 ctxt->instate = XML_PARSER_CONTENT;
8802#ifdef DEBUG_PUSH
8803 xmlGenericError(xmlGenericErrorContext,
8804 "PP: entering CONTENT\n");
8805#endif
8806 }
8807 break;
8808 }
8809 case XML_PARSER_END_TAG:
8810 if (avail < 2)
8811 goto done;
8812 if ((!terminate) &&
8813 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8814 goto done;
8815 xmlParseEndTag(ctxt);
8816 if (ctxt->name == NULL) {
8817 ctxt->instate = XML_PARSER_EPILOG;
8818#ifdef DEBUG_PUSH
8819 xmlGenericError(xmlGenericErrorContext,
8820 "PP: entering EPILOG\n");
8821#endif
8822 } else {
8823 ctxt->instate = XML_PARSER_CONTENT;
8824#ifdef DEBUG_PUSH
8825 xmlGenericError(xmlGenericErrorContext,
8826 "PP: entering CONTENT\n");
8827#endif
8828 }
8829 break;
8830 case XML_PARSER_DTD: {
8831 /*
8832 * Sorry but progressive parsing of the internal subset
8833 * is not expected to be supported. We first check that
8834 * the full content of the internal subset is available and
8835 * the parsing is launched only at that point.
8836 * Internal subset ends up with "']' S? '>'" in an unescaped
8837 * section and not in a ']]>' sequence which are conditional
8838 * sections (whoever argued to keep that crap in XML deserve
8839 * a place in hell !).
8840 */
8841 int base, i;
8842 xmlChar *buf;
8843 xmlChar quote = 0;
8844
8845 base = ctxt->input->cur - ctxt->input->base;
8846 if (base < 0) return(0);
8847 if (ctxt->checkIndex > base)
8848 base = ctxt->checkIndex;
8849 buf = ctxt->input->buf->buffer->content;
8850 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8851 base++) {
8852 if (quote != 0) {
8853 if (buf[base] == quote)
8854 quote = 0;
8855 continue;
8856 }
8857 if (buf[base] == '"') {
8858 quote = '"';
8859 continue;
8860 }
8861 if (buf[base] == '\'') {
8862 quote = '\'';
8863 continue;
8864 }
8865 if (buf[base] == ']') {
8866 if ((unsigned int) base +1 >=
8867 ctxt->input->buf->buffer->use)
8868 break;
8869 if (buf[base + 1] == ']') {
8870 /* conditional crap, skip both ']' ! */
8871 base++;
8872 continue;
8873 }
8874 for (i = 0;
8875 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8876 i++) {
8877 if (buf[base + i] == '>')
8878 goto found_end_int_subset;
8879 }
8880 break;
8881 }
8882 }
8883 /*
8884 * We didn't found the end of the Internal subset
8885 */
8886 if (quote == 0)
8887 ctxt->checkIndex = base;
8888#ifdef DEBUG_PUSH
8889 if (next == 0)
8890 xmlGenericError(xmlGenericErrorContext,
8891 "PP: lookup of int subset end filed\n");
8892#endif
8893 goto done;
8894
8895found_end_int_subset:
8896 xmlParseInternalSubset(ctxt);
8897 ctxt->inSubset = 2;
8898 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8899 (ctxt->sax->externalSubset != NULL))
8900 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8901 ctxt->extSubSystem, ctxt->extSubURI);
8902 ctxt->inSubset = 0;
8903 ctxt->instate = XML_PARSER_PROLOG;
8904 ctxt->checkIndex = 0;
8905#ifdef DEBUG_PUSH
8906 xmlGenericError(xmlGenericErrorContext,
8907 "PP: entering PROLOG\n");
8908#endif
8909 break;
8910 }
8911 case XML_PARSER_COMMENT:
8912 xmlGenericError(xmlGenericErrorContext,
8913 "PP: internal error, state == COMMENT\n");
8914 ctxt->instate = XML_PARSER_CONTENT;
8915#ifdef DEBUG_PUSH
8916 xmlGenericError(xmlGenericErrorContext,
8917 "PP: entering CONTENT\n");
8918#endif
8919 break;
8920 case XML_PARSER_PI:
8921 xmlGenericError(xmlGenericErrorContext,
8922 "PP: internal error, state == PI\n");
8923 ctxt->instate = XML_PARSER_CONTENT;
8924#ifdef DEBUG_PUSH
8925 xmlGenericError(xmlGenericErrorContext,
8926 "PP: entering CONTENT\n");
8927#endif
8928 break;
8929 case XML_PARSER_ENTITY_DECL:
8930 xmlGenericError(xmlGenericErrorContext,
8931 "PP: internal error, state == ENTITY_DECL\n");
8932 ctxt->instate = XML_PARSER_DTD;
8933#ifdef DEBUG_PUSH
8934 xmlGenericError(xmlGenericErrorContext,
8935 "PP: entering DTD\n");
8936#endif
8937 break;
8938 case XML_PARSER_ENTITY_VALUE:
8939 xmlGenericError(xmlGenericErrorContext,
8940 "PP: internal error, state == ENTITY_VALUE\n");
8941 ctxt->instate = XML_PARSER_CONTENT;
8942#ifdef DEBUG_PUSH
8943 xmlGenericError(xmlGenericErrorContext,
8944 "PP: entering DTD\n");
8945#endif
8946 break;
8947 case XML_PARSER_ATTRIBUTE_VALUE:
8948 xmlGenericError(xmlGenericErrorContext,
8949 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8950 ctxt->instate = XML_PARSER_START_TAG;
8951#ifdef DEBUG_PUSH
8952 xmlGenericError(xmlGenericErrorContext,
8953 "PP: entering START_TAG\n");
8954#endif
8955 break;
8956 case XML_PARSER_SYSTEM_LITERAL:
8957 xmlGenericError(xmlGenericErrorContext,
8958 "PP: internal error, state == SYSTEM_LITERAL\n");
8959 ctxt->instate = XML_PARSER_START_TAG;
8960#ifdef DEBUG_PUSH
8961 xmlGenericError(xmlGenericErrorContext,
8962 "PP: entering START_TAG\n");
8963#endif
8964 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00008965 case XML_PARSER_PUBLIC_LITERAL:
8966 xmlGenericError(xmlGenericErrorContext,
8967 "PP: internal error, state == PUBLIC_LITERAL\n");
8968 ctxt->instate = XML_PARSER_START_TAG;
8969#ifdef DEBUG_PUSH
8970 xmlGenericError(xmlGenericErrorContext,
8971 "PP: entering START_TAG\n");
8972#endif
8973 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008974 }
8975 }
8976done:
8977#ifdef DEBUG_PUSH
8978 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8979#endif
8980 return(ret);
8981}
8982
8983/**
Owen Taylor3473f882001-02-23 17:55:21 +00008984 * xmlParseChunk:
8985 * @ctxt: an XML parser context
8986 * @chunk: an char array
8987 * @size: the size in byte of the chunk
8988 * @terminate: last chunk indicator
8989 *
8990 * Parse a Chunk of memory
8991 *
8992 * Returns zero if no error, the xmlParserErrors otherwise.
8993 */
8994int
8995xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8996 int terminate) {
8997 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8998 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8999 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9000 int cur = ctxt->input->cur - ctxt->input->base;
9001
9002 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9003 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9004 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009005 ctxt->input->end =
9006 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009007#ifdef DEBUG_PUSH
9008 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9009#endif
9010
9011 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
9012 xmlParseTryOrFinish(ctxt, terminate);
9013 } else if (ctxt->instate != XML_PARSER_EOF) {
9014 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
9015 xmlParserInputBufferPtr in = ctxt->input->buf;
9016 if ((in->encoder != NULL) && (in->buffer != NULL) &&
9017 (in->raw != NULL)) {
9018 int nbchars;
9019
9020 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
9021 if (nbchars < 0) {
9022 xmlGenericError(xmlGenericErrorContext,
9023 "xmlParseChunk: encoder error\n");
9024 return(XML_ERR_INVALID_ENCODING);
9025 }
9026 }
9027 }
9028 }
9029 xmlParseTryOrFinish(ctxt, terminate);
9030 if (terminate) {
9031 /*
9032 * Check for termination
9033 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009034 int avail = 0;
9035 if (ctxt->input->buf == NULL)
9036 avail = ctxt->input->length -
9037 (ctxt->input->cur - ctxt->input->base);
9038 else
9039 avail = ctxt->input->buf->buffer->use -
9040 (ctxt->input->cur - ctxt->input->base);
9041
Owen Taylor3473f882001-02-23 17:55:21 +00009042 if ((ctxt->instate != XML_PARSER_EOF) &&
9043 (ctxt->instate != XML_PARSER_EPILOG)) {
9044 ctxt->errNo = XML_ERR_DOCUMENT_END;
9045 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9046 ctxt->sax->error(ctxt->userData,
9047 "Extra content at the end of the document\n");
9048 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009049 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009050 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009051 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
9052 ctxt->errNo = XML_ERR_DOCUMENT_END;
9053 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9054 ctxt->sax->error(ctxt->userData,
9055 "Extra content at the end of the document\n");
9056 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009057 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009058
9059 }
Owen Taylor3473f882001-02-23 17:55:21 +00009060 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009061 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009062 ctxt->sax->endDocument(ctxt->userData);
9063 }
9064 ctxt->instate = XML_PARSER_EOF;
9065 }
9066 return((xmlParserErrors) ctxt->errNo);
9067}
9068
9069/************************************************************************
9070 * *
9071 * I/O front end functions to the parser *
9072 * *
9073 ************************************************************************/
9074
9075/**
9076 * xmlStopParser:
9077 * @ctxt: an XML parser context
9078 *
9079 * Blocks further parser processing
9080 */
9081void
9082xmlStopParser(xmlParserCtxtPtr ctxt) {
9083 ctxt->instate = XML_PARSER_EOF;
9084 if (ctxt->input != NULL)
9085 ctxt->input->cur = BAD_CAST"";
9086}
9087
9088/**
9089 * xmlCreatePushParserCtxt:
9090 * @sax: a SAX handler
9091 * @user_data: The user data returned on SAX callbacks
9092 * @chunk: a pointer to an array of chars
9093 * @size: number of chars in the array
9094 * @filename: an optional file name or URI
9095 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00009096 * Create a parser context for using the XML parser in push mode.
9097 * If @buffer and @size are non-NULL, the data is used to detect
9098 * the encoding. The remaining characters will be parsed so they
9099 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00009100 * To allow content encoding detection, @size should be >= 4
9101 * The value of @filename is used for fetching external entities
9102 * and error/warning reports.
9103 *
9104 * Returns the new parser context or NULL
9105 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00009106
Owen Taylor3473f882001-02-23 17:55:21 +00009107xmlParserCtxtPtr
9108xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9109 const char *chunk, int size, const char *filename) {
9110 xmlParserCtxtPtr ctxt;
9111 xmlParserInputPtr inputStream;
9112 xmlParserInputBufferPtr buf;
9113 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9114
9115 /*
9116 * plug some encoding conversion routines
9117 */
9118 if ((chunk != NULL) && (size >= 4))
9119 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9120
9121 buf = xmlAllocParserInputBuffer(enc);
9122 if (buf == NULL) return(NULL);
9123
9124 ctxt = xmlNewParserCtxt();
9125 if (ctxt == NULL) {
9126 xmlFree(buf);
9127 return(NULL);
9128 }
9129 if (sax != NULL) {
9130 if (ctxt->sax != &xmlDefaultSAXHandler)
9131 xmlFree(ctxt->sax);
9132 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9133 if (ctxt->sax == NULL) {
9134 xmlFree(buf);
9135 xmlFree(ctxt);
9136 return(NULL);
9137 }
9138 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9139 if (user_data != NULL)
9140 ctxt->userData = user_data;
9141 }
9142 if (filename == NULL) {
9143 ctxt->directory = NULL;
9144 } else {
9145 ctxt->directory = xmlParserGetDirectory(filename);
9146 }
9147
9148 inputStream = xmlNewInputStream(ctxt);
9149 if (inputStream == NULL) {
9150 xmlFreeParserCtxt(ctxt);
9151 return(NULL);
9152 }
9153
9154 if (filename == NULL)
9155 inputStream->filename = NULL;
9156 else
Daniel Veillardf4862f02002-09-10 11:13:43 +00009157 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +00009158 xmlCanonicPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +00009159 inputStream->buf = buf;
9160 inputStream->base = inputStream->buf->buffer->content;
9161 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009162 inputStream->end =
9163 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009164
9165 inputPush(ctxt, inputStream);
9166
9167 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9168 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009169 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9170 int cur = ctxt->input->cur - ctxt->input->base;
9171
Owen Taylor3473f882001-02-23 17:55:21 +00009172 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009173
9174 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9175 ctxt->input->cur = ctxt->input->base + cur;
9176 ctxt->input->end =
9177 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009178#ifdef DEBUG_PUSH
9179 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9180#endif
9181 }
9182
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009183 if (enc != XML_CHAR_ENCODING_NONE) {
9184 xmlSwitchEncoding(ctxt, enc);
9185 }
9186
Owen Taylor3473f882001-02-23 17:55:21 +00009187 return(ctxt);
9188}
9189
9190/**
9191 * xmlCreateIOParserCtxt:
9192 * @sax: a SAX handler
9193 * @user_data: The user data returned on SAX callbacks
9194 * @ioread: an I/O read function
9195 * @ioclose: an I/O close function
9196 * @ioctx: an I/O handler
9197 * @enc: the charset encoding if known
9198 *
9199 * Create a parser context for using the XML parser with an existing
9200 * I/O stream
9201 *
9202 * Returns the new parser context or NULL
9203 */
9204xmlParserCtxtPtr
9205xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9206 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9207 void *ioctx, xmlCharEncoding enc) {
9208 xmlParserCtxtPtr ctxt;
9209 xmlParserInputPtr inputStream;
9210 xmlParserInputBufferPtr buf;
9211
9212 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9213 if (buf == NULL) return(NULL);
9214
9215 ctxt = xmlNewParserCtxt();
9216 if (ctxt == NULL) {
9217 xmlFree(buf);
9218 return(NULL);
9219 }
9220 if (sax != NULL) {
9221 if (ctxt->sax != &xmlDefaultSAXHandler)
9222 xmlFree(ctxt->sax);
9223 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9224 if (ctxt->sax == NULL) {
9225 xmlFree(buf);
9226 xmlFree(ctxt);
9227 return(NULL);
9228 }
9229 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9230 if (user_data != NULL)
9231 ctxt->userData = user_data;
9232 }
9233
9234 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9235 if (inputStream == NULL) {
9236 xmlFreeParserCtxt(ctxt);
9237 return(NULL);
9238 }
9239 inputPush(ctxt, inputStream);
9240
9241 return(ctxt);
9242}
9243
/************************************************************************
 *									*
 * 		Front ends when parsing a DTD				*
 *									*
 ************************************************************************/
9249
9250/**
9251 * xmlIOParseDTD:
9252 * @sax: the SAX handler block or NULL
9253 * @input: an Input Buffer
9254 * @enc: the charset encoding if known
9255 *
9256 * Load and parse a DTD
9257 *
9258 * Returns the resulting xmlDtdPtr or NULL in case of error.
9259 * @input will be freed at parsing end.
9260 */
9261
9262xmlDtdPtr
9263xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9264 xmlCharEncoding enc) {
9265 xmlDtdPtr ret = NULL;
9266 xmlParserCtxtPtr ctxt;
9267 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009268 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00009269
9270 if (input == NULL)
9271 return(NULL);
9272
9273 ctxt = xmlNewParserCtxt();
9274 if (ctxt == NULL) {
9275 return(NULL);
9276 }
9277
9278 /*
9279 * Set-up the SAX context
9280 */
9281 if (sax != NULL) {
9282 if (ctxt->sax != NULL)
9283 xmlFree(ctxt->sax);
9284 ctxt->sax = sax;
9285 ctxt->userData = NULL;
9286 }
9287
9288 /*
9289 * generate a parser input from the I/O handler
9290 */
9291
9292 pinput = xmlNewIOInputStream(ctxt, input, enc);
9293 if (pinput == NULL) {
9294 if (sax != NULL) ctxt->sax = NULL;
9295 xmlFreeParserCtxt(ctxt);
9296 return(NULL);
9297 }
9298
9299 /*
9300 * plug some encoding conversion routines here.
9301 */
9302 xmlPushInput(ctxt, pinput);
9303
9304 pinput->filename = NULL;
9305 pinput->line = 1;
9306 pinput->col = 1;
9307 pinput->base = ctxt->input->cur;
9308 pinput->cur = ctxt->input->cur;
9309 pinput->free = NULL;
9310
9311 /*
9312 * let's parse that entity knowing it's an external subset.
9313 */
9314 ctxt->inSubset = 2;
9315 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9316 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9317 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009318
9319 if (enc == XML_CHAR_ENCODING_NONE) {
9320 /*
9321 * Get the 4 first bytes and decode the charset
9322 * if enc != XML_CHAR_ENCODING_NONE
9323 * plug some encoding conversion routines.
9324 */
9325 start[0] = RAW;
9326 start[1] = NXT(1);
9327 start[2] = NXT(2);
9328 start[3] = NXT(3);
9329 enc = xmlDetectCharEncoding(start, 4);
9330 if (enc != XML_CHAR_ENCODING_NONE) {
9331 xmlSwitchEncoding(ctxt, enc);
9332 }
9333 }
9334
Owen Taylor3473f882001-02-23 17:55:21 +00009335 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9336
9337 if (ctxt->myDoc != NULL) {
9338 if (ctxt->wellFormed) {
9339 ret = ctxt->myDoc->extSubset;
9340 ctxt->myDoc->extSubset = NULL;
9341 } else {
9342 ret = NULL;
9343 }
9344 xmlFreeDoc(ctxt->myDoc);
9345 ctxt->myDoc = NULL;
9346 }
9347 if (sax != NULL) ctxt->sax = NULL;
9348 xmlFreeParserCtxt(ctxt);
9349
9350 return(ret);
9351}
9352
9353/**
9354 * xmlSAXParseDTD:
9355 * @sax: the SAX handler block
9356 * @ExternalID: a NAME* containing the External ID of the DTD
9357 * @SystemID: a NAME* containing the URL to the DTD
9358 *
9359 * Load and parse an external subset.
9360 *
9361 * Returns the resulting xmlDtdPtr or NULL in case of error.
9362 */
9363
9364xmlDtdPtr
9365xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9366 const xmlChar *SystemID) {
9367 xmlDtdPtr ret = NULL;
9368 xmlParserCtxtPtr ctxt;
9369 xmlParserInputPtr input = NULL;
9370 xmlCharEncoding enc;
9371
9372 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9373
9374 ctxt = xmlNewParserCtxt();
9375 if (ctxt == NULL) {
9376 return(NULL);
9377 }
9378
9379 /*
9380 * Set-up the SAX context
9381 */
9382 if (sax != NULL) {
9383 if (ctxt->sax != NULL)
9384 xmlFree(ctxt->sax);
9385 ctxt->sax = sax;
9386 ctxt->userData = NULL;
9387 }
9388
9389 /*
9390 * Ask the Entity resolver to load the damn thing
9391 */
9392
9393 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9394 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9395 if (input == NULL) {
9396 if (sax != NULL) ctxt->sax = NULL;
9397 xmlFreeParserCtxt(ctxt);
9398 return(NULL);
9399 }
9400
9401 /*
9402 * plug some encoding conversion routines here.
9403 */
9404 xmlPushInput(ctxt, input);
9405 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9406 xmlSwitchEncoding(ctxt, enc);
9407
9408 if (input->filename == NULL)
9409 input->filename = (char *) xmlStrdup(SystemID);
9410 input->line = 1;
9411 input->col = 1;
9412 input->base = ctxt->input->cur;
9413 input->cur = ctxt->input->cur;
9414 input->free = NULL;
9415
9416 /*
9417 * let's parse that entity knowing it's an external subset.
9418 */
9419 ctxt->inSubset = 2;
9420 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9421 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9422 ExternalID, SystemID);
9423 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9424
9425 if (ctxt->myDoc != NULL) {
9426 if (ctxt->wellFormed) {
9427 ret = ctxt->myDoc->extSubset;
9428 ctxt->myDoc->extSubset = NULL;
9429 } else {
9430 ret = NULL;
9431 }
9432 xmlFreeDoc(ctxt->myDoc);
9433 ctxt->myDoc = NULL;
9434 }
9435 if (sax != NULL) ctxt->sax = NULL;
9436 xmlFreeParserCtxt(ctxt);
9437
9438 return(ret);
9439}
9440
9441/**
9442 * xmlParseDTD:
9443 * @ExternalID: a NAME* containing the External ID of the DTD
9444 * @SystemID: a NAME* containing the URL to the DTD
9445 *
9446 * Load and parse an external subset.
9447 *
9448 * Returns the resulting xmlDtdPtr or NULL in case of error.
9449 */
9450
9451xmlDtdPtr
9452xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9453 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9454}
9455
/************************************************************************
 *									*
 * 		Front ends when parsing an Entity			*
 *									*
 ************************************************************************/
9461
9462/**
Owen Taylor3473f882001-02-23 17:55:21 +00009463 * xmlParseCtxtExternalEntity:
9464 * @ctx: the existing parsing context
9465 * @URL: the URL for the entity to load
9466 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009467 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009468 *
9469 * Parse an external general entity within an existing parsing context
9470 * An external general parsed entity is well-formed if it matches the
9471 * production labeled extParsedEnt.
9472 *
9473 * [78] extParsedEnt ::= TextDecl? content
9474 *
9475 * Returns 0 if the entity is well formed, -1 in case of args problem and
9476 * the parser error code otherwise
9477 */
9478
9479int
9480xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009481 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009482 xmlParserCtxtPtr ctxt;
9483 xmlDocPtr newDoc;
9484 xmlSAXHandlerPtr oldsax = NULL;
9485 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009486 xmlChar start[4];
9487 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009488
9489 if (ctx->depth > 40) {
9490 return(XML_ERR_ENTITY_LOOP);
9491 }
9492
Daniel Veillardcda96922001-08-21 10:56:31 +00009493 if (lst != NULL)
9494 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009495 if ((URL == NULL) && (ID == NULL))
9496 return(-1);
9497 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9498 return(-1);
9499
9500
9501 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9502 if (ctxt == NULL) return(-1);
9503 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +00009504 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +00009505 oldsax = ctxt->sax;
9506 ctxt->sax = ctx->sax;
9507 newDoc = xmlNewDoc(BAD_CAST "1.0");
9508 if (newDoc == NULL) {
9509 xmlFreeParserCtxt(ctxt);
9510 return(-1);
9511 }
9512 if (ctx->myDoc != NULL) {
9513 newDoc->intSubset = ctx->myDoc->intSubset;
9514 newDoc->extSubset = ctx->myDoc->extSubset;
9515 }
9516 if (ctx->myDoc->URL != NULL) {
9517 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9518 }
9519 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9520 if (newDoc->children == NULL) {
9521 ctxt->sax = oldsax;
9522 xmlFreeParserCtxt(ctxt);
9523 newDoc->intSubset = NULL;
9524 newDoc->extSubset = NULL;
9525 xmlFreeDoc(newDoc);
9526 return(-1);
9527 }
9528 nodePush(ctxt, newDoc->children);
9529 if (ctx->myDoc == NULL) {
9530 ctxt->myDoc = newDoc;
9531 } else {
9532 ctxt->myDoc = ctx->myDoc;
9533 newDoc->children->doc = ctx->myDoc;
9534 }
9535
Daniel Veillard87a764e2001-06-20 17:41:10 +00009536 /*
9537 * Get the 4 first bytes and decode the charset
9538 * if enc != XML_CHAR_ENCODING_NONE
9539 * plug some encoding conversion routines.
9540 */
9541 GROW
9542 start[0] = RAW;
9543 start[1] = NXT(1);
9544 start[2] = NXT(2);
9545 start[3] = NXT(3);
9546 enc = xmlDetectCharEncoding(start, 4);
9547 if (enc != XML_CHAR_ENCODING_NONE) {
9548 xmlSwitchEncoding(ctxt, enc);
9549 }
9550
Owen Taylor3473f882001-02-23 17:55:21 +00009551 /*
9552 * Parse a possible text declaration first
9553 */
Owen Taylor3473f882001-02-23 17:55:21 +00009554 if ((RAW == '<') && (NXT(1) == '?') &&
9555 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9556 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9557 xmlParseTextDecl(ctxt);
9558 }
9559
9560 /*
9561 * Doing validity checking on chunk doesn't make sense
9562 */
9563 ctxt->instate = XML_PARSER_CONTENT;
9564 ctxt->validate = ctx->validate;
9565 ctxt->loadsubset = ctx->loadsubset;
9566 ctxt->depth = ctx->depth + 1;
9567 ctxt->replaceEntities = ctx->replaceEntities;
9568 if (ctxt->validate) {
9569 ctxt->vctxt.error = ctx->vctxt.error;
9570 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009571 } else {
9572 ctxt->vctxt.error = NULL;
9573 ctxt->vctxt.warning = NULL;
9574 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009575 ctxt->vctxt.nodeTab = NULL;
9576 ctxt->vctxt.nodeNr = 0;
9577 ctxt->vctxt.nodeMax = 0;
9578 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009579
9580 xmlParseContent(ctxt);
9581
9582 if ((RAW == '<') && (NXT(1) == '/')) {
9583 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9584 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9585 ctxt->sax->error(ctxt->userData,
9586 "chunk is not well balanced\n");
9587 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009588 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009589 } else if (RAW != 0) {
9590 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9591 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9592 ctxt->sax->error(ctxt->userData,
9593 "extra content at the end of well balanced chunk\n");
9594 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009595 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009596 }
9597 if (ctxt->node != newDoc->children) {
9598 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9599 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9600 ctxt->sax->error(ctxt->userData,
9601 "chunk is not well balanced\n");
9602 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009603 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009604 }
9605
9606 if (!ctxt->wellFormed) {
9607 if (ctxt->errNo == 0)
9608 ret = 1;
9609 else
9610 ret = ctxt->errNo;
9611 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009612 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009613 xmlNodePtr cur;
9614
9615 /*
9616 * Return the newly created nodeset after unlinking it from
9617 * they pseudo parent.
9618 */
9619 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009620 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009621 while (cur != NULL) {
9622 cur->parent = NULL;
9623 cur = cur->next;
9624 }
9625 newDoc->children->children = NULL;
9626 }
9627 ret = 0;
9628 }
9629 ctxt->sax = oldsax;
9630 xmlFreeParserCtxt(ctxt);
9631 newDoc->intSubset = NULL;
9632 newDoc->extSubset = NULL;
9633 xmlFreeDoc(newDoc);
9634
9635 return(ret);
9636}
9637
9638/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009639 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009640 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009641 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009642 * @sax: the SAX handler bloc (possibly NULL)
9643 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9644 * @depth: Used for loop detection, use 0
9645 * @URL: the URL for the entity to load
9646 * @ID: the System ID for the entity to load
9647 * @list: the return value for the set of parsed nodes
9648 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009649 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009650 *
9651 * Returns 0 if the entity is well formed, -1 in case of args problem and
9652 * the parser error code otherwise
9653 */
9654
Daniel Veillard257d9102001-05-08 10:41:44 +00009655static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009656xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9657 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009658 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009659 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009660 xmlParserCtxtPtr ctxt;
9661 xmlDocPtr newDoc;
9662 xmlSAXHandlerPtr oldsax = NULL;
9663 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009664 xmlChar start[4];
9665 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009666
9667 if (depth > 40) {
9668 return(XML_ERR_ENTITY_LOOP);
9669 }
9670
9671
9672
9673 if (list != NULL)
9674 *list = NULL;
9675 if ((URL == NULL) && (ID == NULL))
9676 return(-1);
9677 if (doc == NULL) /* @@ relax but check for dereferences */
9678 return(-1);
9679
9680
9681 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9682 if (ctxt == NULL) return(-1);
9683 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009684 if (oldctxt != NULL) {
9685 ctxt->_private = oldctxt->_private;
9686 ctxt->loadsubset = oldctxt->loadsubset;
9687 ctxt->validate = oldctxt->validate;
9688 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009689 ctxt->record_info = oldctxt->record_info;
9690 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
9691 ctxt->node_seq.length = oldctxt->node_seq.length;
9692 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009693 } else {
9694 /*
9695 * Doing validity checking on chunk without context
9696 * doesn't make sense
9697 */
9698 ctxt->_private = NULL;
9699 ctxt->validate = 0;
9700 ctxt->external = 2;
9701 ctxt->loadsubset = 0;
9702 }
Owen Taylor3473f882001-02-23 17:55:21 +00009703 if (sax != NULL) {
9704 oldsax = ctxt->sax;
9705 ctxt->sax = sax;
9706 if (user_data != NULL)
9707 ctxt->userData = user_data;
9708 }
9709 newDoc = xmlNewDoc(BAD_CAST "1.0");
9710 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009711 ctxt->node_seq.maximum = 0;
9712 ctxt->node_seq.length = 0;
9713 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009714 xmlFreeParserCtxt(ctxt);
9715 return(-1);
9716 }
9717 if (doc != NULL) {
9718 newDoc->intSubset = doc->intSubset;
9719 newDoc->extSubset = doc->extSubset;
9720 }
9721 if (doc->URL != NULL) {
9722 newDoc->URL = xmlStrdup(doc->URL);
9723 }
9724 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9725 if (newDoc->children == NULL) {
9726 if (sax != NULL)
9727 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009728 ctxt->node_seq.maximum = 0;
9729 ctxt->node_seq.length = 0;
9730 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009731 xmlFreeParserCtxt(ctxt);
9732 newDoc->intSubset = NULL;
9733 newDoc->extSubset = NULL;
9734 xmlFreeDoc(newDoc);
9735 return(-1);
9736 }
9737 nodePush(ctxt, newDoc->children);
9738 if (doc == NULL) {
9739 ctxt->myDoc = newDoc;
9740 } else {
9741 ctxt->myDoc = doc;
9742 newDoc->children->doc = doc;
9743 }
9744
Daniel Veillard87a764e2001-06-20 17:41:10 +00009745 /*
9746 * Get the 4 first bytes and decode the charset
9747 * if enc != XML_CHAR_ENCODING_NONE
9748 * plug some encoding conversion routines.
9749 */
9750 GROW;
9751 start[0] = RAW;
9752 start[1] = NXT(1);
9753 start[2] = NXT(2);
9754 start[3] = NXT(3);
9755 enc = xmlDetectCharEncoding(start, 4);
9756 if (enc != XML_CHAR_ENCODING_NONE) {
9757 xmlSwitchEncoding(ctxt, enc);
9758 }
9759
Owen Taylor3473f882001-02-23 17:55:21 +00009760 /*
9761 * Parse a possible text declaration first
9762 */
Owen Taylor3473f882001-02-23 17:55:21 +00009763 if ((RAW == '<') && (NXT(1) == '?') &&
9764 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9765 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9766 xmlParseTextDecl(ctxt);
9767 }
9768
Owen Taylor3473f882001-02-23 17:55:21 +00009769 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009770 ctxt->depth = depth;
9771
9772 xmlParseContent(ctxt);
9773
Daniel Veillard561b7f82002-03-20 21:55:57 +00009774 if ((RAW == '<') && (NXT(1) == '/')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009775 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9776 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9777 ctxt->sax->error(ctxt->userData,
9778 "chunk is not well balanced\n");
9779 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009780 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00009781 } else if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00009782 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9783 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9784 ctxt->sax->error(ctxt->userData,
9785 "extra content at the end of well balanced chunk\n");
9786 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009787 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009788 }
9789 if (ctxt->node != newDoc->children) {
9790 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9791 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9792 ctxt->sax->error(ctxt->userData,
9793 "chunk is not well balanced\n");
9794 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009795 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009796 }
9797
9798 if (!ctxt->wellFormed) {
9799 if (ctxt->errNo == 0)
9800 ret = 1;
9801 else
9802 ret = ctxt->errNo;
9803 } else {
9804 if (list != NULL) {
9805 xmlNodePtr cur;
9806
9807 /*
9808 * Return the newly created nodeset after unlinking it from
9809 * they pseudo parent.
9810 */
9811 cur = newDoc->children->children;
9812 *list = cur;
9813 while (cur != NULL) {
9814 cur->parent = NULL;
9815 cur = cur->next;
9816 }
9817 newDoc->children->children = NULL;
9818 }
9819 ret = 0;
9820 }
9821 if (sax != NULL)
9822 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +00009823 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
9824 oldctxt->node_seq.length = ctxt->node_seq.length;
9825 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009826 ctxt->node_seq.maximum = 0;
9827 ctxt->node_seq.length = 0;
9828 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009829 xmlFreeParserCtxt(ctxt);
9830 newDoc->intSubset = NULL;
9831 newDoc->extSubset = NULL;
9832 xmlFreeDoc(newDoc);
9833
9834 return(ret);
9835}
9836
9837/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009838 * xmlParseExternalEntity:
9839 * @doc: the document the chunk pertains to
9840 * @sax: the SAX handler bloc (possibly NULL)
9841 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9842 * @depth: Used for loop detection, use 0
9843 * @URL: the URL for the entity to load
9844 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009845 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009846 *
9847 * Parse an external general entity
9848 * An external general parsed entity is well-formed if it matches the
9849 * production labeled extParsedEnt.
9850 *
9851 * [78] extParsedEnt ::= TextDecl? content
9852 *
9853 * Returns 0 if the entity is well formed, -1 in case of args problem and
9854 * the parser error code otherwise
9855 */
9856
9857int
9858xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009859 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009860 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009861 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009862}
9863
9864/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009865 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009866 * @doc: the document the chunk pertains to
9867 * @sax: the SAX handler bloc (possibly NULL)
9868 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9869 * @depth: Used for loop detection, use 0
9870 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009871 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009872 *
9873 * Parse a well-balanced chunk of an XML document
9874 * called by the parser
9875 * The allowed sequence for the Well Balanced Chunk is the one defined by
9876 * the content production in the XML grammar:
9877 *
9878 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9879 *
9880 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9881 * the parser error code otherwise
9882 */
9883
9884int
9885xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009886 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +00009887 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
9888 depth, string, lst, 0 );
9889}
9890
9891/**
Daniel Veillard328f48c2002-11-15 15:24:34 +00009892 * xmlParseBalancedChunkMemoryInternal:
9893 * @oldctxt: the existing parsing context
9894 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9895 * @user_data: the user data field for the parser context
9896 * @lst: the return value for the set of parsed nodes
9897 *
9898 *
9899 * Parse a well-balanced chunk of an XML document
9900 * called by the parser
9901 * The allowed sequence for the Well Balanced Chunk is the one defined by
9902 * the content production in the XML grammar:
9903 *
9904 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9905 *
9906 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9907 * the parser error code otherwise
9908 *
9909 * In case recover is set to 1, the nodelist will not be empty even if
9910 * the parsed chunk is not well balanced.
9911 */
9912static int
9913xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
9914 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
9915 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009916 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009917 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009918 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009919 int size;
9920 int ret = 0;
9921
9922 if (oldctxt->depth > 40) {
9923 return(XML_ERR_ENTITY_LOOP);
9924 }
9925
9926
9927 if (lst != NULL)
9928 *lst = NULL;
9929 if (string == NULL)
9930 return(-1);
9931
9932 size = xmlStrlen(string);
9933
9934 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9935 if (ctxt == NULL) return(-1);
9936 if (user_data != NULL)
9937 ctxt->userData = user_data;
9938 else
9939 ctxt->userData = ctxt;
9940
9941 oldsax = ctxt->sax;
9942 ctxt->sax = oldctxt->sax;
Daniel Veillarde1ca5032002-12-09 14:13:43 +00009943 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009944 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +00009945 newDoc = xmlNewDoc(BAD_CAST "1.0");
9946 if (newDoc == NULL) {
9947 ctxt->sax = oldsax;
9948 xmlFreeParserCtxt(ctxt);
9949 return(-1);
9950 }
Daniel Veillard328f48c2002-11-15 15:24:34 +00009951 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009952 } else {
9953 ctxt->myDoc = oldctxt->myDoc;
9954 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009955 }
Daniel Veillard9bc53102002-11-25 13:20:04 +00009956 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +00009957 BAD_CAST "pseudoroot", NULL);
9958 if (ctxt->myDoc->children == NULL) {
9959 ctxt->sax = oldsax;
9960 xmlFreeParserCtxt(ctxt);
9961 if (newDoc != NULL)
9962 xmlFreeDoc(newDoc);
9963 return(-1);
9964 }
9965 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +00009966 ctxt->instate = XML_PARSER_CONTENT;
9967 ctxt->depth = oldctxt->depth + 1;
9968
Daniel Veillard328f48c2002-11-15 15:24:34 +00009969 ctxt->validate = 0;
9970 ctxt->loadsubset = oldctxt->loadsubset;
9971
Daniel Veillard68e9e742002-11-16 15:35:11 +00009972 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +00009973 if ((RAW == '<') && (NXT(1) == '/')) {
9974 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9975 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9976 ctxt->sax->error(ctxt->userData,
9977 "chunk is not well balanced\n");
9978 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009979 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009980 } else if (RAW != 0) {
9981 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9982 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9983 ctxt->sax->error(ctxt->userData,
9984 "extra content at the end of well balanced chunk\n");
9985 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009986 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009987 }
Daniel Veillard68e9e742002-11-16 15:35:11 +00009988 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard328f48c2002-11-15 15:24:34 +00009989 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9990 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9991 ctxt->sax->error(ctxt->userData,
9992 "chunk is not well balanced\n");
9993 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009994 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009995 }
9996
9997 if (!ctxt->wellFormed) {
9998 if (ctxt->errNo == 0)
9999 ret = 1;
10000 else
10001 ret = ctxt->errNo;
10002 } else {
10003 ret = 0;
10004 }
10005
10006 if ((lst != NULL) && (ret == 0)) {
10007 xmlNodePtr cur;
10008
10009 /*
10010 * Return the newly created nodeset after unlinking it from
10011 * they pseudo parent.
10012 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000010013 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010014 *lst = cur;
10015 while (cur != NULL) {
Daniel Veillard8d589042003-02-04 15:07:21 +000010016 if (oldctxt->validate && oldctxt->wellFormed &&
10017 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
10018 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
10019 oldctxt->myDoc, cur);
10020 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010021 cur->parent = NULL;
10022 cur = cur->next;
10023 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010024 ctxt->myDoc->children->children = NULL;
10025 }
10026 if (ctxt->myDoc != NULL) {
10027 xmlFreeNode(ctxt->myDoc->children);
10028 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010029 }
10030
10031 ctxt->sax = oldsax;
10032 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010033 if (newDoc != NULL)
10034 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010035
10036 return(ret);
10037}
10038
10039/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000010040 * xmlParseBalancedChunkMemoryRecover:
10041 * @doc: the document the chunk pertains to
10042 * @sax: the SAX handler bloc (possibly NULL)
10043 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10044 * @depth: Used for loop detection, use 0
10045 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10046 * @lst: the return value for the set of parsed nodes
10047 * @recover: return nodes even if the data is broken (use 0)
10048 *
10049 *
10050 * Parse a well-balanced chunk of an XML document
10051 * called by the parser
10052 * The allowed sequence for the Well Balanced Chunk is the one defined by
10053 * the content production in the XML grammar:
10054 *
10055 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10056 *
10057 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10058 * the parser error code otherwise
10059 *
10060 * In case recover is set to 1, the nodelist will not be empty even if
10061 * the parsed chunk is not well balanced.
10062 */
10063int
10064xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
10065 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
10066 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000010067 xmlParserCtxtPtr ctxt;
10068 xmlDocPtr newDoc;
10069 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000010070 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000010071 int size;
10072 int ret = 0;
10073
10074 if (depth > 40) {
10075 return(XML_ERR_ENTITY_LOOP);
10076 }
10077
10078
Daniel Veillardcda96922001-08-21 10:56:31 +000010079 if (lst != NULL)
10080 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010081 if (string == NULL)
10082 return(-1);
10083
10084 size = xmlStrlen(string);
10085
10086 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10087 if (ctxt == NULL) return(-1);
10088 ctxt->userData = ctxt;
10089 if (sax != NULL) {
10090 oldsax = ctxt->sax;
10091 ctxt->sax = sax;
10092 if (user_data != NULL)
10093 ctxt->userData = user_data;
10094 }
10095 newDoc = xmlNewDoc(BAD_CAST "1.0");
10096 if (newDoc == NULL) {
10097 xmlFreeParserCtxt(ctxt);
10098 return(-1);
10099 }
10100 if (doc != NULL) {
10101 newDoc->intSubset = doc->intSubset;
10102 newDoc->extSubset = doc->extSubset;
10103 }
10104 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10105 if (newDoc->children == NULL) {
10106 if (sax != NULL)
10107 ctxt->sax = oldsax;
10108 xmlFreeParserCtxt(ctxt);
10109 newDoc->intSubset = NULL;
10110 newDoc->extSubset = NULL;
10111 xmlFreeDoc(newDoc);
10112 return(-1);
10113 }
10114 nodePush(ctxt, newDoc->children);
10115 if (doc == NULL) {
10116 ctxt->myDoc = newDoc;
10117 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000010118 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000010119 newDoc->children->doc = doc;
10120 }
10121 ctxt->instate = XML_PARSER_CONTENT;
10122 ctxt->depth = depth;
10123
10124 /*
10125 * Doing validity checking on chunk doesn't make sense
10126 */
10127 ctxt->validate = 0;
10128 ctxt->loadsubset = 0;
10129
Daniel Veillardb39bc392002-10-26 19:29:51 +000010130 if ( doc != NULL ){
10131 content = doc->children;
10132 doc->children = NULL;
10133 xmlParseContent(ctxt);
10134 doc->children = content;
10135 }
10136 else {
10137 xmlParseContent(ctxt);
10138 }
Owen Taylor3473f882001-02-23 17:55:21 +000010139 if ((RAW == '<') && (NXT(1) == '/')) {
10140 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10141 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10142 ctxt->sax->error(ctxt->userData,
10143 "chunk is not well balanced\n");
10144 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010145 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010146 } else if (RAW != 0) {
10147 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10148 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10149 ctxt->sax->error(ctxt->userData,
10150 "extra content at the end of well balanced chunk\n");
10151 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010152 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010153 }
10154 if (ctxt->node != newDoc->children) {
10155 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10156 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10157 ctxt->sax->error(ctxt->userData,
10158 "chunk is not well balanced\n");
10159 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010160 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010161 }
10162
10163 if (!ctxt->wellFormed) {
10164 if (ctxt->errNo == 0)
10165 ret = 1;
10166 else
10167 ret = ctxt->errNo;
10168 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010169 ret = 0;
10170 }
10171
10172 if (lst != NULL && (ret == 0 || recover == 1)) {
10173 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010174
10175 /*
10176 * Return the newly created nodeset after unlinking it from
10177 * they pseudo parent.
10178 */
10179 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010180 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010181 while (cur != NULL) {
10182 cur->parent = NULL;
10183 cur = cur->next;
10184 }
10185 newDoc->children->children = NULL;
10186 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000010187
Owen Taylor3473f882001-02-23 17:55:21 +000010188 if (sax != NULL)
10189 ctxt->sax = oldsax;
10190 xmlFreeParserCtxt(ctxt);
10191 newDoc->intSubset = NULL;
10192 newDoc->extSubset = NULL;
10193 xmlFreeDoc(newDoc);
10194
10195 return(ret);
10196}
10197
10198/**
10199 * xmlSAXParseEntity:
10200 * @sax: the SAX handler block
10201 * @filename: the filename
10202 *
10203 * parse an XML external entity out of context and build a tree.
10204 * It use the given SAX function block to handle the parsing callback.
10205 * If sax is NULL, fallback to the default DOM tree building routines.
10206 *
10207 * [78] extParsedEnt ::= TextDecl? content
10208 *
10209 * This correspond to a "Well Balanced" chunk
10210 *
10211 * Returns the resulting document tree
10212 */
10213
10214xmlDocPtr
10215xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
10216 xmlDocPtr ret;
10217 xmlParserCtxtPtr ctxt;
10218 char *directory = NULL;
10219
10220 ctxt = xmlCreateFileParserCtxt(filename);
10221 if (ctxt == NULL) {
10222 return(NULL);
10223 }
10224 if (sax != NULL) {
10225 if (ctxt->sax != NULL)
10226 xmlFree(ctxt->sax);
10227 ctxt->sax = sax;
10228 ctxt->userData = NULL;
10229 }
10230
10231 if ((ctxt->directory == NULL) && (directory == NULL))
10232 directory = xmlParserGetDirectory(filename);
10233
10234 xmlParseExtParsedEnt(ctxt);
10235
10236 if (ctxt->wellFormed)
10237 ret = ctxt->myDoc;
10238 else {
10239 ret = NULL;
10240 xmlFreeDoc(ctxt->myDoc);
10241 ctxt->myDoc = NULL;
10242 }
10243 if (sax != NULL)
10244 ctxt->sax = NULL;
10245 xmlFreeParserCtxt(ctxt);
10246
10247 return(ret);
10248}
10249
10250/**
10251 * xmlParseEntity:
10252 * @filename: the filename
10253 *
10254 * parse an XML external entity out of context and build a tree.
10255 *
10256 * [78] extParsedEnt ::= TextDecl? content
10257 *
10258 * This correspond to a "Well Balanced" chunk
10259 *
10260 * Returns the resulting document tree
10261 */
10262
10263xmlDocPtr
10264xmlParseEntity(const char *filename) {
10265 return(xmlSAXParseEntity(NULL, filename));
10266}
10267
10268/**
10269 * xmlCreateEntityParserCtxt:
10270 * @URL: the entity URL
10271 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010272 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000010273 *
10274 * Create a parser context for an external entity
10275 * Automatic support for ZLIB/Compress compressed document is provided
10276 * by default if found at compile-time.
10277 *
10278 * Returns the new parser context or NULL
10279 */
10280xmlParserCtxtPtr
10281xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10282 const xmlChar *base) {
10283 xmlParserCtxtPtr ctxt;
10284 xmlParserInputPtr inputStream;
10285 char *directory = NULL;
10286 xmlChar *uri;
10287
10288 ctxt = xmlNewParserCtxt();
10289 if (ctxt == NULL) {
10290 return(NULL);
10291 }
10292
10293 uri = xmlBuildURI(URL, base);
10294
10295 if (uri == NULL) {
10296 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
10297 if (inputStream == NULL) {
10298 xmlFreeParserCtxt(ctxt);
10299 return(NULL);
10300 }
10301
10302 inputPush(ctxt, inputStream);
10303
10304 if ((ctxt->directory == NULL) && (directory == NULL))
10305 directory = xmlParserGetDirectory((char *)URL);
10306 if ((ctxt->directory == NULL) && (directory != NULL))
10307 ctxt->directory = directory;
10308 } else {
10309 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
10310 if (inputStream == NULL) {
10311 xmlFree(uri);
10312 xmlFreeParserCtxt(ctxt);
10313 return(NULL);
10314 }
10315
10316 inputPush(ctxt, inputStream);
10317
10318 if ((ctxt->directory == NULL) && (directory == NULL))
10319 directory = xmlParserGetDirectory((char *)uri);
10320 if ((ctxt->directory == NULL) && (directory != NULL))
10321 ctxt->directory = directory;
10322 xmlFree(uri);
10323 }
10324
10325 return(ctxt);
10326}
10327
10328/************************************************************************
10329 * *
10330 * Front ends when parsing from a file *
10331 * *
10332 ************************************************************************/
10333
10334/**
10335 * xmlCreateFileParserCtxt:
10336 * @filename: the filename
10337 *
10338 * Create a parser context for a file content.
10339 * Automatic support for ZLIB/Compress compressed document is provided
10340 * by default if found at compile-time.
10341 *
10342 * Returns the new parser context or NULL
10343 */
10344xmlParserCtxtPtr
10345xmlCreateFileParserCtxt(const char *filename)
10346{
10347 xmlParserCtxtPtr ctxt;
10348 xmlParserInputPtr inputStream;
Igor Zlatkovicce076162003-02-23 13:39:39 +000010349 char *canonicFilename;
Owen Taylor3473f882001-02-23 17:55:21 +000010350 char *directory = NULL;
10351
Owen Taylor3473f882001-02-23 17:55:21 +000010352 ctxt = xmlNewParserCtxt();
10353 if (ctxt == NULL) {
10354 if (xmlDefaultSAXHandler.error != NULL) {
10355 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
10356 }
10357 return(NULL);
10358 }
10359
Daniel Veillardc64b8e92003-02-24 11:47:13 +000010360 canonicFilename = (char *) xmlCanonicPath((const xmlChar *) filename);
Igor Zlatkovicce076162003-02-23 13:39:39 +000010361 if (canonicFilename == NULL) {
10362 if (xmlDefaultSAXHandler.error != NULL) {
10363 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
10364 }
10365 return(NULL);
10366 }
10367
10368 inputStream = xmlLoadExternalEntity(canonicFilename, NULL, ctxt);
10369 xmlFree(canonicFilename);
Owen Taylor3473f882001-02-23 17:55:21 +000010370 if (inputStream == NULL) {
10371 xmlFreeParserCtxt(ctxt);
10372 return(NULL);
10373 }
10374
Owen Taylor3473f882001-02-23 17:55:21 +000010375 inputPush(ctxt, inputStream);
10376 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010377 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000010378 if ((ctxt->directory == NULL) && (directory != NULL))
10379 ctxt->directory = directory;
10380
10381 return(ctxt);
10382}
10383
10384/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010385 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000010386 * @sax: the SAX handler block
10387 * @filename: the filename
10388 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10389 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000010390 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000010391 *
10392 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10393 * compressed document is provided by default if found at compile-time.
10394 * It use the given SAX function block to handle the parsing callback.
10395 * If sax is NULL, fallback to the default DOM tree building routines.
10396 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000010397 * User data (void *) is stored within the parser context in the
10398 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000010399 *
Owen Taylor3473f882001-02-23 17:55:21 +000010400 * Returns the resulting document tree
10401 */
10402
10403xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000010404xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
10405 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000010406 xmlDocPtr ret;
10407 xmlParserCtxtPtr ctxt;
10408 char *directory = NULL;
10409
Daniel Veillard635ef722001-10-29 11:48:19 +000010410 xmlInitParser();
10411
Owen Taylor3473f882001-02-23 17:55:21 +000010412 ctxt = xmlCreateFileParserCtxt(filename);
10413 if (ctxt == NULL) {
10414 return(NULL);
10415 }
10416 if (sax != NULL) {
10417 if (ctxt->sax != NULL)
10418 xmlFree(ctxt->sax);
10419 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010420 }
Daniel Veillarda293c322001-10-02 13:54:14 +000010421 if (data!=NULL) {
10422 ctxt->_private=data;
10423 }
Owen Taylor3473f882001-02-23 17:55:21 +000010424
10425 if ((ctxt->directory == NULL) && (directory == NULL))
10426 directory = xmlParserGetDirectory(filename);
10427 if ((ctxt->directory == NULL) && (directory != NULL))
10428 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
10429
Daniel Veillarddad3f682002-11-17 16:47:27 +000010430 ctxt->recovery = recovery;
10431
Owen Taylor3473f882001-02-23 17:55:21 +000010432 xmlParseDocument(ctxt);
10433
10434 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10435 else {
10436 ret = NULL;
10437 xmlFreeDoc(ctxt->myDoc);
10438 ctxt->myDoc = NULL;
10439 }
10440 if (sax != NULL)
10441 ctxt->sax = NULL;
10442 xmlFreeParserCtxt(ctxt);
10443
10444 return(ret);
10445}
10446
10447/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010448 * xmlSAXParseFile:
10449 * @sax: the SAX handler block
10450 * @filename: the filename
10451 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10452 * documents
10453 *
10454 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10455 * compressed document is provided by default if found at compile-time.
10456 * It use the given SAX function block to handle the parsing callback.
10457 * If sax is NULL, fallback to the default DOM tree building routines.
10458 *
10459 * Returns the resulting document tree
10460 */
10461
10462xmlDocPtr
10463xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
10464 int recovery) {
10465 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
10466}
10467
10468/**
Owen Taylor3473f882001-02-23 17:55:21 +000010469 * xmlRecoverDoc:
10470 * @cur: a pointer to an array of xmlChar
10471 *
10472 * parse an XML in-memory document and build a tree.
10473 * In the case the document is not Well Formed, a tree is built anyway
10474 *
10475 * Returns the resulting document tree
10476 */
10477
10478xmlDocPtr
10479xmlRecoverDoc(xmlChar *cur) {
10480 return(xmlSAXParseDoc(NULL, cur, 1));
10481}
10482
10483/**
10484 * xmlParseFile:
10485 * @filename: the filename
10486 *
10487 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10488 * compressed document is provided by default if found at compile-time.
10489 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000010490 * Returns the resulting document tree if the file was wellformed,
10491 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000010492 */
10493
10494xmlDocPtr
10495xmlParseFile(const char *filename) {
10496 return(xmlSAXParseFile(NULL, filename, 0));
10497}
10498
10499/**
10500 * xmlRecoverFile:
10501 * @filename: the filename
10502 *
10503 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10504 * compressed document is provided by default if found at compile-time.
10505 * In the case the document is not Well Formed, a tree is built anyway
10506 *
10507 * Returns the resulting document tree
10508 */
10509
10510xmlDocPtr
10511xmlRecoverFile(const char *filename) {
10512 return(xmlSAXParseFile(NULL, filename, 1));
10513}
10514
10515
10516/**
10517 * xmlSetupParserForBuffer:
10518 * @ctxt: an XML parser context
10519 * @buffer: a xmlChar * buffer
10520 * @filename: a file name
10521 *
10522 * Setup the parser context to parse a new buffer; Clears any prior
10523 * contents from the parser context. The buffer parameter must not be
10524 * NULL, but the filename parameter can be
10525 */
10526void
10527xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
10528 const char* filename)
10529{
10530 xmlParserInputPtr input;
10531
10532 input = xmlNewInputStream(ctxt);
10533 if (input == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +000010534 xmlGenericError(xmlGenericErrorContext,
10535 "malloc");
Owen Taylor3473f882001-02-23 17:55:21 +000010536 xmlFree(ctxt);
10537 return;
10538 }
10539
10540 xmlClearParserCtxt(ctxt);
10541 if (filename != NULL)
10542 input->filename = xmlMemStrdup(filename);
10543 input->base = buffer;
10544 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010545 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000010546 inputPush(ctxt, input);
10547}
10548
10549/**
10550 * xmlSAXUserParseFile:
10551 * @sax: a SAX handler
10552 * @user_data: The user data returned on SAX callbacks
10553 * @filename: a file name
10554 *
10555 * parse an XML file and call the given SAX handler routines.
10556 * Automatic support for ZLIB/Compress compressed document is provided
10557 *
10558 * Returns 0 in case of success or a error number otherwise
10559 */
10560int
10561xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10562 const char *filename) {
10563 int ret = 0;
10564 xmlParserCtxtPtr ctxt;
10565
10566 ctxt = xmlCreateFileParserCtxt(filename);
10567 if (ctxt == NULL) return -1;
10568 if (ctxt->sax != &xmlDefaultSAXHandler)
10569 xmlFree(ctxt->sax);
10570 ctxt->sax = sax;
10571 if (user_data != NULL)
10572 ctxt->userData = user_data;
10573
10574 xmlParseDocument(ctxt);
10575
10576 if (ctxt->wellFormed)
10577 ret = 0;
10578 else {
10579 if (ctxt->errNo != 0)
10580 ret = ctxt->errNo;
10581 else
10582 ret = -1;
10583 }
10584 if (sax != NULL)
10585 ctxt->sax = NULL;
10586 xmlFreeParserCtxt(ctxt);
10587
10588 return ret;
10589}
10590
10591/************************************************************************
10592 * *
10593 * Front ends when parsing from memory *
10594 * *
10595 ************************************************************************/
10596
10597/**
10598 * xmlCreateMemoryParserCtxt:
10599 * @buffer: a pointer to a char array
10600 * @size: the size of the array
10601 *
10602 * Create a parser context for an XML in-memory document.
10603 *
10604 * Returns the new parser context or NULL
10605 */
10606xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010607xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010608 xmlParserCtxtPtr ctxt;
10609 xmlParserInputPtr input;
10610 xmlParserInputBufferPtr buf;
10611
10612 if (buffer == NULL)
10613 return(NULL);
10614 if (size <= 0)
10615 return(NULL);
10616
10617 ctxt = xmlNewParserCtxt();
10618 if (ctxt == NULL)
10619 return(NULL);
10620
10621 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010622 if (buf == NULL) {
10623 xmlFreeParserCtxt(ctxt);
10624 return(NULL);
10625 }
Owen Taylor3473f882001-02-23 17:55:21 +000010626
10627 input = xmlNewInputStream(ctxt);
10628 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010629 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010630 xmlFreeParserCtxt(ctxt);
10631 return(NULL);
10632 }
10633
10634 input->filename = NULL;
10635 input->buf = buf;
10636 input->base = input->buf->buffer->content;
10637 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010638 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010639
10640 inputPush(ctxt, input);
10641 return(ctxt);
10642}
10643
10644/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010645 * xmlSAXParseMemoryWithData:
10646 * @sax: the SAX handler block
10647 * @buffer: an pointer to a char array
10648 * @size: the size of the array
10649 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10650 * documents
10651 * @data: the userdata
10652 *
10653 * parse an XML in-memory block and use the given SAX function block
10654 * to handle the parsing callback. If sax is NULL, fallback to the default
10655 * DOM tree building routines.
10656 *
10657 * User data (void *) is stored within the parser context in the
10658 * context's _private member, so it is available nearly everywhere in libxml
10659 *
10660 * Returns the resulting document tree
10661 */
10662
10663xmlDocPtr
10664xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
10665 int size, int recovery, void *data) {
10666 xmlDocPtr ret;
10667 xmlParserCtxtPtr ctxt;
10668
10669 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10670 if (ctxt == NULL) return(NULL);
10671 if (sax != NULL) {
10672 if (ctxt->sax != NULL)
10673 xmlFree(ctxt->sax);
10674 ctxt->sax = sax;
10675 }
10676 if (data!=NULL) {
10677 ctxt->_private=data;
10678 }
10679
10680 xmlParseDocument(ctxt);
10681
10682 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10683 else {
10684 ret = NULL;
10685 xmlFreeDoc(ctxt->myDoc);
10686 ctxt->myDoc = NULL;
10687 }
10688 if (sax != NULL)
10689 ctxt->sax = NULL;
10690 xmlFreeParserCtxt(ctxt);
10691
10692 return(ret);
10693}
10694
10695/**
Owen Taylor3473f882001-02-23 17:55:21 +000010696 * xmlSAXParseMemory:
10697 * @sax: the SAX handler block
10698 * @buffer: an pointer to a char array
10699 * @size: the size of the array
10700 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10701 * documents
10702 *
10703 * parse an XML in-memory block and use the given SAX function block
10704 * to handle the parsing callback. If sax is NULL, fallback to the default
10705 * DOM tree building routines.
10706 *
10707 * Returns the resulting document tree
10708 */
10709xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010710xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10711 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010712 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010713}
10714
10715/**
10716 * xmlParseMemory:
10717 * @buffer: an pointer to a char array
10718 * @size: the size of the array
10719 *
10720 * parse an XML in-memory block and build a tree.
10721 *
10722 * Returns the resulting document tree
10723 */
10724
Daniel Veillard50822cb2001-07-26 20:05:51 +000010725xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010726 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10727}
10728
10729/**
10730 * xmlRecoverMemory:
10731 * @buffer: an pointer to a char array
10732 * @size: the size of the array
10733 *
10734 * parse an XML in-memory block and build a tree.
10735 * In the case the document is not Well Formed, a tree is built anyway
10736 *
10737 * Returns the resulting document tree
10738 */
10739
Daniel Veillard50822cb2001-07-26 20:05:51 +000010740xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010741 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10742}
10743
10744/**
10745 * xmlSAXUserParseMemory:
10746 * @sax: a SAX handler
10747 * @user_data: The user data returned on SAX callbacks
10748 * @buffer: an in-memory XML document input
10749 * @size: the length of the XML document in bytes
10750 *
10751 * A better SAX parsing routine.
10752 * parse an XML in-memory buffer and call the given SAX handler routines.
10753 *
10754 * Returns 0 in case of success or a error number otherwise
10755 */
10756int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010757 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010758 int ret = 0;
10759 xmlParserCtxtPtr ctxt;
10760 xmlSAXHandlerPtr oldsax = NULL;
10761
Daniel Veillard9e923512002-08-14 08:48:52 +000010762 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000010763 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10764 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000010765 oldsax = ctxt->sax;
10766 ctxt->sax = sax;
Daniel Veillard30211a02001-04-26 09:33:18 +000010767 if (user_data != NULL)
10768 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010769
10770 xmlParseDocument(ctxt);
10771
10772 if (ctxt->wellFormed)
10773 ret = 0;
10774 else {
10775 if (ctxt->errNo != 0)
10776 ret = ctxt->errNo;
10777 else
10778 ret = -1;
10779 }
Daniel Veillard9e923512002-08-14 08:48:52 +000010780 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000010781 xmlFreeParserCtxt(ctxt);
10782
10783 return ret;
10784}
10785
10786/**
10787 * xmlCreateDocParserCtxt:
10788 * @cur: a pointer to an array of xmlChar
10789 *
10790 * Creates a parser context for an XML in-memory document.
10791 *
10792 * Returns the new parser context or NULL
10793 */
10794xmlParserCtxtPtr
10795xmlCreateDocParserCtxt(xmlChar *cur) {
10796 int len;
10797
10798 if (cur == NULL)
10799 return(NULL);
10800 len = xmlStrlen(cur);
10801 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10802}
10803
10804/**
10805 * xmlSAXParseDoc:
10806 * @sax: the SAX handler block
10807 * @cur: a pointer to an array of xmlChar
10808 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10809 * documents
10810 *
10811 * parse an XML in-memory document and build a tree.
10812 * It use the given SAX function block to handle the parsing callback.
10813 * If sax is NULL, fallback to the default DOM tree building routines.
10814 *
10815 * Returns the resulting document tree
10816 */
10817
10818xmlDocPtr
10819xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10820 xmlDocPtr ret;
10821 xmlParserCtxtPtr ctxt;
10822
10823 if (cur == NULL) return(NULL);
10824
10825
10826 ctxt = xmlCreateDocParserCtxt(cur);
10827 if (ctxt == NULL) return(NULL);
10828 if (sax != NULL) {
10829 ctxt->sax = sax;
10830 ctxt->userData = NULL;
10831 }
10832
10833 xmlParseDocument(ctxt);
10834 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10835 else {
10836 ret = NULL;
10837 xmlFreeDoc(ctxt->myDoc);
10838 ctxt->myDoc = NULL;
10839 }
10840 if (sax != NULL)
10841 ctxt->sax = NULL;
10842 xmlFreeParserCtxt(ctxt);
10843
10844 return(ret);
10845}
10846
10847/**
10848 * xmlParseDoc:
10849 * @cur: a pointer to an array of xmlChar
10850 *
10851 * parse an XML in-memory document and build a tree.
10852 *
10853 * Returns the resulting document tree
10854 */
10855
10856xmlDocPtr
10857xmlParseDoc(xmlChar *cur) {
10858 return(xmlSAXParseDoc(NULL, cur, 0));
10859}
10860
Daniel Veillard8107a222002-01-13 14:10:10 +000010861/************************************************************************
10862 * *
10863 * Specific function to keep track of entities references *
10864 * and used by the XSLT debugger *
10865 * *
10866 ************************************************************************/
10867
10868static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
10869
10870/**
10871 * xmlAddEntityReference:
10872 * @ent : A valid entity
10873 * @firstNode : A valid first node for children of entity
10874 * @lastNode : A valid last node of children entity
10875 *
10876 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
10877 */
10878static void
10879xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
10880 xmlNodePtr lastNode)
10881{
10882 if (xmlEntityRefFunc != NULL) {
10883 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
10884 }
10885}
10886
10887
10888/**
10889 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000010890 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000010891 *
10892 * Set the function to call call back when a xml reference has been made
10893 */
10894void
10895xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
10896{
10897 xmlEntityRefFunc = func;
10898}
Owen Taylor3473f882001-02-23 17:55:21 +000010899
10900/************************************************************************
10901 * *
10902 * Miscellaneous *
10903 * *
10904 ************************************************************************/
10905
10906#ifdef LIBXML_XPATH_ENABLED
10907#include <libxml/xpath.h>
10908#endif
10909
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010910extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000010911static int xmlParserInitialized = 0;
10912
10913/**
10914 * xmlInitParser:
10915 *
10916 * Initialization function for the XML parser.
10917 * This is not reentrant. Call once before processing in case of
10918 * use in multithreaded programs.
10919 */
10920
10921void
10922xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000010923 if (xmlParserInitialized != 0)
10924 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010925
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010926 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
10927 (xmlGenericError == NULL))
10928 initGenericErrorDefaultFunc(NULL);
Daniel Veillardd0463562001-10-13 09:15:48 +000010929 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000010930 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000010931 xmlInitCharEncodingHandlers();
10932 xmlInitializePredefinedEntities();
10933 xmlDefaultSAXHandlerInit();
10934 xmlRegisterDefaultInputCallbacks();
10935 xmlRegisterDefaultOutputCallbacks();
10936#ifdef LIBXML_HTML_ENABLED
10937 htmlInitAutoClose();
10938 htmlDefaultSAXHandlerInit();
10939#endif
10940#ifdef LIBXML_XPATH_ENABLED
10941 xmlXPathInit();
10942#endif
10943 xmlParserInitialized = 1;
10944}
10945
10946/**
10947 * xmlCleanupParser:
10948 *
10949 * Cleanup function for the XML parser. It tries to reclaim all
10950 * parsing related global memory allocated for the parser processing.
10951 * It doesn't deallocate any document related memory. Calling this
10952 * function should not prevent reusing the parser.
Daniel Veillard7424eb62003-01-24 14:14:52 +000010953 * One should call xmlCleanupParser() only when the process has
10954 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000010955 */
10956
10957void
10958xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000010959 xmlCleanupCharEncodingHandlers();
10960 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000010961#ifdef LIBXML_CATALOG_ENABLED
10962 xmlCatalogCleanup();
10963#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000010964 xmlCleanupThreads();
10965 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010966}