blob: 5c7af3d17afb99ba25361faac622af8db3b643f0 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their related
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
/*
 * XML_DIR_SEP: directory separator character. A backslash is selected
 * only for WIN32 builds that are not Cygwin; every other build gets '/'.
 */
#if !defined(WIN32) || defined(__CYGWIN__)
#define XML_DIR_SEP '/'
#else
#define XML_DIR_SEP '\\'
#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
44#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000045#include <libxml/threads.h>
46#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000056#ifdef LIBXML_CATALOG_ENABLED
57#include <libxml/catalog.h>
58#endif
Owen Taylor3473f882001-02-23 17:55:21 +000059
60#ifdef HAVE_CTYPE_H
61#include <ctype.h>
62#endif
63#ifdef HAVE_STDLIB_H
64#include <stdlib.h>
65#endif
66#ifdef HAVE_SYS_STAT_H
67#include <sys/stat.h>
68#endif
69#ifdef HAVE_FCNTL_H
70#include <fcntl.h>
71#endif
72#ifdef HAVE_UNISTD_H
73#include <unistd.h>
74#endif
75#ifdef HAVE_ZLIB_H
76#include <zlib.h>
77#endif
78
/**
 * MAX_DEPTH:
 *
 * Arbitrary ceiling on the nesting depth of the XML documents the parser
 * agrees to process. It is a safety boundary, not an intrinsic limitation
 * of the parser. nodePush() only enforces it when the macro is defined.
 */
#define MAX_DEPTH 1024

/*
 * Named buffer sizes; the code using them lies outside this part of the
 * file.
 */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/* String constant identifying a "SAX compatibility mode" document. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
92
Owen Taylor3473f882001-02-23 17:55:21 +000093/*
Owen Taylor3473f882001-02-23 17:55:21 +000094 * List of XML prefixed PI allowed by W3C specs
95 */
96
Daniel Veillardb44025c2001-10-11 22:55:55 +000097static const char *xmlW3CPIs[] = {
Owen Taylor3473f882001-02-23 17:55:21 +000098 "xml-stylesheet",
99 NULL
100};
101
102/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000103xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
104 const xmlChar **str);
105
Daniel Veillard257d9102001-05-08 10:41:44 +0000106static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000107xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
108 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000109 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000110 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000111
Daniel Veillard8107a222002-01-13 14:10:10 +0000112static void
113xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
114 xmlNodePtr lastNode);
115
Daniel Veillard328f48c2002-11-15 15:24:34 +0000116static int
117xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
118 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Owen Taylor3473f882001-02-23 17:55:21 +0000119/************************************************************************
120 * *
121 * Parser stacks related functions and macros *
122 * *
123 ************************************************************************/
124
125xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
126 const xmlChar ** str);
127
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000128/**
129 * inputPush:
130 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000131 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000132 *
133 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000134 *
135 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000136 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000137extern int
138inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
139{
140 if (ctxt->inputNr >= ctxt->inputMax) {
141 ctxt->inputMax *= 2;
142 ctxt->inputTab =
143 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
144 ctxt->inputMax *
145 sizeof(ctxt->inputTab[0]));
146 if (ctxt->inputTab == NULL) {
147 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
148 return (0);
149 }
150 }
151 ctxt->inputTab[ctxt->inputNr] = value;
152 ctxt->input = value;
153 return (ctxt->inputNr++);
154}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000155/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000156 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000157 * @ctxt: an XML parser context
158 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000159 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000160 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000161 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000162 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000163extern xmlParserInputPtr
164inputPop(xmlParserCtxtPtr ctxt)
165{
166 xmlParserInputPtr ret;
167
168 if (ctxt->inputNr <= 0)
169 return (0);
170 ctxt->inputNr--;
171 if (ctxt->inputNr > 0)
172 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
173 else
174 ctxt->input = NULL;
175 ret = ctxt->inputTab[ctxt->inputNr];
176 ctxt->inputTab[ctxt->inputNr] = 0;
177 return (ret);
178}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000179/**
180 * nodePush:
181 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000182 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000183 *
184 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000185 *
186 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000187 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000188extern int
189nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
190{
191 if (ctxt->nodeNr >= ctxt->nodeMax) {
192 ctxt->nodeMax *= 2;
193 ctxt->nodeTab =
194 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
195 ctxt->nodeMax *
196 sizeof(ctxt->nodeTab[0]));
197 if (ctxt->nodeTab == NULL) {
198 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
199 return (0);
200 }
201 }
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000202#ifdef MAX_DEPTH
203 if (ctxt->nodeNr > MAX_DEPTH) {
204 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
205 ctxt->sax->error(ctxt->userData,
206 "Excessive depth in document: change MAX_DEPTH = %d\n",
207 MAX_DEPTH);
208 ctxt->wellFormed = 0;
209 ctxt->instate = XML_PARSER_EOF;
210 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
211 return(0);
212 }
213#endif
Daniel Veillard1c732d22002-11-30 11:22:59 +0000214 ctxt->nodeTab[ctxt->nodeNr] = value;
215 ctxt->node = value;
216 return (ctxt->nodeNr++);
217}
218/**
219 * nodePop:
220 * @ctxt: an XML parser context
221 *
222 * Pops the top element node from the node stack
223 *
224 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +0000225 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000226extern xmlNodePtr
227nodePop(xmlParserCtxtPtr ctxt)
228{
229 xmlNodePtr ret;
230
231 if (ctxt->nodeNr <= 0)
232 return (0);
233 ctxt->nodeNr--;
234 if (ctxt->nodeNr > 0)
235 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
236 else
237 ctxt->node = NULL;
238 ret = ctxt->nodeTab[ctxt->nodeNr];
239 ctxt->nodeTab[ctxt->nodeNr] = 0;
240 return (ret);
241}
242/**
243 * namePush:
244 * @ctxt: an XML parser context
245 * @value: the element name
246 *
247 * Pushes a new element name on top of the name stack
248 *
249 * Returns 0 in case of error, the index in the stack otherwise
250 */
251extern int
252namePush(xmlParserCtxtPtr ctxt, xmlChar * value)
253{
254 if (ctxt->nameNr >= ctxt->nameMax) {
255 ctxt->nameMax *= 2;
256 ctxt->nameTab =
257 (xmlChar * *)xmlRealloc(ctxt->nameTab,
258 ctxt->nameMax *
259 sizeof(ctxt->nameTab[0]));
260 if (ctxt->nameTab == NULL) {
261 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
262 return (0);
263 }
264 }
265 ctxt->nameTab[ctxt->nameNr] = value;
266 ctxt->name = value;
267 return (ctxt->nameNr++);
268}
269/**
270 * namePop:
271 * @ctxt: an XML parser context
272 *
273 * Pops the top element name from the name stack
274 *
275 * Returns the name just removed
276 */
277extern xmlChar *
278namePop(xmlParserCtxtPtr ctxt)
279{
280 xmlChar *ret;
281
282 if (ctxt->nameNr <= 0)
283 return (0);
284 ctxt->nameNr--;
285 if (ctxt->nameNr > 0)
286 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
287 else
288 ctxt->name = NULL;
289 ret = ctxt->nameTab[ctxt->nameNr];
290 ctxt->nameTab[ctxt->nameNr] = 0;
291 return (ret);
292}
Owen Taylor3473f882001-02-23 17:55:21 +0000293
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000294static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000295 if (ctxt->spaceNr >= ctxt->spaceMax) {
296 ctxt->spaceMax *= 2;
297 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
298 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
299 if (ctxt->spaceTab == NULL) {
300 xmlGenericError(xmlGenericErrorContext,
301 "realloc failed !\n");
302 return(0);
303 }
304 }
305 ctxt->spaceTab[ctxt->spaceNr] = val;
306 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
307 return(ctxt->spaceNr++);
308}
309
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000310static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000311 int ret;
312 if (ctxt->spaceNr <= 0) return(0);
313 ctxt->spaceNr--;
314 if (ctxt->spaceNr > 0)
315 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
316 else
317 ctxt->space = NULL;
318 ret = ctxt->spaceTab[ctxt->spaceNr];
319 ctxt->spaceTab[ctxt->spaceNr] = -1;
320 return(ret);
321}
322
/*
 * Macros for accessing the content. They are meant for the parser only
 * and must not be exported. All of them expect a variable named ctxt
 * (the parser context) to be in scope at the point of use.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the
 * context in order to use them:
 *
 *   CUR_PTR  the current pointer to the xmlChar to be parsed.
 *            To be used with extreme caution since operations consuming
 *            characters may move the input buffer to a different location !
 *   CUR      the current xmlChar value, i.e. an 8 bit value.
 *            It should be used internally by the parser only to compare
 *            against ASCII values, otherwise it would break when running
 *            with UTF-8 encoding.
 *   RAW      same as CUR but in the input buffer, bypassing any token
 *            extraction that may have been done (both are defined
 *            identically here).
 *   NXT(n)   the n'th next xmlChar. Like CUR it should be used only to
 *            compare against ASCII based substrings.
 *   SKIP(n)  skip n xmlChar; must also be used only to skip ASCII defined
 *            strings without newlines within the parser. Its argument is
 *            evaluated several times.
 *   NEXT1    skip 1 xmlChar; must also be used only to skip 1 non-newline
 *            ASCII defined char within the parser.
 *
 * Clean macros, not dependent on an ASCII context, expecting UTF-8
 * encoding:
 *
 *   NEXT        skip to the next character, this does the proper decoding
 *               in UTF-8 mode. It also pops up unfinished entities on
 *               the fly.
 *   NEXTL(l)    skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) the current unicode character (int), sets l to the number
 *               of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR   same but operates on a string instead of the context
 *   COPY_BUF    copy the current unicode char to the target buffer and
 *               increment the index
 *   GROW, SHRINK handling of input buffers
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
370
Daniel Veillard46de64e2002-05-29 08:21:33 +0000371#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) \
372 xmlSHRINK (ctxt);
373
374static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
375 xmlParserInputShrink(ctxt->input);
376 if ((*ctxt->input->cur == 0) &&
377 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
378 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000379 }
Owen Taylor3473f882001-02-23 17:55:21 +0000380
Daniel Veillard46de64e2002-05-29 08:21:33 +0000381#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) \
382 xmlGROW (ctxt);
383
384static void xmlGROW (xmlParserCtxtPtr ctxt) {
385 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
386 if ((*ctxt->input->cur == 0) &&
387 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
388 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000389 }
Owen Taylor3473f882001-02-23 17:55:21 +0000390
/* SKIP_BLANKS: skip blanks at the read position, see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: advance by one character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one xmlChar, updating the column and the
 * character count, and ask for more input when the new position holds a
 * 0 xmlChar. No line accounting is done, so it must not be used on a
 * newline.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): advance by the l xmlChars of the current character, keeping
 * line and column up to date, then give xmlParserHandlePEReference() a
 * chance if the new position holds a '%'.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* CUR_CHAR(l): current character of the context; l receives its length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
/* CUR_SCHAR(s, l): same as CUR_CHAR but reading from the string s. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append the character v to buffer b at index i and
 * advance i. A character of length l == 1 is stored directly, anything
 * else goes through xmlCopyCharMultiByte(). Wrapped in do/while(0) so
 * that it behaves as a single statement; b and i are evaluated more than
 * once.
 */
#define COPY_BUF(l,b,i,v) do {						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v));			\
  } while (0)
Owen Taylor3473f882001-02-23 17:55:21 +0000417
418/**
419 * xmlSkipBlankChars:
420 * @ctxt: the XML parser context
421 *
422 * skip all blanks character found at that point in the input streams.
423 * It pops up finished entities in the process if allowable at that point.
424 *
425 * Returns the number of space chars skipped
426 */
427
428int
429xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000430 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000431
432 /*
433 * It's Okay to use CUR/NEXT here since all the blanks are on
434 * the ASCII range.
435 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000436 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
437 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000438 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000439 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000440 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000441 cur = ctxt->input->cur;
442 while (IS_BLANK(*cur)) {
443 if (*cur == '\n') {
444 ctxt->input->line++; ctxt->input->col = 1;
445 }
446 cur++;
447 res++;
448 if (*cur == 0) {
449 ctxt->input->cur = cur;
450 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
451 cur = ctxt->input->cur;
452 }
453 }
454 ctxt->input->cur = cur;
455 } else {
456 int cur;
457 do {
458 cur = CUR;
459 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
460 NEXT;
461 cur = CUR;
462 res++;
463 }
464 while ((cur == 0) && (ctxt->inputNr > 1) &&
465 (ctxt->instate != XML_PARSER_COMMENT)) {
466 xmlPopInput(ctxt);
467 cur = CUR;
468 }
469 /*
470 * Need to handle support of entities branching here
471 */
472 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
473 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
474 }
Owen Taylor3473f882001-02-23 17:55:21 +0000475 return(res);
476}
477
478/************************************************************************
479 * *
480 * Commodity functions to handle entities *
481 * *
482 ************************************************************************/
483
484/**
485 * xmlPopInput:
486 * @ctxt: an XML parser context
487 *
488 * xmlPopInput: the current input pointed by ctxt->input came to an end
489 * pop it and return the next char.
490 *
491 * Returns the current xmlChar in the parser context
492 */
493xmlChar
494xmlPopInput(xmlParserCtxtPtr ctxt) {
495 if (ctxt->inputNr == 1) return(0); /* End of main Input */
496 if (xmlParserDebugEntities)
497 xmlGenericError(xmlGenericErrorContext,
498 "Popping input %d\n", ctxt->inputNr);
499 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +0000500 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000501 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
502 return(xmlPopInput(ctxt));
503 return(CUR);
504}
505
506/**
507 * xmlPushInput:
508 * @ctxt: an XML parser context
509 * @input: an XML parser input fragment (entity, XML fragment ...).
510 *
511 * xmlPushInput: switch to a new input stream which is stacked on top
512 * of the previous one(s).
513 */
514void
515xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
516 if (input == NULL) return;
517
518 if (xmlParserDebugEntities) {
519 if ((ctxt->input != NULL) && (ctxt->input->filename))
520 xmlGenericError(xmlGenericErrorContext,
521 "%s(%d): ", ctxt->input->filename,
522 ctxt->input->line);
523 xmlGenericError(xmlGenericErrorContext,
524 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
525 }
526 inputPush(ctxt, input);
527 GROW;
528}
529
530/**
531 * xmlParseCharRef:
532 * @ctxt: an XML parser context
533 *
534 * parse Reference declarations
535 *
536 * [66] CharRef ::= '&#' [0-9]+ ';' |
537 * '&#x' [0-9a-fA-F]+ ';'
538 *
539 * [ WFC: Legal Character ]
540 * Characters referred to using character references must match the
541 * production for Char.
542 *
543 * Returns the value parsed (as an int), 0 in case of error
544 */
545int
546xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000547 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000548 int count = 0;
549
Owen Taylor3473f882001-02-23 17:55:21 +0000550 /*
551 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
552 */
Daniel Veillard561b7f82002-03-20 21:55:57 +0000553 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +0000554 (NXT(2) == 'x')) {
555 SKIP(3);
556 GROW;
557 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000558 if (count++ > 20) {
559 count = 0;
560 GROW;
561 }
562 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000563 val = val * 16 + (CUR - '0');
564 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
565 val = val * 16 + (CUR - 'a') + 10;
566 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
567 val = val * 16 + (CUR - 'A') + 10;
568 else {
569 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
570 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
571 ctxt->sax->error(ctxt->userData,
572 "xmlParseCharRef: invalid hexadecimal value\n");
573 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000574 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000575 val = 0;
576 break;
577 }
578 NEXT;
579 count++;
580 }
581 if (RAW == ';') {
582 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +0000583 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +0000584 ctxt->nbChars ++;
585 ctxt->input->cur++;
586 }
Daniel Veillard561b7f82002-03-20 21:55:57 +0000587 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +0000588 SKIP(2);
589 GROW;
590 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000591 if (count++ > 20) {
592 count = 0;
593 GROW;
594 }
595 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000596 val = val * 10 + (CUR - '0');
597 else {
598 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
599 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
600 ctxt->sax->error(ctxt->userData,
601 "xmlParseCharRef: invalid decimal value\n");
602 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000603 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000604 val = 0;
605 break;
606 }
607 NEXT;
608 count++;
609 }
610 if (RAW == ';') {
611 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +0000612 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +0000613 ctxt->nbChars ++;
614 ctxt->input->cur++;
615 }
616 } else {
617 ctxt->errNo = XML_ERR_INVALID_CHARREF;
618 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
619 ctxt->sax->error(ctxt->userData,
620 "xmlParseCharRef: invalid value\n");
621 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000622 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000623 }
624
625 /*
626 * [ WFC: Legal Character ]
627 * Characters referred to using character references must match the
628 * production for Char.
629 */
630 if (IS_CHAR(val)) {
631 return(val);
632 } else {
633 ctxt->errNo = XML_ERR_INVALID_CHAR;
634 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000635 ctxt->sax->error(ctxt->userData,
636 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000637 val);
638 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000639 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000640 }
641 return(0);
642}
643
644/**
645 * xmlParseStringCharRef:
646 * @ctxt: an XML parser context
647 * @str: a pointer to an index in the string
648 *
649 * parse Reference declarations, variant parsing from a string rather
650 * than an an input flow.
651 *
652 * [66] CharRef ::= '&#' [0-9]+ ';' |
653 * '&#x' [0-9a-fA-F]+ ';'
654 *
655 * [ WFC: Legal Character ]
656 * Characters referred to using character references must match the
657 * production for Char.
658 *
659 * Returns the value parsed (as an int), 0 in case of error, str will be
660 * updated to the current value of the index
661 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000662static int
Owen Taylor3473f882001-02-23 17:55:21 +0000663xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
664 const xmlChar *ptr;
665 xmlChar cur;
666 int val = 0;
667
668 if ((str == NULL) || (*str == NULL)) return(0);
669 ptr = *str;
670 cur = *ptr;
671 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
672 ptr += 3;
673 cur = *ptr;
674 while (cur != ';') { /* Non input consuming loop */
675 if ((cur >= '0') && (cur <= '9'))
676 val = val * 16 + (cur - '0');
677 else if ((cur >= 'a') && (cur <= 'f'))
678 val = val * 16 + (cur - 'a') + 10;
679 else if ((cur >= 'A') && (cur <= 'F'))
680 val = val * 16 + (cur - 'A') + 10;
681 else {
682 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
683 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
684 ctxt->sax->error(ctxt->userData,
685 "xmlParseStringCharRef: invalid hexadecimal value\n");
686 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000687 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000688 val = 0;
689 break;
690 }
691 ptr++;
692 cur = *ptr;
693 }
694 if (cur == ';')
695 ptr++;
696 } else if ((cur == '&') && (ptr[1] == '#')){
697 ptr += 2;
698 cur = *ptr;
699 while (cur != ';') { /* Non input consuming loops */
700 if ((cur >= '0') && (cur <= '9'))
701 val = val * 10 + (cur - '0');
702 else {
703 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
704 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
705 ctxt->sax->error(ctxt->userData,
706 "xmlParseStringCharRef: invalid decimal value\n");
707 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000708 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000709 val = 0;
710 break;
711 }
712 ptr++;
713 cur = *ptr;
714 }
715 if (cur == ';')
716 ptr++;
717 } else {
718 ctxt->errNo = XML_ERR_INVALID_CHARREF;
719 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
720 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000721 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000722 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000723 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000724 return(0);
725 }
726 *str = ptr;
727
728 /*
729 * [ WFC: Legal Character ]
730 * Characters referred to using character references must match the
731 * production for Char.
732 */
733 if (IS_CHAR(val)) {
734 return(val);
735 } else {
736 ctxt->errNo = XML_ERR_INVALID_CHAR;
737 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
738 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000739 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000740 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000741 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000742 }
743 return(0);
744}
745
746/**
Daniel Veillardf5582f12002-06-11 10:08:16 +0000747 * xmlNewBlanksWrapperInputStream:
748 * @ctxt: an XML parser context
749 * @entity: an Entity pointer
750 *
751 * Create a new input stream for wrapping
752 * blanks around a PEReference
753 *
754 * Returns the new input stream or NULL
755 */
756
757static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
758
Daniel Veillardf4862f02002-09-10 11:13:43 +0000759static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +0000760xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
761 xmlParserInputPtr input;
762 xmlChar *buffer;
763 size_t length;
764 if (entity == NULL) {
765 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
766 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
767 ctxt->sax->error(ctxt->userData,
768 "internal: xmlNewBlanksWrapperInputStream entity = NULL\n");
769 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
770 return(NULL);
771 }
772 if (xmlParserDebugEntities)
773 xmlGenericError(xmlGenericErrorContext,
774 "new blanks wrapper for entity: %s\n", entity->name);
775 input = xmlNewInputStream(ctxt);
776 if (input == NULL) {
777 return(NULL);
778 }
779 length = xmlStrlen(entity->name) + 5;
780 buffer = xmlMalloc(length);
781 if (buffer == NULL) {
782 return(NULL);
783 }
784 buffer [0] = ' ';
785 buffer [1] = '%';
786 buffer [length-3] = ';';
787 buffer [length-2] = ' ';
788 buffer [length-1] = 0;
789 memcpy(buffer + 2, entity->name, length - 5);
790 input->free = deallocblankswrapper;
791 input->base = buffer;
792 input->cur = buffer;
793 input->length = length;
794 input->end = &buffer[length];
795 return(input);
796}
797
798/**
Owen Taylor3473f882001-02-23 17:55:21 +0000799 * xmlParserHandlePEReference:
800 * @ctxt: the parser context
801 *
802 * [69] PEReference ::= '%' Name ';'
803 *
804 * [ WFC: No Recursion ]
805 * A parsed entity must not contain a recursive
806 * reference to itself, either directly or indirectly.
807 *
808 * [ WFC: Entity Declared ]
809 * In a document without any DTD, a document with only an internal DTD
810 * subset which contains no parameter entity references, or a document
811 * with "standalone='yes'", ... ... The declaration of a parameter
812 * entity must precede any reference to it...
813 *
814 * [ VC: Entity Declared ]
815 * In a document with an external subset or external parameter entities
816 * with "standalone='no'", ... ... The declaration of a parameter entity
817 * must precede any reference to it...
818 *
819 * [ WFC: In DTD ]
820 * Parameter-entity references may only appear in the DTD.
821 * NOTE: misleading but this is handled.
822 *
823 * A PEReference may have been detected in the current input stream
824 * the handling is done accordingly to
825 * http://www.w3.org/TR/REC-xml#entproc
826 * i.e.
827 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000828 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000829 */
830void
831xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
832 xmlChar *name;
833 xmlEntityPtr entity = NULL;
834 xmlParserInputPtr input;
835
Owen Taylor3473f882001-02-23 17:55:21 +0000836 if (RAW != '%') return;
837 switch(ctxt->instate) {
838 case XML_PARSER_CDATA_SECTION:
839 return;
840 case XML_PARSER_COMMENT:
841 return;
842 case XML_PARSER_START_TAG:
843 return;
844 case XML_PARSER_END_TAG:
845 return;
846 case XML_PARSER_EOF:
847 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
848 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
849 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
850 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000851 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000852 return;
853 case XML_PARSER_PROLOG:
854 case XML_PARSER_START:
855 case XML_PARSER_MISC:
856 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
857 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
858 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
859 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000860 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000861 return;
862 case XML_PARSER_ENTITY_DECL:
863 case XML_PARSER_CONTENT:
864 case XML_PARSER_ATTRIBUTE_VALUE:
865 case XML_PARSER_PI:
866 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +0000867 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +0000868 /* we just ignore it there */
869 return;
870 case XML_PARSER_EPILOG:
871 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
872 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
873 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
874 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000875 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000876 return;
877 case XML_PARSER_ENTITY_VALUE:
878 /*
879 * NOTE: in the case of entity values, we don't do the
880 * substitution here since we need the literal
881 * entity value to be able to save the internal
882 * subset of the document.
883 * This will be handled by xmlStringDecodeEntities
884 */
885 return;
886 case XML_PARSER_DTD:
887 /*
888 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
889 * In the internal DTD subset, parameter-entity references
890 * can occur only where markup declarations can occur, not
891 * within markup declarations.
892 * In that case this is handled in xmlParseMarkupDecl
893 */
894 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
895 return;
Daniel Veillardf5582f12002-06-11 10:08:16 +0000896 if (IS_BLANK(NXT(1)) || NXT(1) == 0)
897 return;
Owen Taylor3473f882001-02-23 17:55:21 +0000898 break;
899 case XML_PARSER_IGNORE:
900 return;
901 }
902
903 NEXT;
904 name = xmlParseName(ctxt);
905 if (xmlParserDebugEntities)
906 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000907 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000908 if (name == NULL) {
909 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
910 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000911 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000912 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000913 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000914 } else {
915 if (RAW == ';') {
916 NEXT;
917 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
918 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
919 if (entity == NULL) {
920
921 /*
922 * [ WFC: Entity Declared ]
923 * In a document without any DTD, a document with only an
924 * internal DTD subset which contains no parameter entity
925 * references, or a document with "standalone='yes'", ...
926 * ... The declaration of a parameter entity must precede
927 * any reference to it...
928 */
929 if ((ctxt->standalone == 1) ||
930 ((ctxt->hasExternalSubset == 0) &&
931 (ctxt->hasPErefs == 0))) {
932 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
933 ctxt->sax->error(ctxt->userData,
934 "PEReference: %%%s; not found\n", name);
935 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000936 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000937 } else {
938 /*
939 * [ VC: Entity Declared ]
940 * In a document with an external subset or external
941 * parameter entities with "standalone='no'", ...
942 * ... The declaration of a parameter entity must precede
943 * any reference to it...
944 */
945 if ((!ctxt->disableSAX) &&
946 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
947 ctxt->vctxt.error(ctxt->vctxt.userData,
948 "PEReference: %%%s; not found\n", name);
949 } else if ((!ctxt->disableSAX) &&
950 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
951 ctxt->sax->warning(ctxt->userData,
952 "PEReference: %%%s; not found\n", name);
953 ctxt->valid = 0;
954 }
Daniel Veillardf5582f12002-06-11 10:08:16 +0000955 } else if (ctxt->input->free != deallocblankswrapper) {
956 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
957 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +0000958 } else {
959 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
960 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000961 xmlChar start[4];
962 xmlCharEncoding enc;
963
Owen Taylor3473f882001-02-23 17:55:21 +0000964 /*
965 * handle the extra spaces added before and after
966 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000967 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000968 */
969 input = xmlNewEntityInputStream(ctxt, entity);
970 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000971
972 /*
973 * Get the 4 first bytes and decode the charset
974 * if enc != XML_CHAR_ENCODING_NONE
975 * plug some encoding conversion routines.
976 */
977 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +0000978 if (entity->length >= 4) {
979 start[0] = RAW;
980 start[1] = NXT(1);
981 start[2] = NXT(2);
982 start[3] = NXT(3);
983 enc = xmlDetectCharEncoding(start, 4);
984 if (enc != XML_CHAR_ENCODING_NONE) {
985 xmlSwitchEncoding(ctxt, enc);
986 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000987 }
988
Owen Taylor3473f882001-02-23 17:55:21 +0000989 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
990 (RAW == '<') && (NXT(1) == '?') &&
991 (NXT(2) == 'x') && (NXT(3) == 'm') &&
992 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
993 xmlParseTextDecl(ctxt);
994 }
Owen Taylor3473f882001-02-23 17:55:21 +0000995 } else {
996 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
997 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000998 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000999 name);
1000 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001001 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001002 }
1003 }
1004 } else {
1005 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1007 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001008 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001009 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001010 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001011 }
1012 xmlFree(name);
1013 }
1014}
1015
/*
 * Macro used to grow the current buffer.
 *
 * Doubles <buffer>_size and reallocates <buffer> accordingly. It expects
 * an int variable named <buffer>_size in scope and must be used inside a
 * function returning a pointer: on allocation failure the old buffer is
 * released (so it does not leak) and the enclosing function returns NULL.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_tmp;						\
    buffer##_size *= 2;							\
    buffer##_tmp = (xmlChar *)						\
        xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));		\
    if (buffer##_tmp == NULL) {						\
        xmlGenericError(xmlGenericErrorContext, "realloc failed");	\
        xmlFree(buffer);						\
        return(NULL);							\
    }									\
    buffer = buffer##_tmp;						\
}
1028
1029/**
1030 * xmlStringDecodeEntities:
1031 * @ctxt: the parser context
1032 * @str: the input string
1033 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1034 * @end: an end marker xmlChar, 0 if none
1035 * @end2: an end marker xmlChar, 0 if none
1036 * @end3: an end marker xmlChar, 0 if none
1037 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001038 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001039 *
1040 * [67] Reference ::= EntityRef | CharRef
1041 *
1042 * [69] PEReference ::= '%' Name ';'
1043 *
1044 * Returns A newly allocated string with the substitution done. The caller
1045 * must deallocate it !
1046 */
1047xmlChar *
1048xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1049 xmlChar end, xmlChar end2, xmlChar end3) {
1050 xmlChar *buffer = NULL;
1051 int buffer_size = 0;
1052
1053 xmlChar *current = NULL;
1054 xmlEntityPtr ent;
1055 int c,l;
1056 int nbchars = 0;
1057
1058 if (str == NULL)
1059 return(NULL);
1060
1061 if (ctxt->depth > 40) {
1062 ctxt->errNo = XML_ERR_ENTITY_LOOP;
1063 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1064 ctxt->sax->error(ctxt->userData,
1065 "Detected entity reference loop\n");
1066 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001067 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001068 return(NULL);
1069 }
1070
1071 /*
1072 * allocate a translation buffer.
1073 */
1074 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1075 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1076 if (buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00001077 xmlGenericError(xmlGenericErrorContext,
1078 "xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00001079 return(NULL);
1080 }
1081
1082 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001083 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001084 * we are operating on already parsed values.
1085 */
1086 c = CUR_SCHAR(str, l);
1087 while ((c != 0) && (c != end) && /* non input consuming loop */
1088 (c != end2) && (c != end3)) {
1089
1090 if (c == 0) break;
1091 if ((c == '&') && (str[1] == '#')) {
1092 int val = xmlParseStringCharRef(ctxt, &str);
1093 if (val != 0) {
1094 COPY_BUF(0,buffer,nbchars,val);
1095 }
1096 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1097 if (xmlParserDebugEntities)
1098 xmlGenericError(xmlGenericErrorContext,
1099 "String decoding Entity Reference: %.30s\n",
1100 str);
1101 ent = xmlParseStringEntityRef(ctxt, &str);
1102 if ((ent != NULL) &&
1103 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1104 if (ent->content != NULL) {
1105 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1106 } else {
1107 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1108 ctxt->sax->error(ctxt->userData,
1109 "internal error entity has no content\n");
1110 }
1111 } else if ((ent != NULL) && (ent->content != NULL)) {
1112 xmlChar *rep;
1113
1114 ctxt->depth++;
1115 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1116 0, 0, 0);
1117 ctxt->depth--;
1118 if (rep != NULL) {
1119 current = rep;
1120 while (*current != 0) { /* non input consuming loop */
1121 buffer[nbchars++] = *current++;
1122 if (nbchars >
1123 buffer_size - XML_PARSER_BUFFER_SIZE) {
1124 growBuffer(buffer);
1125 }
1126 }
1127 xmlFree(rep);
1128 }
1129 } else if (ent != NULL) {
1130 int i = xmlStrlen(ent->name);
1131 const xmlChar *cur = ent->name;
1132
1133 buffer[nbchars++] = '&';
1134 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1135 growBuffer(buffer);
1136 }
1137 for (;i > 0;i--)
1138 buffer[nbchars++] = *cur++;
1139 buffer[nbchars++] = ';';
1140 }
1141 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1142 if (xmlParserDebugEntities)
1143 xmlGenericError(xmlGenericErrorContext,
1144 "String decoding PE Reference: %.30s\n", str);
1145 ent = xmlParseStringPEReference(ctxt, &str);
1146 if (ent != NULL) {
1147 xmlChar *rep;
1148
1149 ctxt->depth++;
1150 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1151 0, 0, 0);
1152 ctxt->depth--;
1153 if (rep != NULL) {
1154 current = rep;
1155 while (*current != 0) { /* non input consuming loop */
1156 buffer[nbchars++] = *current++;
1157 if (nbchars >
1158 buffer_size - XML_PARSER_BUFFER_SIZE) {
1159 growBuffer(buffer);
1160 }
1161 }
1162 xmlFree(rep);
1163 }
1164 }
1165 } else {
1166 COPY_BUF(l,buffer,nbchars,c);
1167 str += l;
1168 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1169 growBuffer(buffer);
1170 }
1171 }
1172 c = CUR_SCHAR(str, l);
1173 }
1174 buffer[nbchars++] = 0;
1175 return(buffer);
1176}
1177
1178
1179/************************************************************************
1180 * *
1181 * Commodity functions to handle xmlChars *
1182 * *
1183 ************************************************************************/
1184
1185/**
1186 * xmlStrndup:
1187 * @cur: the input xmlChar *
1188 * @len: the len of @cur
1189 *
1190 * a strndup for array of xmlChar's
1191 *
1192 * Returns a new xmlChar * or NULL
1193 */
1194xmlChar *
1195xmlStrndup(const xmlChar *cur, int len) {
1196 xmlChar *ret;
1197
1198 if ((cur == NULL) || (len < 0)) return(NULL);
1199 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1200 if (ret == NULL) {
1201 xmlGenericError(xmlGenericErrorContext,
1202 "malloc of %ld byte failed\n",
1203 (len + 1) * (long)sizeof(xmlChar));
1204 return(NULL);
1205 }
1206 memcpy(ret, cur, len * sizeof(xmlChar));
1207 ret[len] = 0;
1208 return(ret);
1209}
1210
1211/**
1212 * xmlStrdup:
1213 * @cur: the input xmlChar *
1214 *
1215 * a strdup for array of xmlChar's. Since they are supposed to be
1216 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1217 * a termination mark of '0'.
1218 *
1219 * Returns a new xmlChar * or NULL
1220 */
1221xmlChar *
1222xmlStrdup(const xmlChar *cur) {
1223 const xmlChar *p = cur;
1224
1225 if (cur == NULL) return(NULL);
1226 while (*p != 0) p++; /* non input consuming */
1227 return(xmlStrndup(cur, p - cur));
1228}
1229
1230/**
1231 * xmlCharStrndup:
1232 * @cur: the input char *
1233 * @len: the len of @cur
1234 *
1235 * a strndup for char's to xmlChar's
1236 *
1237 * Returns a new xmlChar * or NULL
1238 */
1239
1240xmlChar *
1241xmlCharStrndup(const char *cur, int len) {
1242 int i;
1243 xmlChar *ret;
1244
1245 if ((cur == NULL) || (len < 0)) return(NULL);
1246 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1247 if (ret == NULL) {
1248 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1249 (len + 1) * (long)sizeof(xmlChar));
1250 return(NULL);
1251 }
1252 for (i = 0;i < len;i++)
1253 ret[i] = (xmlChar) cur[i];
1254 ret[len] = 0;
1255 return(ret);
1256}
1257
1258/**
1259 * xmlCharStrdup:
1260 * @cur: the input char *
Owen Taylor3473f882001-02-23 17:55:21 +00001261 *
1262 * a strdup for char's to xmlChar's
1263 *
1264 * Returns a new xmlChar * or NULL
1265 */
1266
1267xmlChar *
1268xmlCharStrdup(const char *cur) {
1269 const char *p = cur;
1270
1271 if (cur == NULL) return(NULL);
1272 while (*p != '\0') p++; /* non input consuming */
1273 return(xmlCharStrndup(cur, p - cur));
1274}
1275
1276/**
1277 * xmlStrcmp:
1278 * @str1: the first xmlChar *
1279 * @str2: the second xmlChar *
1280 *
1281 * a strcmp for xmlChar's
1282 *
1283 * Returns the integer result of the comparison
1284 */
1285
1286int
1287xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1288 register int tmp;
1289
1290 if (str1 == str2) return(0);
1291 if (str1 == NULL) return(-1);
1292 if (str2 == NULL) return(1);
1293 do {
1294 tmp = *str1++ - *str2;
1295 if (tmp != 0) return(tmp);
1296 } while (*str2++ != 0);
1297 return 0;
1298}
1299
1300/**
1301 * xmlStrEqual:
1302 * @str1: the first xmlChar *
1303 * @str2: the second xmlChar *
1304 *
1305 * Check if both string are equal of have same content
1306 * Should be a bit more readable and faster than xmlStrEqual()
1307 *
1308 * Returns 1 if they are equal, 0 if they are different
1309 */
1310
1311int
1312xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1313 if (str1 == str2) return(1);
1314 if (str1 == NULL) return(0);
1315 if (str2 == NULL) return(0);
1316 do {
1317 if (*str1++ != *str2) return(0);
1318 } while (*str2++);
1319 return(1);
1320}
1321
1322/**
1323 * xmlStrncmp:
1324 * @str1: the first xmlChar *
1325 * @str2: the second xmlChar *
1326 * @len: the max comparison length
1327 *
1328 * a strncmp for xmlChar's
1329 *
1330 * Returns the integer result of the comparison
1331 */
1332
1333int
1334xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1335 register int tmp;
1336
1337 if (len <= 0) return(0);
1338 if (str1 == str2) return(0);
1339 if (str1 == NULL) return(-1);
1340 if (str2 == NULL) return(1);
1341 do {
1342 tmp = *str1++ - *str2;
1343 if (tmp != 0 || --len == 0) return(tmp);
1344 } while (*str2++ != 0);
1345 return 0;
1346}
1347
Daniel Veillardb44025c2001-10-11 22:55:55 +00001348static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001349 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1350 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1351 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1352 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1353 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1354 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1355 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1356 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1357 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1358 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1359 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1360 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1361 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1362 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1363 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1364 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1365 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1366 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1367 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1368 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1369 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1370 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1371 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1372 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1373 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1374 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1375 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1376 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1377 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1378 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1379 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1380 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1381};
1382
1383/**
1384 * xmlStrcasecmp:
1385 * @str1: the first xmlChar *
1386 * @str2: the second xmlChar *
1387 *
1388 * a strcasecmp for xmlChar's
1389 *
1390 * Returns the integer result of the comparison
1391 */
1392
1393int
1394xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1395 register int tmp;
1396
1397 if (str1 == str2) return(0);
1398 if (str1 == NULL) return(-1);
1399 if (str2 == NULL) return(1);
1400 do {
1401 tmp = casemap[*str1++] - casemap[*str2];
1402 if (tmp != 0) return(tmp);
1403 } while (*str2++ != 0);
1404 return 0;
1405}
1406
1407/**
1408 * xmlStrncasecmp:
1409 * @str1: the first xmlChar *
1410 * @str2: the second xmlChar *
1411 * @len: the max comparison length
1412 *
1413 * a strncasecmp for xmlChar's
1414 *
1415 * Returns the integer result of the comparison
1416 */
1417
1418int
1419xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1420 register int tmp;
1421
1422 if (len <= 0) return(0);
1423 if (str1 == str2) return(0);
1424 if (str1 == NULL) return(-1);
1425 if (str2 == NULL) return(1);
1426 do {
1427 tmp = casemap[*str1++] - casemap[*str2];
1428 if (tmp != 0 || --len == 0) return(tmp);
1429 } while (*str2++ != 0);
1430 return 0;
1431}
1432
1433/**
1434 * xmlStrchr:
1435 * @str: the xmlChar * array
1436 * @val: the xmlChar to search
1437 *
1438 * a strchr for xmlChar's
1439 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001440 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001441 */
1442
1443const xmlChar *
1444xmlStrchr(const xmlChar *str, xmlChar val) {
1445 if (str == NULL) return(NULL);
1446 while (*str != 0) { /* non input consuming */
1447 if (*str == val) return((xmlChar *) str);
1448 str++;
1449 }
1450 return(NULL);
1451}
1452
1453/**
1454 * xmlStrstr:
1455 * @str: the xmlChar * array (haystack)
1456 * @val: the xmlChar to search (needle)
1457 *
1458 * a strstr for xmlChar's
1459 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001460 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001461 */
1462
1463const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001464xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001465 int n;
1466
1467 if (str == NULL) return(NULL);
1468 if (val == NULL) return(NULL);
1469 n = xmlStrlen(val);
1470
1471 if (n == 0) return(str);
1472 while (*str != 0) { /* non input consuming */
1473 if (*str == *val) {
1474 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1475 }
1476 str++;
1477 }
1478 return(NULL);
1479}
1480
1481/**
1482 * xmlStrcasestr:
1483 * @str: the xmlChar * array (haystack)
1484 * @val: the xmlChar to search (needle)
1485 *
1486 * a case-ignoring strstr for xmlChar's
1487 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001488 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001489 */
1490
1491const xmlChar *
1492xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1493 int n;
1494
1495 if (str == NULL) return(NULL);
1496 if (val == NULL) return(NULL);
1497 n = xmlStrlen(val);
1498
1499 if (n == 0) return(str);
1500 while (*str != 0) { /* non input consuming */
1501 if (casemap[*str] == casemap[*val])
1502 if (!xmlStrncasecmp(str, val, n)) return(str);
1503 str++;
1504 }
1505 return(NULL);
1506}
1507
1508/**
1509 * xmlStrsub:
1510 * @str: the xmlChar * array (haystack)
1511 * @start: the index of the first char (zero based)
1512 * @len: the length of the substring
1513 *
1514 * Extract a substring of a given string
1515 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001516 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001517 */
1518
1519xmlChar *
1520xmlStrsub(const xmlChar *str, int start, int len) {
1521 int i;
1522
1523 if (str == NULL) return(NULL);
1524 if (start < 0) return(NULL);
1525 if (len < 0) return(NULL);
1526
1527 for (i = 0;i < start;i++) {
1528 if (*str == 0) return(NULL);
1529 str++;
1530 }
1531 if (*str == 0) return(NULL);
1532 return(xmlStrndup(str, len));
1533}
1534
1535/**
1536 * xmlStrlen:
1537 * @str: the xmlChar * array
1538 *
1539 * length of a xmlChar's string
1540 *
1541 * Returns the number of xmlChar contained in the ARRAY.
1542 */
1543
1544int
1545xmlStrlen(const xmlChar *str) {
1546 int len = 0;
1547
1548 if (str == NULL) return(0);
1549 while (*str != 0) { /* non input consuming */
1550 str++;
1551 len++;
1552 }
1553 return(len);
1554}
1555
1556/**
1557 * xmlStrncat:
1558 * @cur: the original xmlChar * array
1559 * @add: the xmlChar * array added
1560 * @len: the length of @add
1561 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001562 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001563 * first bytes of @add.
1564 *
1565 * Returns a new xmlChar *, the original @cur is reallocated if needed
1566 * and should not be freed
1567 */
1568
1569xmlChar *
1570xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1571 int size;
1572 xmlChar *ret;
1573
1574 if ((add == NULL) || (len == 0))
1575 return(cur);
1576 if (cur == NULL)
1577 return(xmlStrndup(add, len));
1578
1579 size = xmlStrlen(cur);
1580 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1581 if (ret == NULL) {
1582 xmlGenericError(xmlGenericErrorContext,
1583 "xmlStrncat: realloc of %ld byte failed\n",
1584 (size + len + 1) * (long)sizeof(xmlChar));
1585 return(cur);
1586 }
1587 memcpy(&ret[size], add, len * sizeof(xmlChar));
1588 ret[size + len] = 0;
1589 return(ret);
1590}
1591
1592/**
1593 * xmlStrcat:
1594 * @cur: the original xmlChar * array
1595 * @add: the xmlChar * array added
1596 *
1597 * a strcat for array of xmlChar's. Since they are supposed to be
1598 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1599 * a termination mark of '0'.
1600 *
1601 * Returns a new xmlChar * containing the concatenated string.
1602 */
1603xmlChar *
1604xmlStrcat(xmlChar *cur, const xmlChar *add) {
1605 const xmlChar *p = add;
1606
1607 if (add == NULL) return(cur);
1608 if (cur == NULL)
1609 return(xmlStrdup(add));
1610
1611 while (*p != 0) p++; /* non input consuming */
1612 return(xmlStrncat(cur, add, p - add));
1613}
1614
1615/************************************************************************
1616 * *
1617 * Commodity functions, cleanup needed ? *
1618 * *
1619 ************************************************************************/
1620
1621/**
1622 * areBlanks:
1623 * @ctxt: an XML parser context
1624 * @str: a xmlChar *
1625 * @len: the size of @str
1626 *
1627 * Is this a sequence of blank chars that one can ignore ?
1628 *
1629 * Returns 1 if ignorable 0 otherwise.
1630 */
1631
1632static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1633 int i, ret;
1634 xmlNodePtr lastChild;
1635
Daniel Veillard05c13a22001-09-09 08:38:09 +00001636 /*
1637 * Don't spend time trying to differentiate them, the same callback is
1638 * used !
1639 */
1640 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001641 return(0);
1642
Owen Taylor3473f882001-02-23 17:55:21 +00001643 /*
1644 * Check for xml:space value.
1645 */
1646 if (*(ctxt->space) == 1)
1647 return(0);
1648
1649 /*
1650 * Check that the string is made of blanks
1651 */
1652 for (i = 0;i < len;i++)
1653 if (!(IS_BLANK(str[i]))) return(0);
1654
1655 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001656 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001657 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001658 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001659 if (ctxt->myDoc != NULL) {
1660 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1661 if (ret == 0) return(1);
1662 if (ret == 1) return(0);
1663 }
1664
1665 /*
1666 * Otherwise, heuristic :-\
1667 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001668 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001669 if ((ctxt->node->children == NULL) &&
1670 (RAW == '<') && (NXT(1) == '/')) return(0);
1671
1672 lastChild = xmlGetLastChild(ctxt->node);
1673 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001674 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1675 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001676 } else if (xmlNodeIsText(lastChild))
1677 return(0);
1678 else if ((ctxt->node->children != NULL) &&
1679 (xmlNodeIsText(ctxt->node->children)))
1680 return(0);
1681 return(1);
1682}
1683
Owen Taylor3473f882001-02-23 17:55:21 +00001684/************************************************************************
1685 * *
1686 * Extra stuff for namespace support *
1687 * Relates to http://www.w3.org/TR/WD-xml-names *
1688 * *
1689 ************************************************************************/
1690
1691/**
1692 * xmlSplitQName:
1693 * @ctxt: an XML parser context
1694 * @name: an XML parser context
1695 * @prefix: a xmlChar **
1696 *
1697 * parse an UTF8 encoded XML qualified name string
1698 *
1699 * [NS 5] QName ::= (Prefix ':')? LocalPart
1700 *
1701 * [NS 6] Prefix ::= NCName
1702 *
1703 * [NS 7] LocalPart ::= NCName
1704 *
1705 * Returns the local part, and prefix is updated
1706 * to get the Prefix if any.
1707 */
1708
1709xmlChar *
1710xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1711 xmlChar buf[XML_MAX_NAMELEN + 5];
1712 xmlChar *buffer = NULL;
1713 int len = 0;
1714 int max = XML_MAX_NAMELEN;
1715 xmlChar *ret = NULL;
1716 const xmlChar *cur = name;
1717 int c;
1718
1719 *prefix = NULL;
1720
1721#ifndef XML_XML_NAMESPACE
1722 /* xml: prefix is not really a namespace */
1723 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1724 (cur[2] == 'l') && (cur[3] == ':'))
1725 return(xmlStrdup(name));
1726#endif
1727
1728 /* nasty but valid */
1729 if (cur[0] == ':')
1730 return(xmlStrdup(name));
1731
1732 c = *cur++;
1733 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1734 buf[len++] = c;
1735 c = *cur++;
1736 }
1737 if (len >= max) {
1738 /*
1739 * Okay someone managed to make a huge name, so he's ready to pay
1740 * for the processing speed.
1741 */
1742 max = len * 2;
1743
1744 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1745 if (buffer == NULL) {
1746 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1747 ctxt->sax->error(ctxt->userData,
1748 "xmlSplitQName: out of memory\n");
1749 return(NULL);
1750 }
1751 memcpy(buffer, buf, len);
1752 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1753 if (len + 10 > max) {
1754 max *= 2;
1755 buffer = (xmlChar *) xmlRealloc(buffer,
1756 max * sizeof(xmlChar));
1757 if (buffer == NULL) {
1758 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1759 ctxt->sax->error(ctxt->userData,
1760 "xmlSplitQName: out of memory\n");
1761 return(NULL);
1762 }
1763 }
1764 buffer[len++] = c;
1765 c = *cur++;
1766 }
1767 buffer[len] = 0;
1768 }
1769
1770 if (buffer == NULL)
1771 ret = xmlStrndup(buf, len);
1772 else {
1773 ret = buffer;
1774 buffer = NULL;
1775 max = XML_MAX_NAMELEN;
1776 }
1777
1778
1779 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00001780 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001781 if (c == 0) return(ret);
1782 *prefix = ret;
1783 len = 0;
1784
Daniel Veillardbb284f42002-10-16 18:02:47 +00001785 /*
1786 * Check that the first character is proper to start
1787 * a new name
1788 */
1789 if (!(((c >= 0x61) && (c <= 0x7A)) ||
1790 ((c >= 0x41) && (c <= 0x5A)) ||
1791 (c == '_') || (c == ':'))) {
1792 int l;
1793 int first = CUR_SCHAR(cur, l);
1794
1795 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001796 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1797 (ctxt->sax->error != NULL))
Daniel Veillardbb284f42002-10-16 18:02:47 +00001798 ctxt->sax->error(ctxt->userData,
1799 "Name %s is not XML Namespace compliant\n",
1800 name);
1801 }
1802 }
1803 cur++;
1804
Owen Taylor3473f882001-02-23 17:55:21 +00001805 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1806 buf[len++] = c;
1807 c = *cur++;
1808 }
1809 if (len >= max) {
1810 /*
1811 * Okay someone managed to make a huge name, so he's ready to pay
1812 * for the processing speed.
1813 */
1814 max = len * 2;
1815
1816 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1817 if (buffer == NULL) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001818 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1819 (ctxt->sax->error != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00001820 ctxt->sax->error(ctxt->userData,
1821 "xmlSplitQName: out of memory\n");
1822 return(NULL);
1823 }
1824 memcpy(buffer, buf, len);
1825 while (c != 0) { /* tested bigname2.xml */
1826 if (len + 10 > max) {
1827 max *= 2;
1828 buffer = (xmlChar *) xmlRealloc(buffer,
1829 max * sizeof(xmlChar));
1830 if (buffer == NULL) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001831 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1832 (ctxt->sax->error != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00001833 ctxt->sax->error(ctxt->userData,
1834 "xmlSplitQName: out of memory\n");
1835 return(NULL);
1836 }
1837 }
1838 buffer[len++] = c;
1839 c = *cur++;
1840 }
1841 buffer[len] = 0;
1842 }
1843
1844 if (buffer == NULL)
1845 ret = xmlStrndup(buf, len);
1846 else {
1847 ret = buffer;
1848 }
1849 }
1850
1851 return(ret);
1852}
1853
1854/************************************************************************
1855 * *
1856 * The parser itself *
1857 * Relates to http://www.w3.org/TR/REC-xml *
1858 * *
1859 ************************************************************************/
1860
Daniel Veillard76d66f42001-05-16 21:05:17 +00001861static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001862/**
1863 * xmlParseName:
1864 * @ctxt: an XML parser context
1865 *
1866 * parse an XML name.
1867 *
1868 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1869 * CombiningChar | Extender
1870 *
1871 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1872 *
1873 * [6] Names ::= Name (S Name)*
1874 *
1875 * Returns the Name parsed or NULL
1876 */
1877
1878xmlChar *
1879xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001880 const xmlChar *in;
1881 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001882 int count = 0;
1883
1884 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001885
1886 /*
1887 * Accelerator for simple ASCII names
1888 */
1889 in = ctxt->input->cur;
1890 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1891 ((*in >= 0x41) && (*in <= 0x5A)) ||
1892 (*in == '_') || (*in == ':')) {
1893 in++;
1894 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1895 ((*in >= 0x41) && (*in <= 0x5A)) ||
1896 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001897 (*in == '_') || (*in == '-') ||
1898 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001899 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001900 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001901 count = in - ctxt->input->cur;
1902 ret = xmlStrndup(ctxt->input->cur, count);
1903 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00001904 ctxt->nbChars += count;
1905 ctxt->input->col += count;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001906 return(ret);
1907 }
1908 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001909 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001910}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001911
Daniel Veillard46de64e2002-05-29 08:21:33 +00001912/**
1913 * xmlParseNameAndCompare:
1914 * @ctxt: an XML parser context
1915 *
1916 * parse an XML name and compares for match
1917 * (specialized for endtag parsing)
1918 *
1919 *
1920 * Returns NULL for an illegal name, (xmlChar*) 1 for success
1921 * and the name for mismatch
1922 */
1923
Daniel Veillardf4862f02002-09-10 11:13:43 +00001924static xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00001925xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
1926 const xmlChar *cmp = other;
1927 const xmlChar *in;
1928 xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00001929
1930 GROW;
1931
1932 in = ctxt->input->cur;
1933 while (*in != 0 && *in == *cmp) {
1934 ++in;
1935 ++cmp;
1936 }
1937 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
1938 /* success */
1939 ctxt->input->cur = in;
1940 return (xmlChar*) 1;
1941 }
1942 /* failure (or end of input buffer), check with full function */
1943 ret = xmlParseName (ctxt);
1944 if (ret != 0 && xmlStrEqual (ret, other)) {
1945 xmlFree (ret);
1946 return (xmlChar*) 1;
1947 }
1948 return ret;
1949}
1950
Daniel Veillard76d66f42001-05-16 21:05:17 +00001951static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001952xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1953 xmlChar buf[XML_MAX_NAMELEN + 5];
1954 int len = 0, l;
1955 int c;
1956 int count = 0;
1957
1958 /*
1959 * Handler for more complex cases
1960 */
1961 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001962 c = CUR_CHAR(l);
1963 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1964 (!IS_LETTER(c) && (c != '_') &&
1965 (c != ':'))) {
1966 return(NULL);
1967 }
1968
1969 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1970 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1971 (c == '.') || (c == '-') ||
1972 (c == '_') || (c == ':') ||
1973 (IS_COMBINING(c)) ||
1974 (IS_EXTENDER(c)))) {
1975 if (count++ > 100) {
1976 count = 0;
1977 GROW;
1978 }
1979 COPY_BUF(l,buf,len,c);
1980 NEXTL(l);
1981 c = CUR_CHAR(l);
1982 if (len >= XML_MAX_NAMELEN) {
1983 /*
1984 * Okay someone managed to make a huge name, so he's ready to pay
1985 * for the processing speed.
1986 */
1987 xmlChar *buffer;
1988 int max = len * 2;
1989
1990 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1991 if (buffer == NULL) {
1992 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1993 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001994 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001995 return(NULL);
1996 }
1997 memcpy(buffer, buf, len);
1998 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1999 (c == '.') || (c == '-') ||
2000 (c == '_') || (c == ':') ||
2001 (IS_COMBINING(c)) ||
2002 (IS_EXTENDER(c))) {
2003 if (count++ > 100) {
2004 count = 0;
2005 GROW;
2006 }
2007 if (len + 10 > max) {
2008 max *= 2;
2009 buffer = (xmlChar *) xmlRealloc(buffer,
2010 max * sizeof(xmlChar));
2011 if (buffer == NULL) {
2012 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2013 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00002014 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002015 return(NULL);
2016 }
2017 }
2018 COPY_BUF(l,buffer,len,c);
2019 NEXTL(l);
2020 c = CUR_CHAR(l);
2021 }
2022 buffer[len] = 0;
2023 return(buffer);
2024 }
2025 }
2026 return(xmlStrndup(buf, len));
2027}
2028
2029/**
2030 * xmlParseStringName:
2031 * @ctxt: an XML parser context
2032 * @str: a pointer to the string pointer (IN/OUT)
2033 *
2034 * parse an XML name.
2035 *
2036 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2037 * CombiningChar | Extender
2038 *
2039 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2040 *
2041 * [6] Names ::= Name (S Name)*
2042 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002043 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002044 * is updated to the current location in the string.
2045 */
2046
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002047static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002048xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2049 xmlChar buf[XML_MAX_NAMELEN + 5];
2050 const xmlChar *cur = *str;
2051 int len = 0, l;
2052 int c;
2053
2054 c = CUR_SCHAR(cur, l);
2055 if (!IS_LETTER(c) && (c != '_') &&
2056 (c != ':')) {
2057 return(NULL);
2058 }
2059
2060 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2061 (c == '.') || (c == '-') ||
2062 (c == '_') || (c == ':') ||
2063 (IS_COMBINING(c)) ||
2064 (IS_EXTENDER(c))) {
2065 COPY_BUF(l,buf,len,c);
2066 cur += l;
2067 c = CUR_SCHAR(cur, l);
2068 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2069 /*
2070 * Okay someone managed to make a huge name, so he's ready to pay
2071 * for the processing speed.
2072 */
2073 xmlChar *buffer;
2074 int max = len * 2;
2075
2076 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
2077 if (buffer == NULL) {
2078 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2079 ctxt->sax->error(ctxt->userData,
2080 "xmlParseStringName: out of memory\n");
2081 return(NULL);
2082 }
2083 memcpy(buffer, buf, len);
2084 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2085 (c == '.') || (c == '-') ||
2086 (c == '_') || (c == ':') ||
2087 (IS_COMBINING(c)) ||
2088 (IS_EXTENDER(c))) {
2089 if (len + 10 > max) {
2090 max *= 2;
2091 buffer = (xmlChar *) xmlRealloc(buffer,
2092 max * sizeof(xmlChar));
2093 if (buffer == NULL) {
2094 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2095 ctxt->sax->error(ctxt->userData,
2096 "xmlParseStringName: out of memory\n");
2097 return(NULL);
2098 }
2099 }
2100 COPY_BUF(l,buffer,len,c);
2101 cur += l;
2102 c = CUR_SCHAR(cur, l);
2103 }
2104 buffer[len] = 0;
2105 *str = cur;
2106 return(buffer);
2107 }
2108 }
2109 *str = cur;
2110 return(xmlStrndup(buf, len));
2111}
2112
2113/**
2114 * xmlParseNmtoken:
2115 * @ctxt: an XML parser context
2116 *
2117 * parse an XML Nmtoken.
2118 *
2119 * [7] Nmtoken ::= (NameChar)+
2120 *
2121 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2122 *
2123 * Returns the Nmtoken parsed or NULL
2124 */
2125
2126xmlChar *
2127xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2128 xmlChar buf[XML_MAX_NAMELEN + 5];
2129 int len = 0, l;
2130 int c;
2131 int count = 0;
2132
2133 GROW;
2134 c = CUR_CHAR(l);
2135
2136 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2137 (c == '.') || (c == '-') ||
2138 (c == '_') || (c == ':') ||
2139 (IS_COMBINING(c)) ||
2140 (IS_EXTENDER(c))) {
2141 if (count++ > 100) {
2142 count = 0;
2143 GROW;
2144 }
2145 COPY_BUF(l,buf,len,c);
2146 NEXTL(l);
2147 c = CUR_CHAR(l);
2148 if (len >= XML_MAX_NAMELEN) {
2149 /*
2150 * Okay someone managed to make a huge token, so he's ready to pay
2151 * for the processing speed.
2152 */
2153 xmlChar *buffer;
2154 int max = len * 2;
2155
2156 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
2157 if (buffer == NULL) {
2158 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2159 ctxt->sax->error(ctxt->userData,
2160 "xmlParseNmtoken: out of memory\n");
2161 return(NULL);
2162 }
2163 memcpy(buffer, buf, len);
2164 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2165 (c == '.') || (c == '-') ||
2166 (c == '_') || (c == ':') ||
2167 (IS_COMBINING(c)) ||
2168 (IS_EXTENDER(c))) {
2169 if (count++ > 100) {
2170 count = 0;
2171 GROW;
2172 }
2173 if (len + 10 > max) {
2174 max *= 2;
2175 buffer = (xmlChar *) xmlRealloc(buffer,
2176 max * sizeof(xmlChar));
2177 if (buffer == NULL) {
2178 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2179 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002180 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002181 return(NULL);
2182 }
2183 }
2184 COPY_BUF(l,buffer,len,c);
2185 NEXTL(l);
2186 c = CUR_CHAR(l);
2187 }
2188 buffer[len] = 0;
2189 return(buffer);
2190 }
2191 }
2192 if (len == 0)
2193 return(NULL);
2194 return(xmlStrndup(buf, len));
2195}
2196
2197/**
2198 * xmlParseEntityValue:
2199 * @ctxt: an XML parser context
2200 * @orig: if non-NULL store a copy of the original entity value
2201 *
2202 * parse a value for ENTITY declarations
2203 *
2204 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2205 * "'" ([^%&'] | PEReference | Reference)* "'"
2206 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002207 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002208 */
2209
2210xmlChar *
2211xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2212 xmlChar *buf = NULL;
2213 int len = 0;
2214 int size = XML_PARSER_BUFFER_SIZE;
2215 int c, l;
2216 xmlChar stop;
2217 xmlChar *ret = NULL;
2218 const xmlChar *cur = NULL;
2219 xmlParserInputPtr input;
2220
2221 if (RAW == '"') stop = '"';
2222 else if (RAW == '\'') stop = '\'';
2223 else {
2224 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2225 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2226 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2227 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002228 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002229 return(NULL);
2230 }
2231 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2232 if (buf == NULL) {
2233 xmlGenericError(xmlGenericErrorContext,
2234 "malloc of %d byte failed\n", size);
2235 return(NULL);
2236 }
2237
2238 /*
2239 * The content of the entity definition is copied in a buffer.
2240 */
2241
2242 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2243 input = ctxt->input;
2244 GROW;
2245 NEXT;
2246 c = CUR_CHAR(l);
2247 /*
2248 * NOTE: 4.4.5 Included in Literal
2249 * When a parameter entity reference appears in a literal entity
2250 * value, ... a single or double quote character in the replacement
2251 * text is always treated as a normal data character and will not
2252 * terminate the literal.
2253 * In practice it means we stop the loop only when back at parsing
2254 * the initial entity and the quote is found
2255 */
2256 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2257 (ctxt->input != input))) {
2258 if (len + 5 >= size) {
2259 size *= 2;
2260 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2261 if (buf == NULL) {
2262 xmlGenericError(xmlGenericErrorContext,
2263 "realloc of %d byte failed\n", size);
2264 return(NULL);
2265 }
2266 }
2267 COPY_BUF(l,buf,len,c);
2268 NEXTL(l);
2269 /*
2270 * Pop-up of finished entities.
2271 */
2272 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2273 xmlPopInput(ctxt);
2274
2275 GROW;
2276 c = CUR_CHAR(l);
2277 if (c == 0) {
2278 GROW;
2279 c = CUR_CHAR(l);
2280 }
2281 }
2282 buf[len] = 0;
2283
2284 /*
2285 * Raise problem w.r.t. '&' and '%' being used in non-entities
2286 * reference constructs. Note Charref will be handled in
2287 * xmlStringDecodeEntities()
2288 */
2289 cur = buf;
2290 while (*cur != 0) { /* non input consuming */
2291 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2292 xmlChar *name;
2293 xmlChar tmp = *cur;
2294
2295 cur++;
2296 name = xmlParseStringName(ctxt, &cur);
2297 if ((name == NULL) || (*cur != ';')) {
2298 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2299 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2300 ctxt->sax->error(ctxt->userData,
2301 "EntityValue: '%c' forbidden except for entities references\n",
2302 tmp);
2303 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002304 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002305 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002306 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2307 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002308 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2309 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2310 ctxt->sax->error(ctxt->userData,
2311 "EntityValue: PEReferences forbidden in internal subset\n",
2312 tmp);
2313 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002314 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002315 }
2316 if (name != NULL)
2317 xmlFree(name);
2318 }
2319 cur++;
2320 }
2321
2322 /*
2323 * Then PEReference entities are substituted.
2324 */
2325 if (c != stop) {
2326 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2327 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2328 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2329 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002330 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002331 xmlFree(buf);
2332 } else {
2333 NEXT;
2334 /*
2335 * NOTE: 4.4.7 Bypassed
2336 * When a general entity reference appears in the EntityValue in
2337 * an entity declaration, it is bypassed and left as is.
2338 * so XML_SUBSTITUTE_REF is not set here.
2339 */
2340 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2341 0, 0, 0);
2342 if (orig != NULL)
2343 *orig = buf;
2344 else
2345 xmlFree(buf);
2346 }
2347
2348 return(ret);
2349}
2350
2351/**
2352 * xmlParseAttValue:
2353 * @ctxt: an XML parser context
2354 *
2355 * parse a value for an attribute
2356 * Note: the parser won't do substitution of entities here, this
2357 * will be handled later in xmlStringGetNodeList
2358 *
2359 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2360 * "'" ([^<&'] | Reference)* "'"
2361 *
2362 * 3.3.3 Attribute-Value Normalization:
2363 * Before the value of an attribute is passed to the application or
2364 * checked for validity, the XML processor must normalize it as follows:
2365 * - a character reference is processed by appending the referenced
2366 * character to the attribute value
2367 * - an entity reference is processed by recursively processing the
2368 * replacement text of the entity
2369 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2370 * appending #x20 to the normalized value, except that only a single
2371 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2372 * parsed entity or the literal entity value of an internal parsed entity
2373 * - other characters are processed by appending them to the normalized value
2374 * If the declared value is not CDATA, then the XML processor must further
2375 * process the normalized attribute value by discarding any leading and
2376 * trailing space (#x20) characters, and by replacing sequences of space
2377 * (#x20) characters by a single space (#x20) character.
2378 * All attributes for which no declaration has been read should be treated
2379 * by a non-validating parser as if declared CDATA.
2380 *
2381 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2382 */
2383
2384xmlChar *
Daniel Veillarde72c7562002-05-31 09:47:30 +00002385xmlParseAttValueComplex(xmlParserCtxtPtr ctxt);
2386
2387xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002388xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2389 xmlChar limit = 0;
Daniel Veillardf4862f02002-09-10 11:13:43 +00002390 const xmlChar *in = NULL;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002391 xmlChar *ret = NULL;
2392 SHRINK;
2393 GROW;
Daniel Veillarde645e8c2002-10-22 17:35:37 +00002394 in = (xmlChar *) CUR_PTR;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002395 if (*in != '"' && *in != '\'') {
2396 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2397 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2398 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2399 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002400 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002401 return(NULL);
2402 }
2403 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2404 limit = *in;
2405 ++in;
2406
2407 while (*in != limit && *in >= 0x20 && *in <= 0x7f &&
2408 *in != '&' && *in != '<'
2409 ) {
2410 ++in;
2411 }
2412 if (*in != limit) {
2413 return xmlParseAttValueComplex(ctxt);
2414 }
2415 ++in;
2416 ret = xmlStrndup (CUR_PTR + 1, in - CUR_PTR - 2);
2417 CUR_PTR = in;
2418 return ret;
2419}
2420
Daniel Veillard01c13b52002-12-10 15:19:08 +00002421/**
2422 * xmlParseAttValueComplex:
2423 * @ctxt: an XML parser context
2424 *
2425 * parse a value for an attribute, this is the fallback function
2426 * of xmlParseAttValue() when the attribute parsing requires handling
2427 * of non-ASCII characters.
2428 *
2429 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2430 */
Daniel Veillarde72c7562002-05-31 09:47:30 +00002431xmlChar *
2432xmlParseAttValueComplex(xmlParserCtxtPtr ctxt) {
2433 xmlChar limit = 0;
2434 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002435 int len = 0;
2436 int buf_size = 0;
2437 int c, l;
2438 xmlChar *current = NULL;
2439 xmlEntityPtr ent;
2440
2441
2442 SHRINK;
2443 if (NXT(0) == '"') {
2444 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2445 limit = '"';
2446 NEXT;
2447 } else if (NXT(0) == '\'') {
2448 limit = '\'';
2449 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2450 NEXT;
2451 } else {
2452 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2453 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2454 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2455 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002456 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002457 return(NULL);
2458 }
2459
2460 /*
2461 * allocate a translation buffer.
2462 */
2463 buf_size = XML_PARSER_BUFFER_SIZE;
2464 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2465 if (buf == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002466 xmlGenericError(xmlGenericErrorContext,
2467 "xmlParseAttValue: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002468 return(NULL);
2469 }
2470
2471 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002472 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002473 */
2474 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002475 while ((NXT(0) != limit) && /* checked */
2476 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002477 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002478 if (c == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00002479 if (NXT(1) == '#') {
2480 int val = xmlParseCharRef(ctxt);
2481 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002482 if (ctxt->replaceEntities) {
2483 if (len > buf_size - 10) {
2484 growBuffer(buf);
2485 }
2486 buf[len++] = '&';
2487 } else {
2488 /*
2489 * The reparsing will be done in xmlStringGetNodeList()
2490 * called by the attribute() function in SAX.c
2491 */
2492 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002493
Daniel Veillard319a7422001-09-11 09:27:09 +00002494 if (len > buf_size - 10) {
2495 growBuffer(buf);
2496 }
2497 current = &buffer[0];
2498 while (*current != 0) { /* non input consuming */
2499 buf[len++] = *current++;
2500 }
Owen Taylor3473f882001-02-23 17:55:21 +00002501 }
2502 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002503 if (len > buf_size - 10) {
2504 growBuffer(buf);
2505 }
Owen Taylor3473f882001-02-23 17:55:21 +00002506 len += xmlCopyChar(0, &buf[len], val);
2507 }
2508 } else {
2509 ent = xmlParseEntityRef(ctxt);
2510 if ((ent != NULL) &&
2511 (ctxt->replaceEntities != 0)) {
2512 xmlChar *rep;
2513
2514 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2515 rep = xmlStringDecodeEntities(ctxt, ent->content,
2516 XML_SUBSTITUTE_REF, 0, 0, 0);
2517 if (rep != NULL) {
2518 current = rep;
2519 while (*current != 0) { /* non input consuming */
2520 buf[len++] = *current++;
2521 if (len > buf_size - 10) {
2522 growBuffer(buf);
2523 }
2524 }
2525 xmlFree(rep);
2526 }
2527 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002528 if (len > buf_size - 10) {
2529 growBuffer(buf);
2530 }
Owen Taylor3473f882001-02-23 17:55:21 +00002531 if (ent->content != NULL)
2532 buf[len++] = ent->content[0];
2533 }
2534 } else if (ent != NULL) {
2535 int i = xmlStrlen(ent->name);
2536 const xmlChar *cur = ent->name;
2537
2538 /*
2539 * This may look absurd but is needed to detect
2540 * entities problems
2541 */
2542 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2543 (ent->content != NULL)) {
2544 xmlChar *rep;
2545 rep = xmlStringDecodeEntities(ctxt, ent->content,
2546 XML_SUBSTITUTE_REF, 0, 0, 0);
2547 if (rep != NULL)
2548 xmlFree(rep);
2549 }
2550
2551 /*
2552 * Just output the reference
2553 */
2554 buf[len++] = '&';
2555 if (len > buf_size - i - 10) {
2556 growBuffer(buf);
2557 }
2558 for (;i > 0;i--)
2559 buf[len++] = *cur++;
2560 buf[len++] = ';';
2561 }
2562 }
2563 } else {
2564 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2565 COPY_BUF(l,buf,len,0x20);
2566 if (len > buf_size - 10) {
2567 growBuffer(buf);
2568 }
2569 } else {
2570 COPY_BUF(l,buf,len,c);
2571 if (len > buf_size - 10) {
2572 growBuffer(buf);
2573 }
2574 }
2575 NEXTL(l);
2576 }
2577 GROW;
2578 c = CUR_CHAR(l);
2579 }
2580 buf[len++] = 0;
2581 if (RAW == '<') {
2582 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2583 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2584 ctxt->sax->error(ctxt->userData,
2585 "Unescaped '<' not allowed in attributes values\n");
2586 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002587 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002588 } else if (RAW != limit) {
2589 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2590 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2591 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2592 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002593 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002594 } else
2595 NEXT;
2596 return(buf);
2597}
2598
2599/**
2600 * xmlParseSystemLiteral:
2601 * @ctxt: an XML parser context
2602 *
2603 * parse an XML Literal
2604 *
2605 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2606 *
2607 * Returns the SystemLiteral parsed or NULL
2608 */
2609
2610xmlChar *
2611xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2612 xmlChar *buf = NULL;
2613 int len = 0;
2614 int size = XML_PARSER_BUFFER_SIZE;
2615 int cur, l;
2616 xmlChar stop;
2617 int state = ctxt->instate;
2618 int count = 0;
2619
2620 SHRINK;
2621 if (RAW == '"') {
2622 NEXT;
2623 stop = '"';
2624 } else if (RAW == '\'') {
2625 NEXT;
2626 stop = '\'';
2627 } else {
2628 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2629 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2630 ctxt->sax->error(ctxt->userData,
2631 "SystemLiteral \" or ' expected\n");
2632 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002633 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002634 return(NULL);
2635 }
2636
2637 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2638 if (buf == NULL) {
2639 xmlGenericError(xmlGenericErrorContext,
2640 "malloc of %d byte failed\n", size);
2641 return(NULL);
2642 }
2643 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2644 cur = CUR_CHAR(l);
2645 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2646 if (len + 5 >= size) {
2647 size *= 2;
2648 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2649 if (buf == NULL) {
2650 xmlGenericError(xmlGenericErrorContext,
2651 "realloc of %d byte failed\n", size);
2652 ctxt->instate = (xmlParserInputState) state;
2653 return(NULL);
2654 }
2655 }
2656 count++;
2657 if (count > 50) {
2658 GROW;
2659 count = 0;
2660 }
2661 COPY_BUF(l,buf,len,cur);
2662 NEXTL(l);
2663 cur = CUR_CHAR(l);
2664 if (cur == 0) {
2665 GROW;
2666 SHRINK;
2667 cur = CUR_CHAR(l);
2668 }
2669 }
2670 buf[len] = 0;
2671 ctxt->instate = (xmlParserInputState) state;
2672 if (!IS_CHAR(cur)) {
2673 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2674 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2675 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2676 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002677 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002678 } else {
2679 NEXT;
2680 }
2681 return(buf);
2682}
2683
2684/**
2685 * xmlParsePubidLiteral:
2686 * @ctxt: an XML parser context
2687 *
2688 * parse an XML public literal
2689 *
2690 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2691 *
2692 * Returns the PubidLiteral parsed or NULL.
2693 */
2694
2695xmlChar *
2696xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2697 xmlChar *buf = NULL;
2698 int len = 0;
2699 int size = XML_PARSER_BUFFER_SIZE;
2700 xmlChar cur;
2701 xmlChar stop;
2702 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002703 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00002704
2705 SHRINK;
2706 if (RAW == '"') {
2707 NEXT;
2708 stop = '"';
2709 } else if (RAW == '\'') {
2710 NEXT;
2711 stop = '\'';
2712 } else {
2713 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2715 ctxt->sax->error(ctxt->userData,
2716 "SystemLiteral \" or ' expected\n");
2717 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002718 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002719 return(NULL);
2720 }
2721 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2722 if (buf == NULL) {
2723 xmlGenericError(xmlGenericErrorContext,
2724 "malloc of %d byte failed\n", size);
2725 return(NULL);
2726 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002727 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00002728 cur = CUR;
2729 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2730 if (len + 1 >= size) {
2731 size *= 2;
2732 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2733 if (buf == NULL) {
2734 xmlGenericError(xmlGenericErrorContext,
2735 "realloc of %d byte failed\n", size);
2736 return(NULL);
2737 }
2738 }
2739 buf[len++] = cur;
2740 count++;
2741 if (count > 50) {
2742 GROW;
2743 count = 0;
2744 }
2745 NEXT;
2746 cur = CUR;
2747 if (cur == 0) {
2748 GROW;
2749 SHRINK;
2750 cur = CUR;
2751 }
2752 }
2753 buf[len] = 0;
2754 if (cur != stop) {
2755 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2756 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2757 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2758 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002759 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002760 } else {
2761 NEXT;
2762 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002763 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00002764 return(buf);
2765}
2766
Daniel Veillard48b2f892001-02-25 16:11:03 +00002767void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002768/**
2769 * xmlParseCharData:
2770 * @ctxt: an XML parser context
2771 * @cdata: int indicating whether we are within a CDATA section
2772 *
2773 * parse a CharData section.
2774 * if we are within a CDATA section ']]>' marks an end of section.
2775 *
2776 * The right angle bracket (>) may be represented using the string "&gt;",
2777 * and must, for compatibility, be escaped using "&gt;" or a character
2778 * reference when it appears in the string "]]>" in content, when that
2779 * string is not marking the end of a CDATA section.
2780 *
2781 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2782 */
2783
2784void
2785xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00002786 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002787 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002788 int line = ctxt->input->line;
2789 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002790
2791 SHRINK;
2792 GROW;
2793 /*
2794 * Accelerated common case where input don't need to be
2795 * modified before passing it to the handler.
2796 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00002797 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002798 in = ctxt->input->cur;
2799 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002800get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00002801 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
2802 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002803 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002804 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002805 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002806 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002807 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002808 ctxt->input->line++;
2809 in++;
2810 }
2811 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002812 }
2813 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002814 if ((in[1] == ']') && (in[2] == '>')) {
2815 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2816 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2817 ctxt->sax->error(ctxt->userData,
2818 "Sequence ']]>' not allowed in content\n");
2819 ctxt->input->cur = in;
2820 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002821 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002822 return;
2823 }
2824 in++;
2825 goto get_more;
2826 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002827 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002828 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002829 if (IS_BLANK(*ctxt->input->cur)) {
2830 const xmlChar *tmp = ctxt->input->cur;
2831 ctxt->input->cur = in;
2832 if (areBlanks(ctxt, tmp, nbchar)) {
2833 if (ctxt->sax->ignorableWhitespace != NULL)
2834 ctxt->sax->ignorableWhitespace(ctxt->userData,
2835 tmp, nbchar);
2836 } else {
2837 if (ctxt->sax->characters != NULL)
2838 ctxt->sax->characters(ctxt->userData,
2839 tmp, nbchar);
2840 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002841 line = ctxt->input->line;
2842 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002843 } else {
2844 if (ctxt->sax->characters != NULL)
2845 ctxt->sax->characters(ctxt->userData,
2846 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002847 line = ctxt->input->line;
2848 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002849 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002850 }
2851 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002852 if (*in == 0xD) {
2853 in++;
2854 if (*in == 0xA) {
2855 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002856 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002857 ctxt->input->line++;
2858 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002859 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002860 in--;
2861 }
2862 if (*in == '<') {
2863 return;
2864 }
2865 if (*in == '&') {
2866 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002867 }
2868 SHRINK;
2869 GROW;
2870 in = ctxt->input->cur;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002871 } while ((*in >= 0x20) && (*in <= 0x7F));
Daniel Veillard48b2f892001-02-25 16:11:03 +00002872 nbchar = 0;
2873 }
Daniel Veillard50582112001-03-26 22:52:16 +00002874 ctxt->input->line = line;
2875 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002876 xmlParseCharDataComplex(ctxt, cdata);
2877}
2878
Daniel Veillard01c13b52002-12-10 15:19:08 +00002879/**
2880 * xmlParseCharDataComplex:
2881 * @ctxt: an XML parser context
2882 * @cdata: int indicating whether we are within a CDATA section
2883 *
2884 * parse a CharData section.this is the fallback function
2885 * of xmlParseCharData() when the parsing requires handling
2886 * of non-ASCII characters.
2887 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002888void
2889xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002890 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2891 int nbchar = 0;
2892 int cur, l;
2893 int count = 0;
2894
2895 SHRINK;
2896 GROW;
2897 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002898 while ((cur != '<') && /* checked */
2899 (cur != '&') &&
2900 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00002901 if ((cur == ']') && (NXT(1) == ']') &&
2902 (NXT(2) == '>')) {
2903 if (cdata) break;
2904 else {
2905 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2906 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2907 ctxt->sax->error(ctxt->userData,
2908 "Sequence ']]>' not allowed in content\n");
2909 /* Should this be relaxed ??? I see a "must here */
2910 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002911 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002912 }
2913 }
2914 COPY_BUF(l,buf,nbchar,cur);
2915 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2916 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002917 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002918 */
2919 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2920 if (areBlanks(ctxt, buf, nbchar)) {
2921 if (ctxt->sax->ignorableWhitespace != NULL)
2922 ctxt->sax->ignorableWhitespace(ctxt->userData,
2923 buf, nbchar);
2924 } else {
2925 if (ctxt->sax->characters != NULL)
2926 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2927 }
2928 }
2929 nbchar = 0;
2930 }
2931 count++;
2932 if (count > 50) {
2933 GROW;
2934 count = 0;
2935 }
2936 NEXTL(l);
2937 cur = CUR_CHAR(l);
2938 }
2939 if (nbchar != 0) {
2940 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002941 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002942 */
2943 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2944 if (areBlanks(ctxt, buf, nbchar)) {
2945 if (ctxt->sax->ignorableWhitespace != NULL)
2946 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2947 } else {
2948 if (ctxt->sax->characters != NULL)
2949 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2950 }
2951 }
2952 }
2953}
2954
2955/**
2956 * xmlParseExternalID:
2957 * @ctxt: an XML parser context
2958 * @publicID: a xmlChar** receiving PubidLiteral
2959 * @strict: indicate whether we should restrict parsing to only
2960 * production [75], see NOTE below
2961 *
2962 * Parse an External ID or a Public ID
2963 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002964 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002965 * 'PUBLIC' S PubidLiteral S SystemLiteral
2966 *
2967 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2968 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2969 *
2970 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2971 *
2972 * Returns the function returns SystemLiteral and in the second
2973 * case publicID receives PubidLiteral, is strict is off
2974 * it is possible to return NULL and have publicID set.
2975 */
2976
2977xmlChar *
2978xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2979 xmlChar *URI = NULL;
2980
2981 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002982
2983 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002984 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2985 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2986 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2987 SKIP(6);
2988 if (!IS_BLANK(CUR)) {
2989 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2990 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2991 ctxt->sax->error(ctxt->userData,
2992 "Space required after 'SYSTEM'\n");
2993 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002994 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002995 }
2996 SKIP_BLANKS;
2997 URI = xmlParseSystemLiteral(ctxt);
2998 if (URI == NULL) {
2999 ctxt->errNo = XML_ERR_URI_REQUIRED;
3000 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3001 ctxt->sax->error(ctxt->userData,
3002 "xmlParseExternalID: SYSTEM, no URI\n");
3003 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003004 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003005 }
3006 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
3007 (NXT(2) == 'B') && (NXT(3) == 'L') &&
3008 (NXT(4) == 'I') && (NXT(5) == 'C')) {
3009 SKIP(6);
3010 if (!IS_BLANK(CUR)) {
3011 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3012 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3013 ctxt->sax->error(ctxt->userData,
3014 "Space required after 'PUBLIC'\n");
3015 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003016 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003017 }
3018 SKIP_BLANKS;
3019 *publicID = xmlParsePubidLiteral(ctxt);
3020 if (*publicID == NULL) {
3021 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
3022 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3023 ctxt->sax->error(ctxt->userData,
3024 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
3025 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003026 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003027 }
3028 if (strict) {
3029 /*
3030 * We don't handle [83] so "S SystemLiteral" is required.
3031 */
3032 if (!IS_BLANK(CUR)) {
3033 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3034 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3035 ctxt->sax->error(ctxt->userData,
3036 "Space required after the Public Identifier\n");
3037 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003038 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003039 }
3040 } else {
3041 /*
3042 * We handle [83] so we return immediately, if
3043 * "S SystemLiteral" is not detected. From a purely parsing
3044 * point of view that's a nice mess.
3045 */
3046 const xmlChar *ptr;
3047 GROW;
3048
3049 ptr = CUR_PTR;
3050 if (!IS_BLANK(*ptr)) return(NULL);
3051
3052 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3053 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3054 }
3055 SKIP_BLANKS;
3056 URI = xmlParseSystemLiteral(ctxt);
3057 if (URI == NULL) {
3058 ctxt->errNo = XML_ERR_URI_REQUIRED;
3059 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3060 ctxt->sax->error(ctxt->userData,
3061 "xmlParseExternalID: PUBLIC, no URI\n");
3062 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003063 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003064 }
3065 }
3066 return(URI);
3067}
3068
3069/**
3070 * xmlParseComment:
3071 * @ctxt: an XML parser context
3072 *
3073 * Skip an XML (SGML) comment <!-- .... -->
3074 * The spec says that "For compatibility, the string "--" (double-hyphen)
3075 * must not occur within comments. "
3076 *
3077 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3078 */
3079void
3080xmlParseComment(xmlParserCtxtPtr ctxt) {
3081 xmlChar *buf = NULL;
3082 int len;
3083 int size = XML_PARSER_BUFFER_SIZE;
3084 int q, ql;
3085 int r, rl;
3086 int cur, l;
3087 xmlParserInputState state;
3088 xmlParserInputPtr input = ctxt->input;
3089 int count = 0;
3090
3091 /*
3092 * Check that there is a comment right here.
3093 */
3094 if ((RAW != '<') || (NXT(1) != '!') ||
3095 (NXT(2) != '-') || (NXT(3) != '-')) return;
3096
3097 state = ctxt->instate;
3098 ctxt->instate = XML_PARSER_COMMENT;
3099 SHRINK;
3100 SKIP(4);
3101 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3102 if (buf == NULL) {
3103 xmlGenericError(xmlGenericErrorContext,
3104 "malloc of %d byte failed\n", size);
3105 ctxt->instate = state;
3106 return;
3107 }
3108 q = CUR_CHAR(ql);
3109 NEXTL(ql);
3110 r = CUR_CHAR(rl);
3111 NEXTL(rl);
3112 cur = CUR_CHAR(l);
3113 len = 0;
3114 while (IS_CHAR(cur) && /* checked */
3115 ((cur != '>') ||
3116 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003117 if ((r == '-') && (q == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003118 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
3119 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3120 ctxt->sax->error(ctxt->userData,
3121 "Comment must not contain '--' (double-hyphen)`\n");
3122 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003123 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003124 }
3125 if (len + 5 >= size) {
3126 size *= 2;
3127 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3128 if (buf == NULL) {
3129 xmlGenericError(xmlGenericErrorContext,
3130 "realloc of %d byte failed\n", size);
3131 ctxt->instate = state;
3132 return;
3133 }
3134 }
3135 COPY_BUF(ql,buf,len,q);
3136 q = r;
3137 ql = rl;
3138 r = cur;
3139 rl = l;
3140
3141 count++;
3142 if (count > 50) {
3143 GROW;
3144 count = 0;
3145 }
3146 NEXTL(l);
3147 cur = CUR_CHAR(l);
3148 if (cur == 0) {
3149 SHRINK;
3150 GROW;
3151 cur = CUR_CHAR(l);
3152 }
3153 }
3154 buf[len] = 0;
3155 if (!IS_CHAR(cur)) {
3156 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
3157 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3158 ctxt->sax->error(ctxt->userData,
3159 "Comment not terminated \n<!--%.50s\n", buf);
3160 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003161 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003162 xmlFree(buf);
3163 } else {
3164 if (input != ctxt->input) {
3165 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3166 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3167 ctxt->sax->error(ctxt->userData,
3168"Comment doesn't start and stop in the same entity\n");
3169 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003170 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003171 }
3172 NEXT;
3173 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3174 (!ctxt->disableSAX))
3175 ctxt->sax->comment(ctxt->userData, buf);
3176 xmlFree(buf);
3177 }
3178 ctxt->instate = state;
3179}
3180
3181/**
3182 * xmlParsePITarget:
3183 * @ctxt: an XML parser context
3184 *
3185 * parse the name of a PI
3186 *
3187 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3188 *
3189 * Returns the PITarget name or NULL
3190 */
3191
3192xmlChar *
3193xmlParsePITarget(xmlParserCtxtPtr ctxt) {
3194 xmlChar *name;
3195
3196 name = xmlParseName(ctxt);
3197 if ((name != NULL) &&
3198 ((name[0] == 'x') || (name[0] == 'X')) &&
3199 ((name[1] == 'm') || (name[1] == 'M')) &&
3200 ((name[2] == 'l') || (name[2] == 'L'))) {
3201 int i;
3202 if ((name[0] == 'x') && (name[1] == 'm') &&
3203 (name[2] == 'l') && (name[3] == 0)) {
3204 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3205 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3206 ctxt->sax->error(ctxt->userData,
3207 "XML declaration allowed only at the start of the document\n");
3208 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003209 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003210 return(name);
3211 } else if (name[3] == 0) {
3212 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3213 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3214 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
3215 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003216 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003217 return(name);
3218 }
3219 for (i = 0;;i++) {
3220 if (xmlW3CPIs[i] == NULL) break;
3221 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3222 return(name);
3223 }
3224 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3225 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3226 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003227 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003228 }
3229 }
3230 return(name);
3231}
3232
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * A syntax error raises a SAX warning (XML_WAR_CATALOG_PI), except for
 * a missing '=' after the "catalog" keyword, which is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p;
    const xmlChar *start;
    xmlChar quote;

    /* the pseudo attribute name */
    for (p = catalog; IS_BLANK(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto syntax_error;

    /* S? '=' S? */
    for (p += 7; IS_BLANK(*p); p++)
        ;
    if (*p != '=')
        return;
    for (p++; IS_BLANK(*p); p++)
        ;

    /* the quoted value */
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto syntax_error;
    start = ++p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto syntax_error;
    URL = xmlStrndup(start, p - start);

    /* only blanks may follow the closing quote */
    for (p++; IS_BLANK(*p); p++)
        ;
    if (*p != 0)
        goto syntax_error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

syntax_error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
                           "Catalog PI syntax error: %s\n", catalog);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3295
Owen Taylor3473f882001-02-23 17:55:21 +00003296/**
3297 * xmlParsePI:
3298 * @ctxt: an XML parser context
3299 *
3300 * parse an XML Processing Instruction.
3301 *
3302 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3303 *
3304 * The processing is transfered to SAX once parsed.
3305 */
3306
3307void
3308xmlParsePI(xmlParserCtxtPtr ctxt) {
3309 xmlChar *buf = NULL;
3310 int len = 0;
3311 int size = XML_PARSER_BUFFER_SIZE;
3312 int cur, l;
3313 xmlChar *target;
3314 xmlParserInputState state;
3315 int count = 0;
3316
3317 if ((RAW == '<') && (NXT(1) == '?')) {
3318 xmlParserInputPtr input = ctxt->input;
3319 state = ctxt->instate;
3320 ctxt->instate = XML_PARSER_PI;
3321 /*
3322 * this is a Processing Instruction.
3323 */
3324 SKIP(2);
3325 SHRINK;
3326
3327 /*
3328 * Parse the target name and check for special support like
3329 * namespace.
3330 */
3331 target = xmlParsePITarget(ctxt);
3332 if (target != NULL) {
3333 if ((RAW == '?') && (NXT(1) == '>')) {
3334 if (input != ctxt->input) {
3335 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3336 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3337 ctxt->sax->error(ctxt->userData,
3338 "PI declaration doesn't start and stop in the same entity\n");
3339 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003340 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003341 }
3342 SKIP(2);
3343
3344 /*
3345 * SAX: PI detected.
3346 */
3347 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3348 (ctxt->sax->processingInstruction != NULL))
3349 ctxt->sax->processingInstruction(ctxt->userData,
3350 target, NULL);
3351 ctxt->instate = state;
3352 xmlFree(target);
3353 return;
3354 }
3355 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3356 if (buf == NULL) {
3357 xmlGenericError(xmlGenericErrorContext,
3358 "malloc of %d byte failed\n", size);
3359 ctxt->instate = state;
3360 return;
3361 }
3362 cur = CUR;
3363 if (!IS_BLANK(cur)) {
3364 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3365 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3366 ctxt->sax->error(ctxt->userData,
3367 "xmlParsePI: PI %s space expected\n", target);
3368 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003369 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003370 }
3371 SKIP_BLANKS;
3372 cur = CUR_CHAR(l);
3373 while (IS_CHAR(cur) && /* checked */
3374 ((cur != '?') || (NXT(1) != '>'))) {
3375 if (len + 5 >= size) {
3376 size *= 2;
3377 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3378 if (buf == NULL) {
3379 xmlGenericError(xmlGenericErrorContext,
3380 "realloc of %d byte failed\n", size);
3381 ctxt->instate = state;
3382 return;
3383 }
3384 }
3385 count++;
3386 if (count > 50) {
3387 GROW;
3388 count = 0;
3389 }
3390 COPY_BUF(l,buf,len,cur);
3391 NEXTL(l);
3392 cur = CUR_CHAR(l);
3393 if (cur == 0) {
3394 SHRINK;
3395 GROW;
3396 cur = CUR_CHAR(l);
3397 }
3398 }
3399 buf[len] = 0;
3400 if (cur != '?') {
3401 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3402 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3403 ctxt->sax->error(ctxt->userData,
3404 "xmlParsePI: PI %s never end ...\n", target);
3405 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003406 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003407 } else {
3408 if (input != ctxt->input) {
3409 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3410 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3411 ctxt->sax->error(ctxt->userData,
3412 "PI declaration doesn't start and stop in the same entity\n");
3413 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003414 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003415 }
3416 SKIP(2);
3417
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003418#ifdef LIBXML_CATALOG_ENABLED
3419 if (((state == XML_PARSER_MISC) ||
3420 (state == XML_PARSER_START)) &&
3421 (xmlStrEqual(target, XML_CATALOG_PI))) {
3422 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3423 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3424 (allow == XML_CATA_ALLOW_ALL))
3425 xmlParseCatalogPI(ctxt, buf);
3426 }
3427#endif
3428
3429
Owen Taylor3473f882001-02-23 17:55:21 +00003430 /*
3431 * SAX: PI detected.
3432 */
3433 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3434 (ctxt->sax->processingInstruction != NULL))
3435 ctxt->sax->processingInstruction(ctxt->userData,
3436 target, buf);
3437 }
3438 xmlFree(buf);
3439 xmlFree(target);
3440 } else {
3441 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3442 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3443 ctxt->sax->error(ctxt->userData,
3444 "xmlParsePI : no target name\n");
3445 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003446 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003447 }
3448 ctxt->instate = state;
3449 }
3450}
3451
3452/**
3453 * xmlParseNotationDecl:
3454 * @ctxt: an XML parser context
3455 *
3456 * parse a notation declaration
3457 *
3458 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3459 *
3460 * Hence there is actually 3 choices:
3461 * 'PUBLIC' S PubidLiteral
3462 * 'PUBLIC' S PubidLiteral S SystemLiteral
3463 * and 'SYSTEM' S SystemLiteral
3464 *
3465 * See the NOTE on xmlParseExternalID().
3466 */
3467
3468void
3469xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3470 xmlChar *name;
3471 xmlChar *Pubid;
3472 xmlChar *Systemid;
3473
3474 if ((RAW == '<') && (NXT(1) == '!') &&
3475 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3476 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3477 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3478 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3479 xmlParserInputPtr input = ctxt->input;
3480 SHRINK;
3481 SKIP(10);
3482 if (!IS_BLANK(CUR)) {
3483 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3484 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3485 ctxt->sax->error(ctxt->userData,
3486 "Space required after '<!NOTATION'\n");
3487 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003488 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003489 return;
3490 }
3491 SKIP_BLANKS;
3492
Daniel Veillard76d66f42001-05-16 21:05:17 +00003493 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003494 if (name == NULL) {
3495 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3496 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3497 ctxt->sax->error(ctxt->userData,
3498 "NOTATION: Name expected here\n");
3499 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003500 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003501 return;
3502 }
3503 if (!IS_BLANK(CUR)) {
3504 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3505 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3506 ctxt->sax->error(ctxt->userData,
3507 "Space required after the NOTATION name'\n");
3508 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003509 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003510 return;
3511 }
3512 SKIP_BLANKS;
3513
3514 /*
3515 * Parse the IDs.
3516 */
3517 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3518 SKIP_BLANKS;
3519
3520 if (RAW == '>') {
3521 if (input != ctxt->input) {
3522 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3523 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3524 ctxt->sax->error(ctxt->userData,
3525"Notation declaration doesn't start and stop in the same entity\n");
3526 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003527 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003528 }
3529 NEXT;
3530 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3531 (ctxt->sax->notationDecl != NULL))
3532 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3533 } else {
3534 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3535 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3536 ctxt->sax->error(ctxt->userData,
3537 "'>' required to close NOTATION declaration\n");
3538 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003539 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003540 }
3541 xmlFree(name);
3542 if (Systemid != NULL) xmlFree(Systemid);
3543 if (Pubid != NULL) xmlFree(Pubid);
3544 }
3545}
3546
3547/**
3548 * xmlParseEntityDecl:
3549 * @ctxt: an XML parser context
3550 *
3551 * parse <!ENTITY declarations
3552 *
3553 * [70] EntityDecl ::= GEDecl | PEDecl
3554 *
3555 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3556 *
3557 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3558 *
3559 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3560 *
3561 * [74] PEDef ::= EntityValue | ExternalID
3562 *
3563 * [76] NDataDecl ::= S 'NDATA' S Name
3564 *
3565 * [ VC: Notation Declared ]
3566 * The Name must match the declared name of a notation.
3567 */
3568
3569void
3570xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3571 xmlChar *name = NULL;
3572 xmlChar *value = NULL;
3573 xmlChar *URI = NULL, *literal = NULL;
3574 xmlChar *ndata = NULL;
3575 int isParameter = 0;
3576 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003577 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003578
3579 GROW;
3580 if ((RAW == '<') && (NXT(1) == '!') &&
3581 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3582 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3583 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3584 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003585 SHRINK;
3586 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003587 skipped = SKIP_BLANKS;
3588 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003589 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3590 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3591 ctxt->sax->error(ctxt->userData,
3592 "Space required after '<!ENTITY'\n");
3593 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003594 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003595 }
Owen Taylor3473f882001-02-23 17:55:21 +00003596
3597 if (RAW == '%') {
3598 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003599 skipped = SKIP_BLANKS;
3600 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003601 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3603 ctxt->sax->error(ctxt->userData,
3604 "Space required after '%'\n");
3605 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003606 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003607 }
Owen Taylor3473f882001-02-23 17:55:21 +00003608 isParameter = 1;
3609 }
3610
Daniel Veillard76d66f42001-05-16 21:05:17 +00003611 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003612 if (name == NULL) {
3613 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3614 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3615 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3616 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003617 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003618 return;
3619 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003620 skipped = SKIP_BLANKS;
3621 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003622 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3623 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3624 ctxt->sax->error(ctxt->userData,
3625 "Space required after the entity name\n");
3626 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003627 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003628 }
Owen Taylor3473f882001-02-23 17:55:21 +00003629
Daniel Veillardf5582f12002-06-11 10:08:16 +00003630 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003631 /*
3632 * handle the various case of definitions...
3633 */
3634 if (isParameter) {
3635 if ((RAW == '"') || (RAW == '\'')) {
3636 value = xmlParseEntityValue(ctxt, &orig);
3637 if (value) {
3638 if ((ctxt->sax != NULL) &&
3639 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3640 ctxt->sax->entityDecl(ctxt->userData, name,
3641 XML_INTERNAL_PARAMETER_ENTITY,
3642 NULL, NULL, value);
3643 }
3644 } else {
3645 URI = xmlParseExternalID(ctxt, &literal, 1);
3646 if ((URI == NULL) && (literal == NULL)) {
3647 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3648 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3649 ctxt->sax->error(ctxt->userData,
3650 "Entity value required\n");
3651 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003652 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003653 }
3654 if (URI) {
3655 xmlURIPtr uri;
3656
3657 uri = xmlParseURI((const char *) URI);
3658 if (uri == NULL) {
3659 ctxt->errNo = XML_ERR_INVALID_URI;
3660 if ((ctxt->sax != NULL) &&
3661 (!ctxt->disableSAX) &&
3662 (ctxt->sax->error != NULL))
3663 ctxt->sax->error(ctxt->userData,
3664 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003665 /*
3666 * This really ought to be a well formedness error
3667 * but the XML Core WG decided otherwise c.f. issue
3668 * E26 of the XML erratas.
3669 */
Owen Taylor3473f882001-02-23 17:55:21 +00003670 } else {
3671 if (uri->fragment != NULL) {
3672 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3673 if ((ctxt->sax != NULL) &&
3674 (!ctxt->disableSAX) &&
3675 (ctxt->sax->error != NULL))
3676 ctxt->sax->error(ctxt->userData,
3677 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003678 /*
3679 * Okay this is foolish to block those but not
3680 * invalid URIs.
3681 */
Owen Taylor3473f882001-02-23 17:55:21 +00003682 ctxt->wellFormed = 0;
3683 } else {
3684 if ((ctxt->sax != NULL) &&
3685 (!ctxt->disableSAX) &&
3686 (ctxt->sax->entityDecl != NULL))
3687 ctxt->sax->entityDecl(ctxt->userData, name,
3688 XML_EXTERNAL_PARAMETER_ENTITY,
3689 literal, URI, NULL);
3690 }
3691 xmlFreeURI(uri);
3692 }
3693 }
3694 }
3695 } else {
3696 if ((RAW == '"') || (RAW == '\'')) {
3697 value = xmlParseEntityValue(ctxt, &orig);
3698 if ((ctxt->sax != NULL) &&
3699 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3700 ctxt->sax->entityDecl(ctxt->userData, name,
3701 XML_INTERNAL_GENERAL_ENTITY,
3702 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003703 /*
3704 * For expat compatibility in SAX mode.
3705 */
3706 if ((ctxt->myDoc == NULL) ||
3707 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3708 if (ctxt->myDoc == NULL) {
3709 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3710 }
3711 if (ctxt->myDoc->intSubset == NULL)
3712 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3713 BAD_CAST "fake", NULL, NULL);
3714
3715 entityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3716 NULL, NULL, value);
3717 }
Owen Taylor3473f882001-02-23 17:55:21 +00003718 } else {
3719 URI = xmlParseExternalID(ctxt, &literal, 1);
3720 if ((URI == NULL) && (literal == NULL)) {
3721 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3722 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3723 ctxt->sax->error(ctxt->userData,
3724 "Entity value required\n");
3725 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003726 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003727 }
3728 if (URI) {
3729 xmlURIPtr uri;
3730
3731 uri = xmlParseURI((const char *)URI);
3732 if (uri == NULL) {
3733 ctxt->errNo = XML_ERR_INVALID_URI;
3734 if ((ctxt->sax != NULL) &&
3735 (!ctxt->disableSAX) &&
3736 (ctxt->sax->error != NULL))
3737 ctxt->sax->error(ctxt->userData,
3738 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003739 /*
3740 * This really ought to be a well formedness error
3741 * but the XML Core WG decided otherwise c.f. issue
3742 * E26 of the XML erratas.
3743 */
Owen Taylor3473f882001-02-23 17:55:21 +00003744 } else {
3745 if (uri->fragment != NULL) {
3746 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3747 if ((ctxt->sax != NULL) &&
3748 (!ctxt->disableSAX) &&
3749 (ctxt->sax->error != NULL))
3750 ctxt->sax->error(ctxt->userData,
3751 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003752 /*
3753 * Okay this is foolish to block those but not
3754 * invalid URIs.
3755 */
Owen Taylor3473f882001-02-23 17:55:21 +00003756 ctxt->wellFormed = 0;
3757 }
3758 xmlFreeURI(uri);
3759 }
3760 }
3761 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3762 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3763 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3764 ctxt->sax->error(ctxt->userData,
3765 "Space required before 'NDATA'\n");
3766 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003767 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003768 }
3769 SKIP_BLANKS;
3770 if ((RAW == 'N') && (NXT(1) == 'D') &&
3771 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3772 (NXT(4) == 'A')) {
3773 SKIP(5);
3774 if (!IS_BLANK(CUR)) {
3775 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3776 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3777 ctxt->sax->error(ctxt->userData,
3778 "Space required after 'NDATA'\n");
3779 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003780 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003781 }
3782 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003783 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003784 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3785 (ctxt->sax->unparsedEntityDecl != NULL))
3786 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3787 literal, URI, ndata);
3788 } else {
3789 if ((ctxt->sax != NULL) &&
3790 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3791 ctxt->sax->entityDecl(ctxt->userData, name,
3792 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3793 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003794 /*
3795 * For expat compatibility in SAX mode.
3796 * assuming the entity replacement was asked for
3797 */
3798 if ((ctxt->replaceEntities != 0) &&
3799 ((ctxt->myDoc == NULL) ||
3800 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
3801 if (ctxt->myDoc == NULL) {
3802 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3803 }
3804
3805 if (ctxt->myDoc->intSubset == NULL)
3806 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3807 BAD_CAST "fake", NULL, NULL);
3808 entityDecl(ctxt, name,
3809 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3810 literal, URI, NULL);
3811 }
Owen Taylor3473f882001-02-23 17:55:21 +00003812 }
3813 }
3814 }
3815 SKIP_BLANKS;
3816 if (RAW != '>') {
3817 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3818 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3819 ctxt->sax->error(ctxt->userData,
3820 "xmlParseEntityDecl: entity %s not terminated\n", name);
3821 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003822 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003823 } else {
3824 if (input != ctxt->input) {
3825 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3826 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3827 ctxt->sax->error(ctxt->userData,
3828"Entity declaration doesn't start and stop in the same entity\n");
3829 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003830 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003831 }
3832 NEXT;
3833 }
3834 if (orig != NULL) {
3835 /*
3836 * Ugly mechanism to save the raw entity value.
3837 */
3838 xmlEntityPtr cur = NULL;
3839
3840 if (isParameter) {
3841 if ((ctxt->sax != NULL) &&
3842 (ctxt->sax->getParameterEntity != NULL))
3843 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3844 } else {
3845 if ((ctxt->sax != NULL) &&
3846 (ctxt->sax->getEntity != NULL))
3847 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003848 if ((cur == NULL) && (ctxt->userData==ctxt)) {
3849 cur = getEntity(ctxt, name);
3850 }
Owen Taylor3473f882001-02-23 17:55:21 +00003851 }
3852 if (cur != NULL) {
3853 if (cur->orig != NULL)
3854 xmlFree(orig);
3855 else
3856 cur->orig = orig;
3857 } else
3858 xmlFree(orig);
3859 }
3860 if (name != NULL) xmlFree(name);
3861 if (value != NULL) xmlFree(value);
3862 if (URI != NULL) xmlFree(URI);
3863 if (literal != NULL) xmlFree(literal);
3864 if (ndata != NULL) xmlFree(ndata);
3865 }
3866}
3867
3868/**
3869 * xmlParseDefaultDecl:
3870 * @ctxt: an XML parser context
3871 * @value: Receive a possible fixed default value for the attribute
3872 *
3873 * Parse an attribute default declaration
3874 *
3875 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3876 *
3877 * [ VC: Required Attribute ]
3878 * if the default declaration is the keyword #REQUIRED, then the
3879 * attribute must be specified for all elements of the type in the
3880 * attribute-list declaration.
3881 *
3882 * [ VC: Attribute Default Legal ]
3883 * The declared default value must meet the lexical constraints of
3884 * the declared attribute type c.f. xmlValidateAttributeDecl()
3885 *
3886 * [ VC: Fixed Attribute Default ]
3887 * if an attribute has a default value declared with the #FIXED
3888 * keyword, instances of that attribute must match the default value.
3889 *
3890 * [ WFC: No < in Attribute Values ]
3891 * handled in xmlParseAttValue()
3892 *
3893 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3894 * or XML_ATTRIBUTE_FIXED.
3895 */
3896
3897int
3898xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3899 int val;
3900 xmlChar *ret;
3901
3902 *value = NULL;
3903 if ((RAW == '#') && (NXT(1) == 'R') &&
3904 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3905 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3906 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3907 (NXT(8) == 'D')) {
3908 SKIP(9);
3909 return(XML_ATTRIBUTE_REQUIRED);
3910 }
3911 if ((RAW == '#') && (NXT(1) == 'I') &&
3912 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3913 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3914 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3915 SKIP(8);
3916 return(XML_ATTRIBUTE_IMPLIED);
3917 }
3918 val = XML_ATTRIBUTE_NONE;
3919 if ((RAW == '#') && (NXT(1) == 'F') &&
3920 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3921 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3922 SKIP(6);
3923 val = XML_ATTRIBUTE_FIXED;
3924 if (!IS_BLANK(CUR)) {
3925 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3926 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3927 ctxt->sax->error(ctxt->userData,
3928 "Space required after '#FIXED'\n");
3929 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003930 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003931 }
3932 SKIP_BLANKS;
3933 }
3934 ret = xmlParseAttValue(ctxt);
3935 ctxt->instate = XML_PARSER_DTD;
3936 if (ret == NULL) {
3937 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3938 ctxt->sax->error(ctxt->userData,
3939 "Attribute default value declaration error\n");
3940 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003941 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003942 } else
3943 *value = ret;
3944 return(val);
3945}
3946
3947/**
3948 * xmlParseNotationType:
3949 * @ctxt: an XML parser context
3950 *
3951 * parse an Notation attribute type.
3952 *
3953 * Note: the leading 'NOTATION' S part has already being parsed...
3954 *
3955 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3956 *
3957 * [ VC: Notation Attributes ]
3958 * Values of this type must match one of the notation names included
3959 * in the declaration; all notation names in the declaration must be declared.
3960 *
3961 * Returns: the notation attribute tree built while parsing
3962 */
3963
3964xmlEnumerationPtr
3965xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3966 xmlChar *name;
3967 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3968
3969 if (RAW != '(') {
3970 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3971 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3972 ctxt->sax->error(ctxt->userData,
3973 "'(' required to start 'NOTATION'\n");
3974 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003975 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003976 return(NULL);
3977 }
3978 SHRINK;
3979 do {
3980 NEXT;
3981 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003982 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003983 if (name == NULL) {
3984 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3985 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3986 ctxt->sax->error(ctxt->userData,
3987 "Name expected in NOTATION declaration\n");
3988 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003989 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003990 return(ret);
3991 }
3992 cur = xmlCreateEnumeration(name);
3993 xmlFree(name);
3994 if (cur == NULL) return(ret);
3995 if (last == NULL) ret = last = cur;
3996 else {
3997 last->next = cur;
3998 last = cur;
3999 }
4000 SKIP_BLANKS;
4001 } while (RAW == '|');
4002 if (RAW != ')') {
4003 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
4004 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4005 ctxt->sax->error(ctxt->userData,
4006 "')' required to finish NOTATION declaration\n");
4007 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004008 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004009 if ((last != NULL) && (last != ret))
4010 xmlFreeEnumeration(last);
4011 return(ret);
4012 }
4013 NEXT;
4014 return(ret);
4015}
4016
4017/**
4018 * xmlParseEnumerationType:
4019 * @ctxt: an XML parser context
4020 *
4021 * parse an Enumeration attribute type.
4022 *
4023 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4024 *
4025 * [ VC: Enumeration ]
4026 * Values of this type must match one of the Nmtoken tokens in
4027 * the declaration
4028 *
4029 * Returns: the enumeration attribute tree built while parsing
4030 */
4031
4032xmlEnumerationPtr
4033xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4034 xmlChar *name;
4035 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4036
4037 if (RAW != '(') {
4038 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
4039 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4040 ctxt->sax->error(ctxt->userData,
4041 "'(' required to start ATTLIST enumeration\n");
4042 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004043 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004044 return(NULL);
4045 }
4046 SHRINK;
4047 do {
4048 NEXT;
4049 SKIP_BLANKS;
4050 name = xmlParseNmtoken(ctxt);
4051 if (name == NULL) {
4052 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
4053 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4054 ctxt->sax->error(ctxt->userData,
4055 "NmToken expected in ATTLIST enumeration\n");
4056 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004057 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004058 return(ret);
4059 }
4060 cur = xmlCreateEnumeration(name);
4061 xmlFree(name);
4062 if (cur == NULL) return(ret);
4063 if (last == NULL) ret = last = cur;
4064 else {
4065 last->next = cur;
4066 last = cur;
4067 }
4068 SKIP_BLANKS;
4069 } while (RAW == '|');
4070 if (RAW != ')') {
4071 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
4072 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4073 ctxt->sax->error(ctxt->userData,
4074 "')' required to finish ATTLIST enumeration\n");
4075 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004076 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004077 return(ret);
4078 }
4079 NEXT;
4080 return(ret);
4081}
4082
4083/**
4084 * xmlParseEnumeratedType:
4085 * @ctxt: an XML parser context
4086 * @tree: the enumeration tree built while parsing
4087 *
4088 * parse an Enumerated attribute type.
4089 *
4090 * [57] EnumeratedType ::= NotationType | Enumeration
4091 *
4092 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4093 *
4094 *
4095 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4096 */
4097
4098int
4099xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4100 if ((RAW == 'N') && (NXT(1) == 'O') &&
4101 (NXT(2) == 'T') && (NXT(3) == 'A') &&
4102 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4103 (NXT(6) == 'O') && (NXT(7) == 'N')) {
4104 SKIP(8);
4105 if (!IS_BLANK(CUR)) {
4106 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4107 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4108 ctxt->sax->error(ctxt->userData,
4109 "Space required after 'NOTATION'\n");
4110 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004111 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004112 return(0);
4113 }
4114 SKIP_BLANKS;
4115 *tree = xmlParseNotationType(ctxt);
4116 if (*tree == NULL) return(0);
4117 return(XML_ATTRIBUTE_NOTATION);
4118 }
4119 *tree = xmlParseEnumerationType(ctxt);
4120 if (*tree == NULL) return(0);
4121 return(XML_ATTRIBUTE_ENUMERATION);
4122}
4123
4124/**
4125 * xmlParseAttributeType:
4126 * @ctxt: an XML parser context
4127 * @tree: the enumeration tree built while parsing
4128 *
4129 * parse the Attribute list def for an element
4130 *
4131 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4132 *
4133 * [55] StringType ::= 'CDATA'
4134 *
4135 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4136 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4137 *
4138 * Validity constraints for attribute values syntax are checked in
4139 * xmlValidateAttributeValue()
4140 *
4141 * [ VC: ID ]
4142 * Values of type ID must match the Name production. A name must not
4143 * appear more than once in an XML document as a value of this type;
4144 * i.e., ID values must uniquely identify the elements which bear them.
4145 *
4146 * [ VC: One ID per Element Type ]
4147 * No element type may have more than one ID attribute specified.
4148 *
4149 * [ VC: ID Attribute Default ]
4150 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4151 *
4152 * [ VC: IDREF ]
4153 * Values of type IDREF must match the Name production, and values
4154 * of type IDREFS must match Names; each IDREF Name must match the value
4155 * of an ID attribute on some element in the XML document; i.e. IDREF
4156 * values must match the value of some ID attribute.
4157 *
4158 * [ VC: Entity Name ]
4159 * Values of type ENTITY must match the Name production, values
4160 * of type ENTITIES must match Names; each Entity Name must match the
4161 * name of an unparsed entity declared in the DTD.
4162 *
4163 * [ VC: Name Token ]
4164 * Values of type NMTOKEN must match the Nmtoken production; values
4165 * of type NMTOKENS must match Nmtokens.
4166 *
4167 * Returns the attribute type
4168 */
4169int
4170xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4171 SHRINK;
4172 if ((RAW == 'C') && (NXT(1) == 'D') &&
4173 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4174 (NXT(4) == 'A')) {
4175 SKIP(5);
4176 return(XML_ATTRIBUTE_CDATA);
4177 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4178 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4179 (NXT(4) == 'F') && (NXT(5) == 'S')) {
4180 SKIP(6);
4181 return(XML_ATTRIBUTE_IDREFS);
4182 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4183 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4184 (NXT(4) == 'F')) {
4185 SKIP(5);
4186 return(XML_ATTRIBUTE_IDREF);
4187 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4188 SKIP(2);
4189 return(XML_ATTRIBUTE_ID);
4190 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4191 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4192 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4193 SKIP(6);
4194 return(XML_ATTRIBUTE_ENTITY);
4195 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4196 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4197 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4198 (NXT(6) == 'E') && (NXT(7) == 'S')) {
4199 SKIP(8);
4200 return(XML_ATTRIBUTE_ENTITIES);
4201 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4202 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4203 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4204 (NXT(6) == 'N') && (NXT(7) == 'S')) {
4205 SKIP(8);
4206 return(XML_ATTRIBUTE_NMTOKENS);
4207 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4208 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4209 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4210 (NXT(6) == 'N')) {
4211 SKIP(7);
4212 return(XML_ATTRIBUTE_NMTOKEN);
4213 }
4214 return(xmlParseEnumeratedType(ctxt, tree));
4215}
4216
4217/**
4218 * xmlParseAttributeListDecl:
4219 * @ctxt: an XML parser context
4220 *
4221 * : parse the Attribute list def for an element
4222 *
4223 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4224 *
4225 * [53] AttDef ::= S Name S AttType S DefaultDecl
4226 *
4227 */
4228void
4229xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
4230 xmlChar *elemName;
4231 xmlChar *attrName;
4232 xmlEnumerationPtr tree;
4233
4234 if ((RAW == '<') && (NXT(1) == '!') &&
4235 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4236 (NXT(4) == 'T') && (NXT(5) == 'L') &&
4237 (NXT(6) == 'I') && (NXT(7) == 'S') &&
4238 (NXT(8) == 'T')) {
4239 xmlParserInputPtr input = ctxt->input;
4240
4241 SKIP(9);
4242 if (!IS_BLANK(CUR)) {
4243 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4244 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4245 ctxt->sax->error(ctxt->userData,
4246 "Space required after '<!ATTLIST'\n");
4247 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004248 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004249 }
4250 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004251 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004252 if (elemName == NULL) {
4253 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4254 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4255 ctxt->sax->error(ctxt->userData,
4256 "ATTLIST: no name for Element\n");
4257 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004258 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004259 return;
4260 }
4261 SKIP_BLANKS;
4262 GROW;
4263 while (RAW != '>') {
4264 const xmlChar *check = CUR_PTR;
4265 int type;
4266 int def;
4267 xmlChar *defaultValue = NULL;
4268
4269 GROW;
4270 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004271 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004272 if (attrName == NULL) {
4273 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4274 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4275 ctxt->sax->error(ctxt->userData,
4276 "ATTLIST: no name for Attribute\n");
4277 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004278 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004279 break;
4280 }
4281 GROW;
4282 if (!IS_BLANK(CUR)) {
4283 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4284 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4285 ctxt->sax->error(ctxt->userData,
4286 "Space required after the attribute name\n");
4287 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004288 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004289 if (attrName != NULL)
4290 xmlFree(attrName);
4291 if (defaultValue != NULL)
4292 xmlFree(defaultValue);
4293 break;
4294 }
4295 SKIP_BLANKS;
4296
4297 type = xmlParseAttributeType(ctxt, &tree);
4298 if (type <= 0) {
4299 if (attrName != NULL)
4300 xmlFree(attrName);
4301 if (defaultValue != NULL)
4302 xmlFree(defaultValue);
4303 break;
4304 }
4305
4306 GROW;
4307 if (!IS_BLANK(CUR)) {
4308 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4309 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4310 ctxt->sax->error(ctxt->userData,
4311 "Space required after the attribute type\n");
4312 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004313 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004314 if (attrName != NULL)
4315 xmlFree(attrName);
4316 if (defaultValue != NULL)
4317 xmlFree(defaultValue);
4318 if (tree != NULL)
4319 xmlFreeEnumeration(tree);
4320 break;
4321 }
4322 SKIP_BLANKS;
4323
4324 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4325 if (def <= 0) {
4326 if (attrName != NULL)
4327 xmlFree(attrName);
4328 if (defaultValue != NULL)
4329 xmlFree(defaultValue);
4330 if (tree != NULL)
4331 xmlFreeEnumeration(tree);
4332 break;
4333 }
4334
4335 GROW;
4336 if (RAW != '>') {
4337 if (!IS_BLANK(CUR)) {
4338 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4339 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4340 ctxt->sax->error(ctxt->userData,
4341 "Space required after the attribute default value\n");
4342 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004343 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004344 if (attrName != NULL)
4345 xmlFree(attrName);
4346 if (defaultValue != NULL)
4347 xmlFree(defaultValue);
4348 if (tree != NULL)
4349 xmlFreeEnumeration(tree);
4350 break;
4351 }
4352 SKIP_BLANKS;
4353 }
4354 if (check == CUR_PTR) {
4355 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4356 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4357 ctxt->sax->error(ctxt->userData,
4358 "xmlParseAttributeListDecl: detected internal error\n");
4359 if (attrName != NULL)
4360 xmlFree(attrName);
4361 if (defaultValue != NULL)
4362 xmlFree(defaultValue);
4363 if (tree != NULL)
4364 xmlFreeEnumeration(tree);
4365 break;
4366 }
4367 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4368 (ctxt->sax->attributeDecl != NULL))
4369 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4370 type, def, defaultValue, tree);
4371 if (attrName != NULL)
4372 xmlFree(attrName);
4373 if (defaultValue != NULL)
4374 xmlFree(defaultValue);
4375 GROW;
4376 }
4377 if (RAW == '>') {
4378 if (input != ctxt->input) {
4379 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4380 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4381 ctxt->sax->error(ctxt->userData,
4382"Attribute list declaration doesn't start and stop in the same entity\n");
4383 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004384 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004385 }
4386 NEXT;
4387 }
4388
4389 xmlFree(elemName);
4390 }
4391}
4392
4393/**
4394 * xmlParseElementMixedContentDecl:
4395 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004396 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004397 *
4398 * parse the declaration for a Mixed Element content
4399 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4400 *
4401 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4402 * '(' S? '#PCDATA' S? ')'
4403 *
4404 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4405 *
4406 * [ VC: No Duplicate Types ]
4407 * The same name must not appear more than once in a single
4408 * mixed-content declaration.
4409 *
4410 * returns: the list of the xmlElementContentPtr describing the element choices
4411 */
4412xmlElementContentPtr
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004413xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004414 xmlElementContentPtr ret = NULL, cur = NULL, n;
4415 xmlChar *elem = NULL;
4416
4417 GROW;
4418 if ((RAW == '#') && (NXT(1) == 'P') &&
4419 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4420 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4421 (NXT(6) == 'A')) {
4422 SKIP(7);
4423 SKIP_BLANKS;
4424 SHRINK;
4425 if (RAW == ')') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004426 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4427 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4428 if (ctxt->vctxt.error != NULL)
4429 ctxt->vctxt.error(ctxt->vctxt.userData,
4430"Element content declaration doesn't start and stop in the same entity\n");
4431 ctxt->valid = 0;
4432 }
Owen Taylor3473f882001-02-23 17:55:21 +00004433 NEXT;
4434 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4435 if (RAW == '*') {
4436 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4437 NEXT;
4438 }
4439 return(ret);
4440 }
4441 if ((RAW == '(') || (RAW == '|')) {
4442 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4443 if (ret == NULL) return(NULL);
4444 }
4445 while (RAW == '|') {
4446 NEXT;
4447 if (elem == NULL) {
4448 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4449 if (ret == NULL) return(NULL);
4450 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004451 if (cur != NULL)
4452 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004453 cur = ret;
4454 } else {
4455 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4456 if (n == NULL) return(NULL);
4457 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004458 if (n->c1 != NULL)
4459 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004460 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004461 if (n != NULL)
4462 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004463 cur = n;
4464 xmlFree(elem);
4465 }
4466 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004467 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004468 if (elem == NULL) {
4469 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4470 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4471 ctxt->sax->error(ctxt->userData,
4472 "xmlParseElementMixedContentDecl : Name expected\n");
4473 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004474 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004475 xmlFreeElementContent(cur);
4476 return(NULL);
4477 }
4478 SKIP_BLANKS;
4479 GROW;
4480 }
4481 if ((RAW == ')') && (NXT(1) == '*')) {
4482 if (elem != NULL) {
4483 cur->c2 = xmlNewElementContent(elem,
4484 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004485 if (cur->c2 != NULL)
4486 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004487 xmlFree(elem);
4488 }
4489 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004490 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4491 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4492 if (ctxt->vctxt.error != NULL)
4493 ctxt->vctxt.error(ctxt->vctxt.userData,
4494"Element content declaration doesn't start and stop in the same entity\n");
4495 ctxt->valid = 0;
4496 }
Owen Taylor3473f882001-02-23 17:55:21 +00004497 SKIP(2);
4498 } else {
4499 if (elem != NULL) xmlFree(elem);
4500 xmlFreeElementContent(ret);
4501 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4502 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4503 ctxt->sax->error(ctxt->userData,
4504 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4505 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004506 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004507 return(NULL);
4508 }
4509
4510 } else {
4511 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4512 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4513 ctxt->sax->error(ctxt->userData,
4514 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4515 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004516 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004517 }
4518 return(ret);
4519}
4520
4521/**
4522 * xmlParseElementChildrenContentDecl:
4523 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004524 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004525 *
4526 * parse the declaration for the children content of an Element
4527 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4528 *
4529 *
4530 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4531 *
4532 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4533 *
4534 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4535 *
4536 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4537 *
4538 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4539 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004540 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004541 * opening or closing parentheses in a choice, seq, or Mixed
4542 * construct is contained in the replacement text for a parameter
4543 * entity, both must be contained in the same replacement text. For
4544 * interoperability, if a parameter-entity reference appears in a
4545 * choice, seq, or Mixed construct, its replacement text should not
4546 * be empty, and neither the first nor last non-blank character of
4547 * the replacement text should be a connector (| or ,).
4548 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004549 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004550 * hierarchy.
4551 */
4552xmlElementContentPtr
Owen Taylor3473f882001-02-23 17:55:21 +00004553xmlParseElementChildrenContentDecl
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004554(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004555 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4556 xmlChar *elem;
4557 xmlChar type = 0;
4558
4559 SKIP_BLANKS;
4560 GROW;
4561 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004562 xmlParserInputPtr input = ctxt->input;
4563
Owen Taylor3473f882001-02-23 17:55:21 +00004564 /* Recurse on first child */
4565 NEXT;
4566 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004567 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004568 SKIP_BLANKS;
4569 GROW;
4570 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004571 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004572 if (elem == NULL) {
4573 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4574 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4575 ctxt->sax->error(ctxt->userData,
4576 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4577 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004578 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004579 return(NULL);
4580 }
4581 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4582 GROW;
4583 if (RAW == '?') {
4584 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4585 NEXT;
4586 } else if (RAW == '*') {
4587 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4588 NEXT;
4589 } else if (RAW == '+') {
4590 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4591 NEXT;
4592 } else {
4593 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4594 }
4595 xmlFree(elem);
4596 GROW;
4597 }
4598 SKIP_BLANKS;
4599 SHRINK;
4600 while (RAW != ')') {
4601 /*
4602 * Each loop we parse one separator and one element.
4603 */
4604 if (RAW == ',') {
4605 if (type == 0) type = CUR;
4606
4607 /*
4608 * Detect "Name | Name , Name" error
4609 */
4610 else if (type != CUR) {
4611 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4612 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4613 ctxt->sax->error(ctxt->userData,
4614 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4615 type);
4616 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004617 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004618 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004619 xmlFreeElementContent(last);
4620 if (ret != NULL)
4621 xmlFreeElementContent(ret);
4622 return(NULL);
4623 }
4624 NEXT;
4625
4626 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4627 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004628 if ((last != NULL) && (last != ret))
4629 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004630 xmlFreeElementContent(ret);
4631 return(NULL);
4632 }
4633 if (last == NULL) {
4634 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004635 if (ret != NULL)
4636 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004637 ret = cur = op;
4638 } else {
4639 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004640 if (op != NULL)
4641 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004642 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004643 if (last != NULL)
4644 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004645 cur =op;
4646 last = NULL;
4647 }
4648 } else if (RAW == '|') {
4649 if (type == 0) type = CUR;
4650
4651 /*
4652 * Detect "Name , Name | Name" error
4653 */
4654 else if (type != CUR) {
4655 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4656 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4657 ctxt->sax->error(ctxt->userData,
4658 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4659 type);
4660 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004661 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004662 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004663 xmlFreeElementContent(last);
4664 if (ret != NULL)
4665 xmlFreeElementContent(ret);
4666 return(NULL);
4667 }
4668 NEXT;
4669
4670 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4671 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004672 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004673 xmlFreeElementContent(last);
4674 if (ret != NULL)
4675 xmlFreeElementContent(ret);
4676 return(NULL);
4677 }
4678 if (last == NULL) {
4679 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004680 if (ret != NULL)
4681 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004682 ret = cur = op;
4683 } else {
4684 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004685 if (op != NULL)
4686 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004687 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004688 if (last != NULL)
4689 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004690 cur =op;
4691 last = NULL;
4692 }
4693 } else {
4694 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4695 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4696 ctxt->sax->error(ctxt->userData,
4697 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4698 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004699 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004700 if (ret != NULL)
4701 xmlFreeElementContent(ret);
4702 return(NULL);
4703 }
4704 GROW;
4705 SKIP_BLANKS;
4706 GROW;
4707 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004708 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004709 /* Recurse on second child */
4710 NEXT;
4711 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004712 last = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004713 SKIP_BLANKS;
4714 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004715 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004716 if (elem == NULL) {
4717 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4718 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4719 ctxt->sax->error(ctxt->userData,
4720 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4721 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004722 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004723 if (ret != NULL)
4724 xmlFreeElementContent(ret);
4725 return(NULL);
4726 }
4727 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4728 xmlFree(elem);
4729 if (RAW == '?') {
4730 last->ocur = XML_ELEMENT_CONTENT_OPT;
4731 NEXT;
4732 } else if (RAW == '*') {
4733 last->ocur = XML_ELEMENT_CONTENT_MULT;
4734 NEXT;
4735 } else if (RAW == '+') {
4736 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4737 NEXT;
4738 } else {
4739 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4740 }
4741 }
4742 SKIP_BLANKS;
4743 GROW;
4744 }
4745 if ((cur != NULL) && (last != NULL)) {
4746 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004747 if (last != NULL)
4748 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004749 }
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004750 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4751 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4752 if (ctxt->vctxt.error != NULL)
4753 ctxt->vctxt.error(ctxt->vctxt.userData,
4754"Element content declaration doesn't start and stop in the same entity\n");
4755 ctxt->valid = 0;
4756 }
Owen Taylor3473f882001-02-23 17:55:21 +00004757 NEXT;
4758 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004759 if (ret != NULL)
4760 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004761 NEXT;
4762 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004763 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004764 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004765 cur = ret;
4766 /*
4767 * Some normalization:
4768 * (a | b* | c?)* == (a | b | c)*
4769 */
4770 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4771 if ((cur->c1 != NULL) &&
4772 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4773 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4774 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4775 if ((cur->c2 != NULL) &&
4776 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4777 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4778 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4779 cur = cur->c2;
4780 }
4781 }
Owen Taylor3473f882001-02-23 17:55:21 +00004782 NEXT;
4783 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004784 if (ret != NULL) {
4785 int found = 0;
4786
Daniel Veillarde470df72001-04-18 21:41:07 +00004787 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004788 /*
4789 * Some normalization:
4790 * (a | b*)+ == (a | b)*
4791 * (a | b?)+ == (a | b)*
4792 */
4793 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4794 if ((cur->c1 != NULL) &&
4795 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4796 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4797 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4798 found = 1;
4799 }
4800 if ((cur->c2 != NULL) &&
4801 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4802 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4803 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4804 found = 1;
4805 }
4806 cur = cur->c2;
4807 }
4808 if (found)
4809 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4810 }
Owen Taylor3473f882001-02-23 17:55:21 +00004811 NEXT;
4812 }
4813 return(ret);
4814}
4815
4816/**
4817 * xmlParseElementContentDecl:
4818 * @ctxt: an XML parser context
4819 * @name: the name of the element being defined.
4820 * @result: the Element Content pointer will be stored here if any
4821 *
4822 * parse the declaration for an Element content either Mixed or Children,
4823 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4824 *
4825 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4826 *
4827 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4828 */
4829
4830int
4831xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4832 xmlElementContentPtr *result) {
4833
4834 xmlElementContentPtr tree = NULL;
4835 xmlParserInputPtr input = ctxt->input;
4836 int res;
4837
4838 *result = NULL;
4839
4840 if (RAW != '(') {
4841 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4842 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4843 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004844 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004845 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004846 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004847 return(-1);
4848 }
4849 NEXT;
4850 GROW;
4851 SKIP_BLANKS;
4852 if ((RAW == '#') && (NXT(1) == 'P') &&
4853 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4854 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4855 (NXT(6) == 'A')) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004856 tree = xmlParseElementMixedContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004857 res = XML_ELEMENT_TYPE_MIXED;
4858 } else {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004859 tree = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004860 res = XML_ELEMENT_TYPE_ELEMENT;
4861 }
Owen Taylor3473f882001-02-23 17:55:21 +00004862 SKIP_BLANKS;
4863 *result = tree;
4864 return(res);
4865}
4866
4867/**
4868 * xmlParseElementDecl:
4869 * @ctxt: an XML parser context
4870 *
4871 * parse an Element declaration.
4872 *
4873 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4874 *
4875 * [ VC: Unique Element Type Declaration ]
4876 * No element type may be declared more than once
4877 *
4878 * Returns the type of the element, or -1 in case of error
4879 */
4880int
4881xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4882 xmlChar *name;
4883 int ret = -1;
4884 xmlElementContentPtr content = NULL;
4885
4886 GROW;
4887 if ((RAW == '<') && (NXT(1) == '!') &&
4888 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4889 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4890 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4891 (NXT(8) == 'T')) {
4892 xmlParserInputPtr input = ctxt->input;
4893
4894 SKIP(9);
4895 if (!IS_BLANK(CUR)) {
4896 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4897 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4898 ctxt->sax->error(ctxt->userData,
4899 "Space required after 'ELEMENT'\n");
4900 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004901 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004902 }
4903 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004904 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004905 if (name == NULL) {
4906 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4907 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4908 ctxt->sax->error(ctxt->userData,
4909 "xmlParseElementDecl: no name for Element\n");
4910 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004911 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004912 return(-1);
4913 }
4914 while ((RAW == 0) && (ctxt->inputNr > 1))
4915 xmlPopInput(ctxt);
4916 if (!IS_BLANK(CUR)) {
4917 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4918 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4919 ctxt->sax->error(ctxt->userData,
4920 "Space required after the element name\n");
4921 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004922 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004923 }
4924 SKIP_BLANKS;
4925 if ((RAW == 'E') && (NXT(1) == 'M') &&
4926 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4927 (NXT(4) == 'Y')) {
4928 SKIP(5);
4929 /*
4930 * Element must always be empty.
4931 */
4932 ret = XML_ELEMENT_TYPE_EMPTY;
4933 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4934 (NXT(2) == 'Y')) {
4935 SKIP(3);
4936 /*
4937 * Element is a generic container.
4938 */
4939 ret = XML_ELEMENT_TYPE_ANY;
4940 } else if (RAW == '(') {
4941 ret = xmlParseElementContentDecl(ctxt, name, &content);
4942 } else {
4943 /*
4944 * [ WFC: PEs in Internal Subset ] error handling.
4945 */
4946 if ((RAW == '%') && (ctxt->external == 0) &&
4947 (ctxt->inputNr == 1)) {
4948 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4949 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4950 ctxt->sax->error(ctxt->userData,
4951 "PEReference: forbidden within markup decl in internal subset\n");
4952 } else {
4953 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4954 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4955 ctxt->sax->error(ctxt->userData,
4956 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4957 }
4958 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004959 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004960 if (name != NULL) xmlFree(name);
4961 return(-1);
4962 }
4963
4964 SKIP_BLANKS;
4965 /*
4966 * Pop-up of finished entities.
4967 */
4968 while ((RAW == 0) && (ctxt->inputNr > 1))
4969 xmlPopInput(ctxt);
4970 SKIP_BLANKS;
4971
4972 if (RAW != '>') {
4973 ctxt->errNo = XML_ERR_GT_REQUIRED;
4974 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4975 ctxt->sax->error(ctxt->userData,
4976 "xmlParseElementDecl: expected '>' at the end\n");
4977 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004978 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004979 } else {
4980 if (input != ctxt->input) {
4981 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4982 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4983 ctxt->sax->error(ctxt->userData,
4984"Element declaration doesn't start and stop in the same entity\n");
4985 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004986 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004987 }
4988
4989 NEXT;
4990 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4991 (ctxt->sax->elementDecl != NULL))
4992 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4993 content);
4994 }
4995 if (content != NULL) {
4996 xmlFreeElementContent(content);
4997 }
4998 if (name != NULL) {
4999 xmlFree(name);
5000 }
5001 }
5002 return(ret);
5003}
5004
5005/**
Owen Taylor3473f882001-02-23 17:55:21 +00005006 * xmlParseConditionalSections
5007 * @ctxt: an XML parser context
5008 *
5009 * [61] conditionalSect ::= includeSect | ignoreSect
5010 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5011 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5012 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5013 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5014 */
5015
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005016static void
Owen Taylor3473f882001-02-23 17:55:21 +00005017xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5018 SKIP(3);
5019 SKIP_BLANKS;
5020 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
5021 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
5022 (NXT(6) == 'E')) {
5023 SKIP(7);
5024 SKIP_BLANKS;
5025 if (RAW != '[') {
5026 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5027 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5028 ctxt->sax->error(ctxt->userData,
5029 "XML conditional section '[' expected\n");
5030 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005031 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005032 } else {
5033 NEXT;
5034 }
5035 if (xmlParserDebugEntities) {
5036 if ((ctxt->input != NULL) && (ctxt->input->filename))
5037 xmlGenericError(xmlGenericErrorContext,
5038 "%s(%d): ", ctxt->input->filename,
5039 ctxt->input->line);
5040 xmlGenericError(xmlGenericErrorContext,
5041 "Entering INCLUDE Conditional Section\n");
5042 }
5043
5044 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5045 (NXT(2) != '>'))) {
5046 const xmlChar *check = CUR_PTR;
5047 int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005048
5049 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5050 xmlParseConditionalSections(ctxt);
5051 } else if (IS_BLANK(CUR)) {
5052 NEXT;
5053 } else if (RAW == '%') {
5054 xmlParsePEReference(ctxt);
5055 } else
5056 xmlParseMarkupDecl(ctxt);
5057
5058 /*
5059 * Pop-up of finished entities.
5060 */
5061 while ((RAW == 0) && (ctxt->inputNr > 1))
5062 xmlPopInput(ctxt);
5063
Daniel Veillardfdc91562002-07-01 21:52:03 +00005064 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005065 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5066 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5067 ctxt->sax->error(ctxt->userData,
5068 "Content error in the external subset\n");
5069 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005070 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005071 break;
5072 }
5073 }
5074 if (xmlParserDebugEntities) {
5075 if ((ctxt->input != NULL) && (ctxt->input->filename))
5076 xmlGenericError(xmlGenericErrorContext,
5077 "%s(%d): ", ctxt->input->filename,
5078 ctxt->input->line);
5079 xmlGenericError(xmlGenericErrorContext,
5080 "Leaving INCLUDE Conditional Section\n");
5081 }
5082
5083 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
5084 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
5085 int state;
5086 int instate;
5087 int depth = 0;
5088
5089 SKIP(6);
5090 SKIP_BLANKS;
5091 if (RAW != '[') {
5092 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5093 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5094 ctxt->sax->error(ctxt->userData,
5095 "XML conditional section '[' expected\n");
5096 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005097 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005098 } else {
5099 NEXT;
5100 }
5101 if (xmlParserDebugEntities) {
5102 if ((ctxt->input != NULL) && (ctxt->input->filename))
5103 xmlGenericError(xmlGenericErrorContext,
5104 "%s(%d): ", ctxt->input->filename,
5105 ctxt->input->line);
5106 xmlGenericError(xmlGenericErrorContext,
5107 "Entering IGNORE Conditional Section\n");
5108 }
5109
5110 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005111 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005112 * But disable SAX event generating DTD building in the meantime
5113 */
5114 state = ctxt->disableSAX;
5115 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005116 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005117 ctxt->instate = XML_PARSER_IGNORE;
5118
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005119 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005120 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5121 depth++;
5122 SKIP(3);
5123 continue;
5124 }
5125 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5126 if (--depth >= 0) SKIP(3);
5127 continue;
5128 }
5129 NEXT;
5130 continue;
5131 }
5132
5133 ctxt->disableSAX = state;
5134 ctxt->instate = instate;
5135
5136 if (xmlParserDebugEntities) {
5137 if ((ctxt->input != NULL) && (ctxt->input->filename))
5138 xmlGenericError(xmlGenericErrorContext,
5139 "%s(%d): ", ctxt->input->filename,
5140 ctxt->input->line);
5141 xmlGenericError(xmlGenericErrorContext,
5142 "Leaving IGNORE Conditional Section\n");
5143 }
5144
5145 } else {
5146 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5147 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5148 ctxt->sax->error(ctxt->userData,
5149 "XML conditional section INCLUDE or IGNORE keyword expected\n");
5150 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005151 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005152 }
5153
5154 if (RAW == 0)
5155 SHRINK;
5156
5157 if (RAW == 0) {
5158 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
5159 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5160 ctxt->sax->error(ctxt->userData,
5161 "XML conditional section not closed\n");
5162 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005163 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005164 } else {
5165 SKIP(3);
5166 }
5167}
5168
5169/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005170 * xmlParseMarkupDecl:
5171 * @ctxt: an XML parser context
5172 *
5173 * parse Markup declarations
5174 *
5175 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5176 * NotationDecl | PI | Comment
5177 *
5178 * [ VC: Proper Declaration/PE Nesting ]
5179 * Parameter-entity replacement text must be properly nested with
5180 * markup declarations. That is to say, if either the first character
5181 * or the last character of a markup declaration (markupdecl above) is
5182 * contained in the replacement text for a parameter-entity reference,
5183 * both must be contained in the same replacement text.
5184 *
5185 * [ WFC: PEs in Internal Subset ]
5186 * In the internal DTD subset, parameter-entity references can occur
5187 * only where markup declarations can occur, not within markup declarations.
5188 * (This does not apply to references that occur in external parameter
5189 * entities or to the external subset.)
5190 */
5191void
5192xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5193 GROW;
5194 xmlParseElementDecl(ctxt);
5195 xmlParseAttributeListDecl(ctxt);
5196 xmlParseEntityDecl(ctxt);
5197 xmlParseNotationDecl(ctxt);
5198 xmlParsePI(ctxt);
5199 xmlParseComment(ctxt);
5200 /*
5201 * This is only for internal subset. On external entities,
5202 * the replacement is done before parsing stage
5203 */
5204 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5205 xmlParsePEReference(ctxt);
5206
5207 /*
5208 * Conditional sections are allowed from entities included
5209 * by PE References in the internal subset.
5210 */
5211 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5212 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5213 xmlParseConditionalSections(ctxt);
5214 }
5215 }
5216
5217 ctxt->instate = XML_PARSER_DTD;
5218}
5219
5220/**
5221 * xmlParseTextDecl:
5222 * @ctxt: an XML parser context
5223 *
5224 * parse an XML declaration header for external entities
5225 *
5226 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5227 *
5228 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5229 */
5230
5231void
5232xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5233 xmlChar *version;
5234
5235 /*
5236 * We know that '<?xml' is here.
5237 */
5238 if ((RAW == '<') && (NXT(1) == '?') &&
5239 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5240 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5241 SKIP(5);
5242 } else {
5243 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
5244 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5245 ctxt->sax->error(ctxt->userData,
5246 "Text declaration '<?xml' required\n");
5247 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005248 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005249
5250 return;
5251 }
5252
5253 if (!IS_BLANK(CUR)) {
5254 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5255 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5256 ctxt->sax->error(ctxt->userData,
5257 "Space needed after '<?xml'\n");
5258 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005259 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005260 }
5261 SKIP_BLANKS;
5262
5263 /*
5264 * We may have the VersionInfo here.
5265 */
5266 version = xmlParseVersionInfo(ctxt);
5267 if (version == NULL)
5268 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005269 else {
5270 if (!IS_BLANK(CUR)) {
5271 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5272 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5273 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5274 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005275 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard401c2112002-01-07 16:54:10 +00005276 }
5277 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005278 ctxt->input->version = version;
5279
5280 /*
5281 * We must have the encoding declaration
5282 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005283 xmlParseEncodingDecl(ctxt);
5284 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5285 /*
5286 * The XML REC instructs us to stop parsing right here
5287 */
5288 return;
5289 }
5290
5291 SKIP_BLANKS;
5292 if ((RAW == '?') && (NXT(1) == '>')) {
5293 SKIP(2);
5294 } else if (RAW == '>') {
5295 /* Deprecated old WD ... */
5296 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5297 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5298 ctxt->sax->error(ctxt->userData,
5299 "XML declaration must end-up with '?>'\n");
5300 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005301 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005302 NEXT;
5303 } else {
5304 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5305 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5306 ctxt->sax->error(ctxt->userData,
5307 "parsing XML declaration: '?>' expected\n");
5308 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005309 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005310 MOVETO_ENDTAG(CUR_PTR);
5311 NEXT;
5312 }
5313}
5314
5315/**
Owen Taylor3473f882001-02-23 17:55:21 +00005316 * xmlParseExternalSubset:
5317 * @ctxt: an XML parser context
5318 * @ExternalID: the external identifier
5319 * @SystemID: the system identifier (or URL)
5320 *
5321 * parse Markup declarations from an external subset
5322 *
5323 * [30] extSubset ::= textDecl? extSubsetDecl
5324 *
5325 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5326 */
5327void
5328xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5329 const xmlChar *SystemID) {
5330 GROW;
5331 if ((RAW == '<') && (NXT(1) == '?') &&
5332 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5333 (NXT(4) == 'l')) {
5334 xmlParseTextDecl(ctxt);
5335 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5336 /*
5337 * The XML REC instructs us to stop parsing right here
5338 */
5339 ctxt->instate = XML_PARSER_EOF;
5340 return;
5341 }
5342 }
5343 if (ctxt->myDoc == NULL) {
5344 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5345 }
5346 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5347 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5348
5349 ctxt->instate = XML_PARSER_DTD;
5350 ctxt->external = 1;
5351 while (((RAW == '<') && (NXT(1) == '?')) ||
5352 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005353 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005354 const xmlChar *check = CUR_PTR;
5355 int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005356
5357 GROW;
5358 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5359 xmlParseConditionalSections(ctxt);
5360 } else if (IS_BLANK(CUR)) {
5361 NEXT;
5362 } else if (RAW == '%') {
5363 xmlParsePEReference(ctxt);
5364 } else
5365 xmlParseMarkupDecl(ctxt);
5366
5367 /*
5368 * Pop-up of finished entities.
5369 */
5370 while ((RAW == 0) && (ctxt->inputNr > 1))
5371 xmlPopInput(ctxt);
5372
Daniel Veillardfdc91562002-07-01 21:52:03 +00005373 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005374 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5375 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5376 ctxt->sax->error(ctxt->userData,
5377 "Content error in the external subset\n");
5378 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005379 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005380 break;
5381 }
5382 }
5383
5384 if (RAW != 0) {
5385 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5386 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5387 ctxt->sax->error(ctxt->userData,
5388 "Extra content at the end of the document\n");
5389 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005390 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005391 }
5392
5393}
5394
5395/**
5396 * xmlParseReference:
5397 * @ctxt: an XML parser context
5398 *
5399 * parse and handle entity references in content, depending on the SAX
5400 * interface, this may end-up in a call to character() if this is a
5401 * CharRef, a predefined entity, if there is no reference() callback.
5402 * or if the parser was asked to switch to that mode.
5403 *
5404 * [67] Reference ::= EntityRef | CharRef
5405 */
5406void
5407xmlParseReference(xmlParserCtxtPtr ctxt) {
5408 xmlEntityPtr ent;
5409 xmlChar *val;
5410 if (RAW != '&') return;
5411
5412 if (NXT(1) == '#') {
5413 int i = 0;
5414 xmlChar out[10];
5415 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005416 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005417
5418 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5419 /*
5420 * So we are using non-UTF-8 buffers
5421 * Check that the char fit on 8bits, if not
5422 * generate a CharRef.
5423 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005424 if (value <= 0xFF) {
5425 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005426 out[1] = 0;
5427 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5428 (!ctxt->disableSAX))
5429 ctxt->sax->characters(ctxt->userData, out, 1);
5430 } else {
5431 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005432 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005433 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005434 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005435 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5436 (!ctxt->disableSAX))
5437 ctxt->sax->reference(ctxt->userData, out);
5438 }
5439 } else {
5440 /*
5441 * Just encode the value in UTF-8
5442 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005443 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005444 out[i] = 0;
5445 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5446 (!ctxt->disableSAX))
5447 ctxt->sax->characters(ctxt->userData, out, i);
5448 }
5449 } else {
5450 ent = xmlParseEntityRef(ctxt);
5451 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005452 if (!ctxt->wellFormed)
5453 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005454 if ((ent->name != NULL) &&
5455 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5456 xmlNodePtr list = NULL;
5457 int ret;
5458
5459
5460 /*
5461 * The first reference to the entity trigger a parsing phase
5462 * where the ent->children is filled with the result from
5463 * the parsing.
5464 */
5465 if (ent->children == NULL) {
5466 xmlChar *value;
5467 value = ent->content;
5468
5469 /*
5470 * Check that this entity is well formed
5471 */
5472 if ((value != NULL) &&
5473 (value[1] == 0) && (value[0] == '<') &&
5474 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5475 /*
5476 * DONE: get definite answer on this !!!
5477 * Lots of entity decls are used to declare a single
5478 * char
5479 * <!ENTITY lt "<">
5480 * Which seems to be valid since
5481 * 2.4: The ampersand character (&) and the left angle
5482 * bracket (<) may appear in their literal form only
5483 * when used ... They are also legal within the literal
5484 * entity value of an internal entity declaration;i
5485 * see "4.3.2 Well-Formed Parsed Entities".
5486 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5487 * Looking at the OASIS test suite and James Clark
5488 * tests, this is broken. However the XML REC uses
5489 * it. Is the XML REC not well-formed ????
5490 * This is a hack to avoid this problem
5491 *
5492 * ANSWER: since lt gt amp .. are already defined,
5493 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005494 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005495 * is lousy but acceptable.
5496 */
5497 list = xmlNewDocText(ctxt->myDoc, value);
5498 if (list != NULL) {
5499 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5500 (ent->children == NULL)) {
5501 ent->children = list;
5502 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005503 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005504 list->parent = (xmlNodePtr) ent;
5505 } else {
5506 xmlFreeNodeList(list);
5507 }
5508 } else if (list != NULL) {
5509 xmlFreeNodeList(list);
5510 }
5511 } else {
5512 /*
5513 * 4.3.2: An internal general parsed entity is well-formed
5514 * if its replacement text matches the production labeled
5515 * content.
5516 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005517
5518 void *user_data;
5519 /*
5520 * This is a bit hackish but this seems the best
5521 * way to make sure both SAX and DOM entity support
5522 * behaves okay.
5523 */
5524 if (ctxt->userData == ctxt)
5525 user_data = NULL;
5526 else
5527 user_data = ctxt->userData;
5528
Owen Taylor3473f882001-02-23 17:55:21 +00005529 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5530 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005531 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5532 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005533 ctxt->depth--;
5534 } else if (ent->etype ==
5535 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5536 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005537 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005538 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005539 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005540 ctxt->depth--;
5541 } else {
5542 ret = -1;
5543 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5544 ctxt->sax->error(ctxt->userData,
5545 "Internal: invalid entity type\n");
5546 }
5547 if (ret == XML_ERR_ENTITY_LOOP) {
5548 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5549 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5550 ctxt->sax->error(ctxt->userData,
5551 "Detected entity reference loop\n");
5552 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005553 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005554 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005555 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005556 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5557 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005558 (ent->children == NULL)) {
5559 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005560 if (ctxt->replaceEntities) {
5561 /*
5562 * Prune it directly in the generated document
5563 * except for single text nodes.
5564 */
5565 if ((list->type == XML_TEXT_NODE) &&
5566 (list->next == NULL)) {
5567 list->parent = (xmlNodePtr) ent;
5568 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005569 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005570 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005571 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005572 while (list != NULL) {
5573 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005574 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005575 if (list->next == NULL)
5576 ent->last = list;
5577 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005578 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005579 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005580 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5581 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005582 }
5583 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005584 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005585 while (list != NULL) {
5586 list->parent = (xmlNodePtr) ent;
5587 if (list->next == NULL)
5588 ent->last = list;
5589 list = list->next;
5590 }
Owen Taylor3473f882001-02-23 17:55:21 +00005591 }
5592 } else {
5593 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005594 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005595 }
5596 } else if (ret > 0) {
5597 ctxt->errNo = ret;
5598 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5599 ctxt->sax->error(ctxt->userData,
5600 "Entity value required\n");
5601 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005602 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005603 } else if (list != NULL) {
5604 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005605 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005606 }
5607 }
5608 }
5609 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5610 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5611 /*
5612 * Create a node.
5613 */
5614 ctxt->sax->reference(ctxt->userData, ent->name);
5615 return;
5616 } else if (ctxt->replaceEntities) {
5617 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5618 /*
5619 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005620 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005621 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005622 */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005623 if ((list == NULL) && (ent->owner == 0)) {
5624 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005625 cur = ent->children;
5626 while (cur != NULL) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005627 nw = xmlCopyNode(cur, 1);
5628 if (nw != NULL) {
5629 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005630 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005631 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005632 }
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005633 xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005634 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005635 if (cur == ent->last)
5636 break;
5637 cur = cur->next;
5638 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005639 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005640 xmlAddEntityReference(ent, firstChild, nw);
5641 } else if (list == NULL) {
5642 xmlNodePtr nw = NULL, cur, next, last,
5643 firstChild = NULL;
5644 /*
5645 * Copy the entity child list and make it the new
5646 * entity child list. The goal is to make sure any
5647 * ID or REF referenced will be the one from the
5648 * document content and not the entity copy.
5649 */
5650 cur = ent->children;
5651 ent->children = NULL;
5652 last = ent->last;
5653 ent->last = NULL;
5654 while (cur != NULL) {
5655 next = cur->next;
5656 cur->next = NULL;
5657 cur->parent = NULL;
5658 nw = xmlCopyNode(cur, 1);
5659 if (nw != NULL) {
5660 nw->_private = cur->_private;
5661 if (firstChild == NULL){
5662 firstChild = cur;
5663 }
5664 xmlAddChild((xmlNodePtr) ent, nw);
5665 xmlAddChild(ctxt->node, cur);
5666 }
5667 if (cur == last)
5668 break;
5669 cur = next;
5670 }
5671 ent->owner = 1;
5672 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5673 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005674 } else {
5675 /*
5676 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005677 * node with a possible previous text one which
5678 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005679 */
5680 if (ent->children->type == XML_TEXT_NODE)
5681 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5682 if ((ent->last != ent->children) &&
5683 (ent->last->type == XML_TEXT_NODE))
5684 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5685 xmlAddChildList(ctxt->node, ent->children);
5686 }
5687
Owen Taylor3473f882001-02-23 17:55:21 +00005688 /*
5689 * This is to avoid a nasty side effect, see
5690 * characters() in SAX.c
5691 */
5692 ctxt->nodemem = 0;
5693 ctxt->nodelen = 0;
5694 return;
5695 } else {
5696 /*
5697 * Probably running in SAX mode
5698 */
5699 xmlParserInputPtr input;
5700
5701 input = xmlNewEntityInputStream(ctxt, ent);
5702 xmlPushInput(ctxt, input);
5703 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5704 (RAW == '<') && (NXT(1) == '?') &&
5705 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5706 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5707 xmlParseTextDecl(ctxt);
5708 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5709 /*
5710 * The XML REC instructs us to stop parsing right here
5711 */
5712 ctxt->instate = XML_PARSER_EOF;
5713 return;
5714 }
5715 if (input->standalone == 1) {
5716 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5717 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5718 ctxt->sax->error(ctxt->userData,
5719 "external parsed entities cannot be standalone\n");
5720 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005721 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005722 }
5723 }
5724 return;
5725 }
5726 }
5727 } else {
5728 val = ent->content;
5729 if (val == NULL) return;
5730 /*
5731 * inline the entity.
5732 */
5733 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5734 (!ctxt->disableSAX))
5735 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5736 }
5737 }
5738}
5739
5740/**
5741 * xmlParseEntityRef:
5742 * @ctxt: an XML parser context
5743 *
5744 * parse ENTITY references declarations
5745 *
5746 * [68] EntityRef ::= '&' Name ';'
5747 *
5748 * [ WFC: Entity Declared ]
5749 * In a document without any DTD, a document with only an internal DTD
5750 * subset which contains no parameter entity references, or a document
5751 * with "standalone='yes'", the Name given in the entity reference
5752 * must match that in an entity declaration, except that well-formed
5753 * documents need not declare any of the following entities: amp, lt,
5754 * gt, apos, quot. The declaration of a parameter entity must precede
5755 * any reference to it. Similarly, the declaration of a general entity
5756 * must precede any reference to it which appears in a default value in an
5757 * attribute-list declaration. Note that if entities are declared in the
5758 * external subset or in external parameter entities, a non-validating
5759 * processor is not obligated to read and process their declarations;
5760 * for such documents, the rule that an entity must be declared is a
5761 * well-formedness constraint only if standalone='yes'.
5762 *
5763 * [ WFC: Parsed Entity ]
5764 * An entity reference must not contain the name of an unparsed entity
5765 *
5766 * Returns the xmlEntityPtr if found, or NULL otherwise.
5767 */
5768xmlEntityPtr
5769xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5770 xmlChar *name;
5771 xmlEntityPtr ent = NULL;
5772
5773 GROW;
5774
5775 if (RAW == '&') {
5776 NEXT;
5777 name = xmlParseName(ctxt);
5778 if (name == NULL) {
5779 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5780 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5781 ctxt->sax->error(ctxt->userData,
5782 "xmlParseEntityRef: no name\n");
5783 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005784 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005785 } else {
5786 if (RAW == ';') {
5787 NEXT;
5788 /*
5789 * Ask first SAX for entity resolution, otherwise try the
5790 * predefined set.
5791 */
5792 if (ctxt->sax != NULL) {
5793 if (ctxt->sax->getEntity != NULL)
5794 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005795 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00005796 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005797 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
5798 (ctxt->userData==ctxt)) {
Daniel Veillard5997aca2002-03-18 18:36:20 +00005799 ent = getEntity(ctxt, name);
5800 }
Owen Taylor3473f882001-02-23 17:55:21 +00005801 }
5802 /*
5803 * [ WFC: Entity Declared ]
5804 * In a document without any DTD, a document with only an
5805 * internal DTD subset which contains no parameter entity
5806 * references, or a document with "standalone='yes'", the
5807 * Name given in the entity reference must match that in an
5808 * entity declaration, except that well-formed documents
5809 * need not declare any of the following entities: amp, lt,
5810 * gt, apos, quot.
5811 * The declaration of a parameter entity must precede any
5812 * reference to it.
5813 * Similarly, the declaration of a general entity must
5814 * precede any reference to it which appears in a default
5815 * value in an attribute-list declaration. Note that if
5816 * entities are declared in the external subset or in
5817 * external parameter entities, a non-validating processor
5818 * is not obligated to read and process their declarations;
5819 * for such documents, the rule that an entity must be
5820 * declared is a well-formedness constraint only if
5821 * standalone='yes'.
5822 */
5823 if (ent == NULL) {
5824 if ((ctxt->standalone == 1) ||
5825 ((ctxt->hasExternalSubset == 0) &&
5826 (ctxt->hasPErefs == 0))) {
5827 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5828 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5829 ctxt->sax->error(ctxt->userData,
5830 "Entity '%s' not defined\n", name);
5831 ctxt->wellFormed = 0;
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005832 ctxt->valid = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005833 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005834 } else {
5835 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005836 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005837 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005838 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005839 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005840 }
5841 }
5842
5843 /*
5844 * [ WFC: Parsed Entity ]
5845 * An entity reference must not contain the name of an
5846 * unparsed entity
5847 */
5848 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5849 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5850 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5851 ctxt->sax->error(ctxt->userData,
5852 "Entity reference to unparsed entity %s\n", name);
5853 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005854 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005855 }
5856
5857 /*
5858 * [ WFC: No External Entity References ]
5859 * Attribute values cannot contain direct or indirect
5860 * entity references to external entities.
5861 */
5862 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5863 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5864 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5865 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5866 ctxt->sax->error(ctxt->userData,
5867 "Attribute references external entity '%s'\n", name);
5868 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005869 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005870 }
5871 /*
5872 * [ WFC: No < in Attribute Values ]
5873 * The replacement text of any entity referred to directly or
5874 * indirectly in an attribute value (other than "&lt;") must
5875 * not contain a <.
5876 */
5877 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5878 (ent != NULL) &&
5879 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5880 (ent->content != NULL) &&
5881 (xmlStrchr(ent->content, '<'))) {
5882 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5883 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5884 ctxt->sax->error(ctxt->userData,
5885 "'<' in entity '%s' is not allowed in attributes values\n", name);
5886 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005887 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005888 }
5889
5890 /*
5891 * Internal check, no parameter entities here ...
5892 */
5893 else {
5894 switch (ent->etype) {
5895 case XML_INTERNAL_PARAMETER_ENTITY:
5896 case XML_EXTERNAL_PARAMETER_ENTITY:
5897 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5898 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5899 ctxt->sax->error(ctxt->userData,
5900 "Attempt to reference the parameter entity '%s'\n", name);
5901 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005902 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005903 break;
5904 default:
5905 break;
5906 }
5907 }
5908
5909 /*
5910 * [ WFC: No Recursion ]
5911 * A parsed entity must not contain a recursive reference
5912 * to itself, either directly or indirectly.
5913 * Done somewhere else
5914 */
5915
5916 } else {
5917 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5918 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5919 ctxt->sax->error(ctxt->userData,
5920 "xmlParseEntityRef: expecting ';'\n");
5921 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005922 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005923 }
5924 xmlFree(name);
5925 }
5926 }
5927 return(ent);
5928}
5929
5930/**
5931 * xmlParseStringEntityRef:
5932 * @ctxt: an XML parser context
5933 * @str: a pointer to an index in the string
5934 *
5935 * parse ENTITY references declarations, but this version parses it from
5936 * a string value.
5937 *
5938 * [68] EntityRef ::= '&' Name ';'
5939 *
5940 * [ WFC: Entity Declared ]
5941 * In a document without any DTD, a document with only an internal DTD
5942 * subset which contains no parameter entity references, or a document
5943 * with "standalone='yes'", the Name given in the entity reference
5944 * must match that in an entity declaration, except that well-formed
5945 * documents need not declare any of the following entities: amp, lt,
5946 * gt, apos, quot. The declaration of a parameter entity must precede
5947 * any reference to it. Similarly, the declaration of a general entity
5948 * must precede any reference to it which appears in a default value in an
5949 * attribute-list declaration. Note that if entities are declared in the
5950 * external subset or in external parameter entities, a non-validating
5951 * processor is not obligated to read and process their declarations;
5952 * for such documents, the rule that an entity must be declared is a
5953 * well-formedness constraint only if standalone='yes'.
5954 *
5955 * [ WFC: Parsed Entity ]
5956 * An entity reference must not contain the name of an unparsed entity
5957 *
5958 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5959 * is updated to the current location in the string.
5960 */
5961xmlEntityPtr
5962xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5963 xmlChar *name;
5964 const xmlChar *ptr;
5965 xmlChar cur;
5966 xmlEntityPtr ent = NULL;
5967
5968 if ((str == NULL) || (*str == NULL))
5969 return(NULL);
5970 ptr = *str;
5971 cur = *ptr;
5972 if (cur == '&') {
5973 ptr++;
5974 cur = *ptr;
5975 name = xmlParseStringName(ctxt, &ptr);
5976 if (name == NULL) {
5977 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5978 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5979 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005980 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005981 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005982 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005983 } else {
5984 if (*ptr == ';') {
5985 ptr++;
5986 /*
5987 * Ask first SAX for entity resolution, otherwise try the
5988 * predefined set.
5989 */
5990 if (ctxt->sax != NULL) {
5991 if (ctxt->sax->getEntity != NULL)
5992 ent = ctxt->sax->getEntity(ctxt->userData, name);
5993 if (ent == NULL)
5994 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005995 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5996 ent = getEntity(ctxt, name);
5997 }
Owen Taylor3473f882001-02-23 17:55:21 +00005998 }
5999 /*
6000 * [ WFC: Entity Declared ]
6001 * In a document without any DTD, a document with only an
6002 * internal DTD subset which contains no parameter entity
6003 * references, or a document with "standalone='yes'", the
6004 * Name given in the entity reference must match that in an
6005 * entity declaration, except that well-formed documents
6006 * need not declare any of the following entities: amp, lt,
6007 * gt, apos, quot.
6008 * The declaration of a parameter entity must precede any
6009 * reference to it.
6010 * Similarly, the declaration of a general entity must
6011 * precede any reference to it which appears in a default
6012 * value in an attribute-list declaration. Note that if
6013 * entities are declared in the external subset or in
6014 * external parameter entities, a non-validating processor
6015 * is not obligated to read and process their declarations;
6016 * for such documents, the rule that an entity must be
6017 * declared is a well-formedness constraint only if
6018 * standalone='yes'.
6019 */
6020 if (ent == NULL) {
6021 if ((ctxt->standalone == 1) ||
6022 ((ctxt->hasExternalSubset == 0) &&
6023 (ctxt->hasPErefs == 0))) {
6024 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6025 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6026 ctxt->sax->error(ctxt->userData,
6027 "Entity '%s' not defined\n", name);
6028 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006029 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006030 } else {
6031 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
6032 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6033 ctxt->sax->warning(ctxt->userData,
6034 "Entity '%s' not defined\n", name);
6035 }
6036 }
6037
6038 /*
6039 * [ WFC: Parsed Entity ]
6040 * An entity reference must not contain the name of an
6041 * unparsed entity
6042 */
6043 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
6044 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
6045 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6046 ctxt->sax->error(ctxt->userData,
6047 "Entity reference to unparsed entity %s\n", name);
6048 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006049 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006050 }
6051
6052 /*
6053 * [ WFC: No External Entity References ]
6054 * Attribute values cannot contain direct or indirect
6055 * entity references to external entities.
6056 */
6057 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6058 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
6059 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
6060 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6061 ctxt->sax->error(ctxt->userData,
6062 "Attribute references external entity '%s'\n", name);
6063 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006064 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006065 }
6066 /*
6067 * [ WFC: No < in Attribute Values ]
6068 * The replacement text of any entity referred to directly or
6069 * indirectly in an attribute value (other than "&lt;") must
6070 * not contain a <.
6071 */
6072 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6073 (ent != NULL) &&
6074 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6075 (ent->content != NULL) &&
6076 (xmlStrchr(ent->content, '<'))) {
6077 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6078 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6079 ctxt->sax->error(ctxt->userData,
6080 "'<' in entity '%s' is not allowed in attributes values\n", name);
6081 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006082 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006083 }
6084
6085 /*
6086 * Internal check, no parameter entities here ...
6087 */
6088 else {
6089 switch (ent->etype) {
6090 case XML_INTERNAL_PARAMETER_ENTITY:
6091 case XML_EXTERNAL_PARAMETER_ENTITY:
6092 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6093 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6094 ctxt->sax->error(ctxt->userData,
6095 "Attempt to reference the parameter entity '%s'\n", name);
6096 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006097 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006098 break;
6099 default:
6100 break;
6101 }
6102 }
6103
6104 /*
6105 * [ WFC: No Recursion ]
6106 * A parsed entity must not contain a recursive reference
6107 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006108 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006109 */
6110
6111 } else {
6112 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6113 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6114 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00006115 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006116 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006117 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006118 }
6119 xmlFree(name);
6120 }
6121 }
6122 *str = ptr;
6123 return(ent);
6124}
6125
6126/**
6127 * xmlParsePEReference:
6128 * @ctxt: an XML parser context
6129 *
6130 * parse PEReference declarations
6131 * The entity content is handled directly by pushing it's content as
6132 * a new input stream.
6133 *
6134 * [69] PEReference ::= '%' Name ';'
6135 *
6136 * [ WFC: No Recursion ]
6137 * A parsed entity must not contain a recursive
6138 * reference to itself, either directly or indirectly.
6139 *
6140 * [ WFC: Entity Declared ]
6141 * In a document without any DTD, a document with only an internal DTD
6142 * subset which contains no parameter entity references, or a document
6143 * with "standalone='yes'", ... ... The declaration of a parameter
6144 * entity must precede any reference to it...
6145 *
6146 * [ VC: Entity Declared ]
6147 * In a document with an external subset or external parameter entities
6148 * with "standalone='no'", ... ... The declaration of a parameter entity
6149 * must precede any reference to it...
6150 *
6151 * [ WFC: In DTD ]
6152 * Parameter-entity references may only appear in the DTD.
6153 * NOTE: misleading but this is handled.
6154 */
6155void
6156xmlParsePEReference(xmlParserCtxtPtr ctxt) {
6157 xmlChar *name;
6158 xmlEntityPtr entity = NULL;
6159 xmlParserInputPtr input;
6160
6161 if (RAW == '%') {
6162 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006163 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006164 if (name == NULL) {
6165 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6166 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6167 ctxt->sax->error(ctxt->userData,
6168 "xmlParsePEReference: no name\n");
6169 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006170 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006171 } else {
6172 if (RAW == ';') {
6173 NEXT;
6174 if ((ctxt->sax != NULL) &&
6175 (ctxt->sax->getParameterEntity != NULL))
6176 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6177 name);
6178 if (entity == NULL) {
6179 /*
6180 * [ WFC: Entity Declared ]
6181 * In a document without any DTD, a document with only an
6182 * internal DTD subset which contains no parameter entity
6183 * references, or a document with "standalone='yes'", ...
6184 * ... The declaration of a parameter entity must precede
6185 * any reference to it...
6186 */
6187 if ((ctxt->standalone == 1) ||
6188 ((ctxt->hasExternalSubset == 0) &&
6189 (ctxt->hasPErefs == 0))) {
6190 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6191 if ((!ctxt->disableSAX) &&
6192 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6193 ctxt->sax->error(ctxt->userData,
6194 "PEReference: %%%s; not found\n", name);
6195 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006196 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006197 } else {
6198 /*
6199 * [ VC: Entity Declared ]
6200 * In a document with an external subset or external
6201 * parameter entities with "standalone='no'", ...
6202 * ... The declaration of a parameter entity must precede
6203 * any reference to it...
6204 */
6205 if ((!ctxt->disableSAX) &&
6206 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6207 ctxt->sax->warning(ctxt->userData,
6208 "PEReference: %%%s; not found\n", name);
6209 ctxt->valid = 0;
6210 }
6211 } else {
6212 /*
6213 * Internal checking in case the entity quest barfed
6214 */
6215 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6216 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6217 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6218 ctxt->sax->warning(ctxt->userData,
6219 "Internal: %%%s; is not a parameter entity\n", name);
Daniel Veillardf5582f12002-06-11 10:08:16 +00006220 } else if (ctxt->input->free != deallocblankswrapper) {
6221 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
6222 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00006223 } else {
6224 /*
6225 * TODO !!!
6226 * handle the extra spaces added before and after
6227 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6228 */
6229 input = xmlNewEntityInputStream(ctxt, entity);
6230 xmlPushInput(ctxt, input);
6231 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6232 (RAW == '<') && (NXT(1) == '?') &&
6233 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6234 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6235 xmlParseTextDecl(ctxt);
6236 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6237 /*
6238 * The XML REC instructs us to stop parsing
6239 * right here
6240 */
6241 ctxt->instate = XML_PARSER_EOF;
6242 xmlFree(name);
6243 return;
6244 }
6245 }
Owen Taylor3473f882001-02-23 17:55:21 +00006246 }
6247 }
6248 ctxt->hasPErefs = 1;
6249 } else {
6250 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6251 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6252 ctxt->sax->error(ctxt->userData,
6253 "xmlParsePEReference: expecting ';'\n");
6254 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006255 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006256 }
6257 xmlFree(name);
6258 }
6259 }
6260}
6261
6262/**
6263 * xmlParseStringPEReference:
6264 * @ctxt: an XML parser context
6265 * @str: a pointer to an index in the string
6266 *
6267 * parse PEReference declarations
6268 *
6269 * [69] PEReference ::= '%' Name ';'
6270 *
6271 * [ WFC: No Recursion ]
6272 * A parsed entity must not contain a recursive
6273 * reference to itself, either directly or indirectly.
6274 *
6275 * [ WFC: Entity Declared ]
6276 * In a document without any DTD, a document with only an internal DTD
6277 * subset which contains no parameter entity references, or a document
6278 * with "standalone='yes'", ... ... The declaration of a parameter
6279 * entity must precede any reference to it...
6280 *
6281 * [ VC: Entity Declared ]
6282 * In a document with an external subset or external parameter entities
6283 * with "standalone='no'", ... ... The declaration of a parameter entity
6284 * must precede any reference to it...
6285 *
6286 * [ WFC: In DTD ]
6287 * Parameter-entity references may only appear in the DTD.
6288 * NOTE: misleading but this is handled.
6289 *
6290 * Returns the string of the entity content.
6291 * str is updated to the current value of the index
6292 */
6293xmlEntityPtr
6294xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6295 const xmlChar *ptr;
6296 xmlChar cur;
6297 xmlChar *name;
6298 xmlEntityPtr entity = NULL;
6299
6300 if ((str == NULL) || (*str == NULL)) return(NULL);
6301 ptr = *str;
6302 cur = *ptr;
6303 if (cur == '%') {
6304 ptr++;
6305 cur = *ptr;
6306 name = xmlParseStringName(ctxt, &ptr);
6307 if (name == NULL) {
6308 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6309 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6310 ctxt->sax->error(ctxt->userData,
6311 "xmlParseStringPEReference: no name\n");
6312 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006313 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006314 } else {
6315 cur = *ptr;
6316 if (cur == ';') {
6317 ptr++;
6318 cur = *ptr;
6319 if ((ctxt->sax != NULL) &&
6320 (ctxt->sax->getParameterEntity != NULL))
6321 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6322 name);
6323 if (entity == NULL) {
6324 /*
6325 * [ WFC: Entity Declared ]
6326 * In a document without any DTD, a document with only an
6327 * internal DTD subset which contains no parameter entity
6328 * references, or a document with "standalone='yes'", ...
6329 * ... The declaration of a parameter entity must precede
6330 * any reference to it...
6331 */
6332 if ((ctxt->standalone == 1) ||
6333 ((ctxt->hasExternalSubset == 0) &&
6334 (ctxt->hasPErefs == 0))) {
6335 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6336 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6337 ctxt->sax->error(ctxt->userData,
6338 "PEReference: %%%s; not found\n", name);
6339 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006340 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006341 } else {
6342 /*
6343 * [ VC: Entity Declared ]
6344 * In a document with an external subset or external
6345 * parameter entities with "standalone='no'", ...
6346 * ... The declaration of a parameter entity must
6347 * precede any reference to it...
6348 */
6349 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6350 ctxt->sax->warning(ctxt->userData,
6351 "PEReference: %%%s; not found\n", name);
6352 ctxt->valid = 0;
6353 }
6354 } else {
6355 /*
6356 * Internal checking in case the entity quest barfed
6357 */
6358 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6359 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6360 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6361 ctxt->sax->warning(ctxt->userData,
6362 "Internal: %%%s; is not a parameter entity\n", name);
6363 }
6364 }
6365 ctxt->hasPErefs = 1;
6366 } else {
6367 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6368 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6369 ctxt->sax->error(ctxt->userData,
6370 "xmlParseStringPEReference: expecting ';'\n");
6371 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006372 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006373 }
6374 xmlFree(name);
6375 }
6376 }
6377 *str = ptr;
6378 return(entity);
6379}
6380
6381/**
6382 * xmlParseDocTypeDecl:
6383 * @ctxt: an XML parser context
6384 *
6385 * parse a DOCTYPE declaration
6386 *
6387 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6388 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6389 *
6390 * [ VC: Root Element Type ]
6391 * The Name in the document type declaration must match the element
6392 * type of the root element.
6393 */
6394
6395void
6396xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6397 xmlChar *name = NULL;
6398 xmlChar *ExternalID = NULL;
6399 xmlChar *URI = NULL;
6400
6401 /*
6402 * We know that '<!DOCTYPE' has been detected.
6403 */
6404 SKIP(9);
6405
6406 SKIP_BLANKS;
6407
6408 /*
6409 * Parse the DOCTYPE name.
6410 */
6411 name = xmlParseName(ctxt);
6412 if (name == NULL) {
6413 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6414 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6415 ctxt->sax->error(ctxt->userData,
6416 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6417 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006418 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006419 }
6420 ctxt->intSubName = name;
6421
6422 SKIP_BLANKS;
6423
6424 /*
6425 * Check for SystemID and ExternalID
6426 */
6427 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6428
6429 if ((URI != NULL) || (ExternalID != NULL)) {
6430 ctxt->hasExternalSubset = 1;
6431 }
6432 ctxt->extSubURI = URI;
6433 ctxt->extSubSystem = ExternalID;
6434
6435 SKIP_BLANKS;
6436
6437 /*
6438 * Create and update the internal subset.
6439 */
6440 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6441 (!ctxt->disableSAX))
6442 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6443
6444 /*
6445 * Is there any internal subset declarations ?
6446 * they are handled separately in xmlParseInternalSubset()
6447 */
6448 if (RAW == '[')
6449 return;
6450
6451 /*
6452 * We should be at the end of the DOCTYPE declaration.
6453 */
6454 if (RAW != '>') {
6455 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6456 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006457 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006458 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006459 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006460 }
6461 NEXT;
6462}
6463
6464/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006465 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006466 * @ctxt: an XML parser context
6467 *
6468 * parse the internal subset declaration
6469 *
6470 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6471 */
6472
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006473static void
Owen Taylor3473f882001-02-23 17:55:21 +00006474xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6475 /*
6476 * Is there any DTD definition ?
6477 */
6478 if (RAW == '[') {
6479 ctxt->instate = XML_PARSER_DTD;
6480 NEXT;
6481 /*
6482 * Parse the succession of Markup declarations and
6483 * PEReferences.
6484 * Subsequence (markupdecl | PEReference | S)*
6485 */
6486 while (RAW != ']') {
6487 const xmlChar *check = CUR_PTR;
6488 int cons = ctxt->input->consumed;
6489
6490 SKIP_BLANKS;
6491 xmlParseMarkupDecl(ctxt);
6492 xmlParsePEReference(ctxt);
6493
6494 /*
6495 * Pop-up of finished entities.
6496 */
6497 while ((RAW == 0) && (ctxt->inputNr > 1))
6498 xmlPopInput(ctxt);
6499
6500 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6501 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6502 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6503 ctxt->sax->error(ctxt->userData,
6504 "xmlParseInternalSubset: error detected in Markup declaration\n");
6505 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006506 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006507 break;
6508 }
6509 }
6510 if (RAW == ']') {
6511 NEXT;
6512 SKIP_BLANKS;
6513 }
6514 }
6515
6516 /*
6517 * We should be at the end of the DOCTYPE declaration.
6518 */
6519 if (RAW != '>') {
6520 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6521 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006522 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006523 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006524 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006525 }
6526 NEXT;
6527}
6528
6529/**
6530 * xmlParseAttribute:
6531 * @ctxt: an XML parser context
6532 * @value: a xmlChar ** used to store the value of the attribute
6533 *
6534 * parse an attribute
6535 *
6536 * [41] Attribute ::= Name Eq AttValue
6537 *
6538 * [ WFC: No External Entity References ]
6539 * Attribute values cannot contain direct or indirect entity references
6540 * to external entities.
6541 *
6542 * [ WFC: No < in Attribute Values ]
6543 * The replacement text of any entity referred to directly or indirectly in
6544 * an attribute value (other than "&lt;") must not contain a <.
6545 *
6546 * [ VC: Attribute Value Type ]
6547 * The attribute must have been declared; the value must be of the type
6548 * declared for it.
6549 *
6550 * [25] Eq ::= S? '=' S?
6551 *
6552 * With namespace:
6553 *
6554 * [NS 11] Attribute ::= QName Eq AttValue
6555 *
6556 * Also the case QName == xmlns:??? is handled independently as a namespace
6557 * definition.
6558 *
6559 * Returns the attribute name, and the value in *value.
6560 */
6561
6562xmlChar *
6563xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6564 xmlChar *name, *val;
6565
6566 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006567 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006568 name = xmlParseName(ctxt);
6569 if (name == NULL) {
6570 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6571 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6572 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6573 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006574 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006575 return(NULL);
6576 }
6577
6578 /*
6579 * read the value
6580 */
6581 SKIP_BLANKS;
6582 if (RAW == '=') {
6583 NEXT;
6584 SKIP_BLANKS;
6585 val = xmlParseAttValue(ctxt);
6586 ctxt->instate = XML_PARSER_CONTENT;
6587 } else {
6588 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6589 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6590 ctxt->sax->error(ctxt->userData,
6591 "Specification mandate value for attribute %s\n", name);
6592 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006593 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006594 xmlFree(name);
6595 return(NULL);
6596 }
6597
6598 /*
6599 * Check that xml:lang conforms to the specification
6600 * No more registered as an error, just generate a warning now
6601 * since this was deprecated in XML second edition
6602 */
6603 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6604 if (!xmlCheckLanguageID(val)) {
6605 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6606 ctxt->sax->warning(ctxt->userData,
6607 "Malformed value for xml:lang : %s\n", val);
6608 }
6609 }
6610
6611 /*
6612 * Check that xml:space conforms to the specification
6613 */
6614 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6615 if (xmlStrEqual(val, BAD_CAST "default"))
6616 *(ctxt->space) = 0;
6617 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6618 *(ctxt->space) = 1;
6619 else {
6620 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6621 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6622 ctxt->sax->error(ctxt->userData,
Daniel Veillard642104e2003-03-26 16:32:05 +00006623"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Owen Taylor3473f882001-02-23 17:55:21 +00006624 val);
6625 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006626 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006627 }
6628 }
6629
6630 *value = val;
6631 return(name);
6632}
6633
6634/**
6635 * xmlParseStartTag:
6636 * @ctxt: an XML parser context
6637 *
6638 * parse a start of tag either for rule element or
6639 * EmptyElement. In both case we don't parse the tag closing chars.
6640 *
6641 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6642 *
6643 * [ WFC: Unique Att Spec ]
6644 * No attribute name may appear more than once in the same start-tag or
6645 * empty-element tag.
6646 *
6647 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6648 *
6649 * [ WFC: Unique Att Spec ]
6650 * No attribute name may appear more than once in the same start-tag or
6651 * empty-element tag.
6652 *
6653 * With namespace:
6654 *
6655 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6656 *
6657 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6658 *
6659 * Returns the element name parsed
6660 */
6661
6662xmlChar *
6663xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6664 xmlChar *name;
6665 xmlChar *attname;
6666 xmlChar *attvalue;
6667 const xmlChar **atts = NULL;
6668 int nbatts = 0;
6669 int maxatts = 0;
6670 int i;
6671
6672 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006673 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006674
6675 name = xmlParseName(ctxt);
6676 if (name == NULL) {
6677 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6678 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6679 ctxt->sax->error(ctxt->userData,
6680 "xmlParseStartTag: invalid element name\n");
6681 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006682 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006683 return(NULL);
6684 }
6685
6686 /*
6687 * Now parse the attributes, it ends up with the ending
6688 *
6689 * (S Attribute)* S?
6690 */
6691 SKIP_BLANKS;
6692 GROW;
6693
Daniel Veillard21a0f912001-02-25 19:54:14 +00006694 while ((RAW != '>') &&
6695 ((RAW != '/') || (NXT(1) != '>')) &&
6696 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006697 const xmlChar *q = CUR_PTR;
6698 int cons = ctxt->input->consumed;
6699
6700 attname = xmlParseAttribute(ctxt, &attvalue);
6701 if ((attname != NULL) && (attvalue != NULL)) {
6702 /*
6703 * [ WFC: Unique Att Spec ]
6704 * No attribute name may appear more than once in the same
6705 * start-tag or empty-element tag.
6706 */
6707 for (i = 0; i < nbatts;i += 2) {
6708 if (xmlStrEqual(atts[i], attname)) {
6709 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6710 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6711 ctxt->sax->error(ctxt->userData,
6712 "Attribute %s redefined\n",
6713 attname);
6714 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006715 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006716 xmlFree(attname);
6717 xmlFree(attvalue);
6718 goto failed;
6719 }
6720 }
6721
6722 /*
6723 * Add the pair to atts
6724 */
6725 if (atts == NULL) {
6726 maxatts = 10;
6727 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6728 if (atts == NULL) {
6729 xmlGenericError(xmlGenericErrorContext,
6730 "malloc of %ld byte failed\n",
6731 maxatts * (long)sizeof(xmlChar *));
6732 return(NULL);
6733 }
6734 } else if (nbatts + 4 > maxatts) {
6735 maxatts *= 2;
6736 atts = (const xmlChar **) xmlRealloc((void *) atts,
6737 maxatts * sizeof(xmlChar *));
6738 if (atts == NULL) {
6739 xmlGenericError(xmlGenericErrorContext,
6740 "realloc of %ld byte failed\n",
6741 maxatts * (long)sizeof(xmlChar *));
6742 return(NULL);
6743 }
6744 }
6745 atts[nbatts++] = attname;
6746 atts[nbatts++] = attvalue;
6747 atts[nbatts] = NULL;
6748 atts[nbatts + 1] = NULL;
6749 } else {
6750 if (attname != NULL)
6751 xmlFree(attname);
6752 if (attvalue != NULL)
6753 xmlFree(attvalue);
6754 }
6755
6756failed:
6757
Daniel Veillard3772de32002-12-17 10:31:45 +00006758 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006759 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6760 break;
6761 if (!IS_BLANK(RAW)) {
6762 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6763 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6764 ctxt->sax->error(ctxt->userData,
6765 "attributes construct error\n");
6766 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006767 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006768 }
6769 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006770 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6771 (attname == NULL) && (attvalue == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006772 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6773 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6774 ctxt->sax->error(ctxt->userData,
6775 "xmlParseStartTag: problem parsing attributes\n");
6776 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006777 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006778 break;
6779 }
6780 GROW;
6781 }
6782
6783 /*
6784 * SAX: Start of Element !
6785 */
6786 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6787 (!ctxt->disableSAX))
6788 ctxt->sax->startElement(ctxt->userData, name, atts);
6789
6790 if (atts != NULL) {
6791 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6792 xmlFree((void *) atts);
6793 }
6794 return(name);
6795}
6796
6797/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006798 * xmlParseEndTagInternal:
Owen Taylor3473f882001-02-23 17:55:21 +00006799 * @ctxt: an XML parser context
6800 *
6801 * parse an end of tag
6802 *
6803 * [42] ETag ::= '</' Name S? '>'
6804 *
6805 * With namespace
6806 *
6807 * [NS 9] ETag ::= '</' QName S? '>'
6808 */
6809
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006810static void
6811xmlParseEndTagInternal(xmlParserCtxtPtr ctxt, int line) {
Owen Taylor3473f882001-02-23 17:55:21 +00006812 xmlChar *name;
6813 xmlChar *oldname;
6814
6815 GROW;
6816 if ((RAW != '<') || (NXT(1) != '/')) {
6817 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6818 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6819 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6820 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006821 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006822 return;
6823 }
6824 SKIP(2);
6825
Daniel Veillard46de64e2002-05-29 08:21:33 +00006826 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006827
6828 /*
6829 * We should definitely be at the ending "S? '>'" part
6830 */
6831 GROW;
6832 SKIP_BLANKS;
6833 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6834 ctxt->errNo = XML_ERR_GT_REQUIRED;
6835 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6836 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6837 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006838 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006839 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006840 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006841
6842 /*
6843 * [ WFC: Element Type Match ]
6844 * The Name in an element's end-tag must match the element type in the
6845 * start-tag.
6846 *
6847 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006848 if (name != (xmlChar*)1) {
Owen Taylor3473f882001-02-23 17:55:21 +00006849 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6850 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00006851 if (name != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00006852 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006853 "Opening and ending tag mismatch: %s line %d and %s\n",
6854 ctxt->name, line, name);
Daniel Veillard46de64e2002-05-29 08:21:33 +00006855 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006856 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006857 "Ending tag error for: %s line %d\n", ctxt->name, line);
Owen Taylor3473f882001-02-23 17:55:21 +00006858 }
6859
6860 }
6861 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006862 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006863 if (name != NULL)
6864 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00006865 }
6866
6867 /*
6868 * SAX: End of Tag
6869 */
6870 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6871 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006872 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006873
Owen Taylor3473f882001-02-23 17:55:21 +00006874 oldname = namePop(ctxt);
6875 spacePop(ctxt);
6876 if (oldname != NULL) {
6877#ifdef DEBUG_STACK
6878 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6879#endif
6880 xmlFree(oldname);
6881 }
6882 return;
6883}
6884
6885/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006886 * xmlParseEndTag:
6887 * @ctxt: an XML parser context
6888 *
6889 * parse an end of tag
6890 *
6891 * [42] ETag ::= '</' Name S? '>'
6892 *
6893 * With namespace
6894 *
6895 * [NS 9] ETag ::= '</' QName S? '>'
6896 */
6897
6898void
6899xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6900 xmlParseEndTagInternal(ctxt, 0);
6901}
6902
6903/**
Owen Taylor3473f882001-02-23 17:55:21 +00006904 * xmlParseCDSect:
6905 * @ctxt: an XML parser context
6906 *
6907 * Parse escaped pure raw content.
6908 *
6909 * [18] CDSect ::= CDStart CData CDEnd
6910 *
6911 * [19] CDStart ::= '<![CDATA['
6912 *
6913 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6914 *
6915 * [21] CDEnd ::= ']]>'
6916 */
6917void
6918xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6919 xmlChar *buf = NULL;
6920 int len = 0;
6921 int size = XML_PARSER_BUFFER_SIZE;
6922 int r, rl;
6923 int s, sl;
6924 int cur, l;
6925 int count = 0;
6926
6927 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6928 (NXT(2) == '[') && (NXT(3) == 'C') &&
6929 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6930 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6931 (NXT(8) == '[')) {
6932 SKIP(9);
6933 } else
6934 return;
6935
6936 ctxt->instate = XML_PARSER_CDATA_SECTION;
6937 r = CUR_CHAR(rl);
6938 if (!IS_CHAR(r)) {
6939 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6940 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6941 ctxt->sax->error(ctxt->userData,
6942 "CData section not finished\n");
6943 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006944 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006945 ctxt->instate = XML_PARSER_CONTENT;
6946 return;
6947 }
6948 NEXTL(rl);
6949 s = CUR_CHAR(sl);
6950 if (!IS_CHAR(s)) {
6951 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6952 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6953 ctxt->sax->error(ctxt->userData,
6954 "CData section not finished\n");
6955 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006956 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006957 ctxt->instate = XML_PARSER_CONTENT;
6958 return;
6959 }
6960 NEXTL(sl);
6961 cur = CUR_CHAR(l);
6962 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6963 if (buf == NULL) {
6964 xmlGenericError(xmlGenericErrorContext,
6965 "malloc of %d byte failed\n", size);
6966 return;
6967 }
6968 while (IS_CHAR(cur) &&
6969 ((r != ']') || (s != ']') || (cur != '>'))) {
6970 if (len + 5 >= size) {
6971 size *= 2;
6972 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6973 if (buf == NULL) {
6974 xmlGenericError(xmlGenericErrorContext,
6975 "realloc of %d byte failed\n", size);
6976 return;
6977 }
6978 }
6979 COPY_BUF(rl,buf,len,r);
6980 r = s;
6981 rl = sl;
6982 s = cur;
6983 sl = l;
6984 count++;
6985 if (count > 50) {
6986 GROW;
6987 count = 0;
6988 }
6989 NEXTL(l);
6990 cur = CUR_CHAR(l);
6991 }
6992 buf[len] = 0;
6993 ctxt->instate = XML_PARSER_CONTENT;
6994 if (cur != '>') {
6995 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6996 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6997 ctxt->sax->error(ctxt->userData,
6998 "CData section not finished\n%.50s\n", buf);
6999 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007000 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007001 xmlFree(buf);
7002 return;
7003 }
7004 NEXTL(l);
7005
7006 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007007 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00007008 */
7009 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
7010 if (ctxt->sax->cdataBlock != NULL)
7011 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00007012 else if (ctxt->sax->characters != NULL)
7013 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00007014 }
7015 xmlFree(buf);
7016}
7017
7018/**
7019 * xmlParseContent:
7020 * @ctxt: an XML parser context
7021 *
7022 * Parse a content:
7023 *
7024 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
7025 */
7026
7027void
7028xmlParseContent(xmlParserCtxtPtr ctxt) {
7029 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00007030 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00007031 ((RAW != '<') || (NXT(1) != '/'))) {
7032 const xmlChar *test = CUR_PTR;
7033 int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00007034 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00007035
7036 /*
Owen Taylor3473f882001-02-23 17:55:21 +00007037 * First case : a Processing Instruction.
7038 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00007039 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007040 xmlParsePI(ctxt);
7041 }
7042
7043 /*
7044 * Second case : a CDSection
7045 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007046 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00007047 (NXT(2) == '[') && (NXT(3) == 'C') &&
7048 (NXT(4) == 'D') && (NXT(5) == 'A') &&
7049 (NXT(6) == 'T') && (NXT(7) == 'A') &&
7050 (NXT(8) == '[')) {
7051 xmlParseCDSect(ctxt);
7052 }
7053
7054 /*
7055 * Third case : a comment
7056 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007057 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00007058 (NXT(2) == '-') && (NXT(3) == '-')) {
7059 xmlParseComment(ctxt);
7060 ctxt->instate = XML_PARSER_CONTENT;
7061 }
7062
7063 /*
7064 * Fourth case : a sub-element.
7065 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007066 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00007067 xmlParseElement(ctxt);
7068 }
7069
7070 /*
7071 * Fifth case : a reference. If if has not been resolved,
7072 * parsing returns it's Name, create the node
7073 */
7074
Daniel Veillard21a0f912001-02-25 19:54:14 +00007075 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00007076 xmlParseReference(ctxt);
7077 }
7078
7079 /*
7080 * Last case, text. Note that References are handled directly.
7081 */
7082 else {
7083 xmlParseCharData(ctxt, 0);
7084 }
7085
7086 GROW;
7087 /*
7088 * Pop-up of finished entities.
7089 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00007090 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00007091 xmlPopInput(ctxt);
7092 SHRINK;
7093
Daniel Veillardfdc91562002-07-01 21:52:03 +00007094 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007095 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
7096 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7097 ctxt->sax->error(ctxt->userData,
7098 "detected an error in element content\n");
7099 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007100 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007101 ctxt->instate = XML_PARSER_EOF;
7102 break;
7103 }
7104 }
7105}
7106
7107/**
7108 * xmlParseElement:
7109 * @ctxt: an XML parser context
7110 *
7111 * parse an XML element, this is highly recursive
7112 *
7113 * [39] element ::= EmptyElemTag | STag content ETag
7114 *
7115 * [ WFC: Element Type Match ]
7116 * The Name in an element's end-tag must match the element type in the
7117 * start-tag.
7118 *
7119 * [ VC: Element Valid ]
7120 * An element is valid if there is a declaration matching elementdecl
7121 * where the Name matches the element type and one of the following holds:
7122 * - The declaration matches EMPTY and the element has no content.
7123 * - The declaration matches children and the sequence of child elements
7124 * belongs to the language generated by the regular expression in the
7125 * content model, with optional white space (characters matching the
7126 * nonterminal S) between each pair of child elements.
7127 * - The declaration matches Mixed and the content consists of character
7128 * data and child elements whose types match names in the content model.
7129 * - The declaration matches ANY, and the types of any child elements have
7130 * been declared.
7131 */
7132
7133void
7134xmlParseElement(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00007135 xmlChar *name;
7136 xmlChar *oldname;
7137 xmlParserNodeInfo node_info;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007138 int line;
Owen Taylor3473f882001-02-23 17:55:21 +00007139 xmlNodePtr ret;
7140
7141 /* Capture start position */
7142 if (ctxt->record_info) {
7143 node_info.begin_pos = ctxt->input->consumed +
7144 (CUR_PTR - ctxt->input->base);
7145 node_info.begin_line = ctxt->input->line;
7146 }
7147
7148 if (ctxt->spaceNr == 0)
7149 spacePush(ctxt, -1);
7150 else
7151 spacePush(ctxt, *ctxt->space);
7152
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007153 line = ctxt->input->line;
Owen Taylor3473f882001-02-23 17:55:21 +00007154 name = xmlParseStartTag(ctxt);
7155 if (name == NULL) {
7156 spacePop(ctxt);
7157 return;
7158 }
7159 namePush(ctxt, name);
7160 ret = ctxt->node;
7161
7162 /*
7163 * [ VC: Root Element Type ]
7164 * The Name in the document type declaration must match the element
7165 * type of the root element.
7166 */
7167 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7168 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7169 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7170
7171 /*
7172 * Check for an Empty Element.
7173 */
7174 if ((RAW == '/') && (NXT(1) == '>')) {
7175 SKIP(2);
7176 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7177 (!ctxt->disableSAX))
7178 ctxt->sax->endElement(ctxt->userData, name);
7179 oldname = namePop(ctxt);
7180 spacePop(ctxt);
7181 if (oldname != NULL) {
7182#ifdef DEBUG_STACK
7183 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7184#endif
7185 xmlFree(oldname);
7186 }
7187 if ( ret != NULL && ctxt->record_info ) {
7188 node_info.end_pos = ctxt->input->consumed +
7189 (CUR_PTR - ctxt->input->base);
7190 node_info.end_line = ctxt->input->line;
7191 node_info.node = ret;
7192 xmlParserAddNodeInfo(ctxt, &node_info);
7193 }
7194 return;
7195 }
7196 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00007197 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007198 } else {
7199 ctxt->errNo = XML_ERR_GT_REQUIRED;
7200 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7201 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007202 "Couldn't find end of Start Tag %s line %d\n",
7203 name, line);
Owen Taylor3473f882001-02-23 17:55:21 +00007204 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007205 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007206
7207 /*
7208 * end of parsing of this node.
7209 */
7210 nodePop(ctxt);
7211 oldname = namePop(ctxt);
7212 spacePop(ctxt);
7213 if (oldname != NULL) {
7214#ifdef DEBUG_STACK
7215 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7216#endif
7217 xmlFree(oldname);
7218 }
7219
7220 /*
7221 * Capture end position and add node
7222 */
7223 if ( ret != NULL && ctxt->record_info ) {
7224 node_info.end_pos = ctxt->input->consumed +
7225 (CUR_PTR - ctxt->input->base);
7226 node_info.end_line = ctxt->input->line;
7227 node_info.node = ret;
7228 xmlParserAddNodeInfo(ctxt, &node_info);
7229 }
7230 return;
7231 }
7232
7233 /*
7234 * Parse the content of the element:
7235 */
7236 xmlParseContent(ctxt);
7237 if (!IS_CHAR(RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00007238 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00007239 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7240 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007241 "Premature end of data in tag %s line %d\n", name, line);
Owen Taylor3473f882001-02-23 17:55:21 +00007242 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007243 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007244
7245 /*
7246 * end of parsing of this node.
7247 */
7248 nodePop(ctxt);
7249 oldname = namePop(ctxt);
7250 spacePop(ctxt);
7251 if (oldname != NULL) {
7252#ifdef DEBUG_STACK
7253 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7254#endif
7255 xmlFree(oldname);
7256 }
7257 return;
7258 }
7259
7260 /*
7261 * parse the end of tag: '</' should be here.
7262 */
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007263 xmlParseEndTagInternal(ctxt, line);
Owen Taylor3473f882001-02-23 17:55:21 +00007264
7265 /*
7266 * Capture end position and add node
7267 */
7268 if ( ret != NULL && ctxt->record_info ) {
7269 node_info.end_pos = ctxt->input->consumed +
7270 (CUR_PTR - ctxt->input->base);
7271 node_info.end_line = ctxt->input->line;
7272 node_info.node = ret;
7273 xmlParserAddNodeInfo(ctxt, &node_info);
7274 }
7275}
7276
7277/**
7278 * xmlParseVersionNum:
7279 * @ctxt: an XML parser context
7280 *
7281 * parse the XML version value.
7282 *
7283 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7284 *
7285 * Returns the string giving the XML version number, or NULL
7286 */
7287xmlChar *
7288xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7289 xmlChar *buf = NULL;
7290 int len = 0;
7291 int size = 10;
7292 xmlChar cur;
7293
7294 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7295 if (buf == NULL) {
7296 xmlGenericError(xmlGenericErrorContext,
7297 "malloc of %d byte failed\n", size);
7298 return(NULL);
7299 }
7300 cur = CUR;
7301 while (((cur >= 'a') && (cur <= 'z')) ||
7302 ((cur >= 'A') && (cur <= 'Z')) ||
7303 ((cur >= '0') && (cur <= '9')) ||
7304 (cur == '_') || (cur == '.') ||
7305 (cur == ':') || (cur == '-')) {
7306 if (len + 1 >= size) {
7307 size *= 2;
7308 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7309 if (buf == NULL) {
7310 xmlGenericError(xmlGenericErrorContext,
7311 "realloc of %d byte failed\n", size);
7312 return(NULL);
7313 }
7314 }
7315 buf[len++] = cur;
7316 NEXT;
7317 cur=CUR;
7318 }
7319 buf[len] = 0;
7320 return(buf);
7321}
7322
7323/**
7324 * xmlParseVersionInfo:
7325 * @ctxt: an XML parser context
7326 *
7327 * parse the XML version.
7328 *
7329 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7330 *
7331 * [25] Eq ::= S? '=' S?
7332 *
7333 * Returns the version string, e.g. "1.0"
7334 */
7335
7336xmlChar *
7337xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7338 xmlChar *version = NULL;
7339 const xmlChar *q;
7340
7341 if ((RAW == 'v') && (NXT(1) == 'e') &&
7342 (NXT(2) == 'r') && (NXT(3) == 's') &&
7343 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7344 (NXT(6) == 'n')) {
7345 SKIP(7);
7346 SKIP_BLANKS;
7347 if (RAW != '=') {
7348 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7350 ctxt->sax->error(ctxt->userData,
7351 "xmlParseVersionInfo : expected '='\n");
7352 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007353 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007354 return(NULL);
7355 }
7356 NEXT;
7357 SKIP_BLANKS;
7358 if (RAW == '"') {
7359 NEXT;
7360 q = CUR_PTR;
7361 version = xmlParseVersionNum(ctxt);
7362 if (RAW != '"') {
7363 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7364 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7365 ctxt->sax->error(ctxt->userData,
7366 "String not closed\n%.50s\n", q);
7367 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007368 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007369 } else
7370 NEXT;
7371 } else if (RAW == '\''){
7372 NEXT;
7373 q = CUR_PTR;
7374 version = xmlParseVersionNum(ctxt);
7375 if (RAW != '\'') {
7376 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7377 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7378 ctxt->sax->error(ctxt->userData,
7379 "String not closed\n%.50s\n", q);
7380 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007381 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007382 } else
7383 NEXT;
7384 } else {
7385 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7386 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7387 ctxt->sax->error(ctxt->userData,
7388 "xmlParseVersionInfo : expected ' or \"\n");
7389 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007390 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007391 }
7392 }
7393 return(version);
7394}
7395
7396/**
7397 * xmlParseEncName:
7398 * @ctxt: an XML parser context
7399 *
7400 * parse the XML encoding name
7401 *
7402 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7403 *
7404 * Returns the encoding name value or NULL
7405 */
7406xmlChar *
7407xmlParseEncName(xmlParserCtxtPtr ctxt) {
7408 xmlChar *buf = NULL;
7409 int len = 0;
7410 int size = 10;
7411 xmlChar cur;
7412
7413 cur = CUR;
7414 if (((cur >= 'a') && (cur <= 'z')) ||
7415 ((cur >= 'A') && (cur <= 'Z'))) {
7416 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7417 if (buf == NULL) {
7418 xmlGenericError(xmlGenericErrorContext,
7419 "malloc of %d byte failed\n", size);
7420 return(NULL);
7421 }
7422
7423 buf[len++] = cur;
7424 NEXT;
7425 cur = CUR;
7426 while (((cur >= 'a') && (cur <= 'z')) ||
7427 ((cur >= 'A') && (cur <= 'Z')) ||
7428 ((cur >= '0') && (cur <= '9')) ||
7429 (cur == '.') || (cur == '_') ||
7430 (cur == '-')) {
7431 if (len + 1 >= size) {
7432 size *= 2;
7433 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7434 if (buf == NULL) {
7435 xmlGenericError(xmlGenericErrorContext,
7436 "realloc of %d byte failed\n", size);
7437 return(NULL);
7438 }
7439 }
7440 buf[len++] = cur;
7441 NEXT;
7442 cur = CUR;
7443 if (cur == 0) {
7444 SHRINK;
7445 GROW;
7446 cur = CUR;
7447 }
7448 }
7449 buf[len] = 0;
7450 } else {
7451 ctxt->errNo = XML_ERR_ENCODING_NAME;
7452 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7453 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7454 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007455 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007456 }
7457 return(buf);
7458}
7459
7460/**
7461 * xmlParseEncodingDecl:
7462 * @ctxt: an XML parser context
7463 *
7464 * parse the XML encoding declaration
7465 *
7466 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7467 *
7468 * this setups the conversion filters.
7469 *
7470 * Returns the encoding value or NULL
7471 */
7472
7473xmlChar *
7474xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7475 xmlChar *encoding = NULL;
7476 const xmlChar *q;
7477
7478 SKIP_BLANKS;
7479 if ((RAW == 'e') && (NXT(1) == 'n') &&
7480 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7481 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7482 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7483 SKIP(8);
7484 SKIP_BLANKS;
7485 if (RAW != '=') {
7486 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7487 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7488 ctxt->sax->error(ctxt->userData,
7489 "xmlParseEncodingDecl : expected '='\n");
7490 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007491 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007492 return(NULL);
7493 }
7494 NEXT;
7495 SKIP_BLANKS;
7496 if (RAW == '"') {
7497 NEXT;
7498 q = CUR_PTR;
7499 encoding = xmlParseEncName(ctxt);
7500 if (RAW != '"') {
7501 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7502 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7503 ctxt->sax->error(ctxt->userData,
7504 "String not closed\n%.50s\n", q);
7505 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007506 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007507 } else
7508 NEXT;
7509 } else if (RAW == '\''){
7510 NEXT;
7511 q = CUR_PTR;
7512 encoding = xmlParseEncName(ctxt);
7513 if (RAW != '\'') {
7514 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7515 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7516 ctxt->sax->error(ctxt->userData,
7517 "String not closed\n%.50s\n", q);
7518 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007519 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007520 } else
7521 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00007522 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007523 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7524 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7525 ctxt->sax->error(ctxt->userData,
7526 "xmlParseEncodingDecl : expected ' or \"\n");
7527 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007528 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007529 }
7530 if (encoding != NULL) {
7531 xmlCharEncoding enc;
7532 xmlCharEncodingHandlerPtr handler;
7533
7534 if (ctxt->input->encoding != NULL)
7535 xmlFree((xmlChar *) ctxt->input->encoding);
7536 ctxt->input->encoding = encoding;
7537
7538 enc = xmlParseCharEncoding((const char *) encoding);
7539 /*
7540 * registered set of known encodings
7541 */
7542 if (enc != XML_CHAR_ENCODING_ERROR) {
7543 xmlSwitchEncoding(ctxt, enc);
7544 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
Daniel Veillard46d6c442002-04-09 16:10:39 +00007545 ctxt->input->encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007546 xmlFree(encoding);
7547 return(NULL);
7548 }
7549 } else {
7550 /*
7551 * fallback for unknown encodings
7552 */
7553 handler = xmlFindCharEncodingHandler((const char *) encoding);
7554 if (handler != NULL) {
7555 xmlSwitchToEncoding(ctxt, handler);
7556 } else {
7557 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7558 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7559 ctxt->sax->error(ctxt->userData,
7560 "Unsupported encoding %s\n", encoding);
7561 return(NULL);
7562 }
7563 }
7564 }
7565 }
7566 return(encoding);
7567}
7568
7569/**
7570 * xmlParseSDDecl:
7571 * @ctxt: an XML parser context
7572 *
7573 * parse the XML standalone declaration
7574 *
7575 * [32] SDDecl ::= S 'standalone' Eq
7576 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7577 *
7578 * [ VC: Standalone Document Declaration ]
7579 * TODO The standalone document declaration must have the value "no"
7580 * if any external markup declarations contain declarations of:
7581 * - attributes with default values, if elements to which these
7582 * attributes apply appear in the document without specifications
7583 * of values for these attributes, or
7584 * - entities (other than amp, lt, gt, apos, quot), if references
7585 * to those entities appear in the document, or
7586 * - attributes with values subject to normalization, where the
7587 * attribute appears in the document with a value which will change
7588 * as a result of normalization, or
7589 * - element types with element content, if white space occurs directly
7590 * within any instance of those types.
7591 *
7592 * Returns 1 if standalone, 0 otherwise
7593 */
7594
7595int
7596xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7597 int standalone = -1;
7598
7599 SKIP_BLANKS;
7600 if ((RAW == 's') && (NXT(1) == 't') &&
7601 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7602 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7603 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7604 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7605 SKIP(10);
7606 SKIP_BLANKS;
7607 if (RAW != '=') {
7608 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7609 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7610 ctxt->sax->error(ctxt->userData,
7611 "XML standalone declaration : expected '='\n");
7612 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007613 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007614 return(standalone);
7615 }
7616 NEXT;
7617 SKIP_BLANKS;
7618 if (RAW == '\''){
7619 NEXT;
7620 if ((RAW == 'n') && (NXT(1) == 'o')) {
7621 standalone = 0;
7622 SKIP(2);
7623 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7624 (NXT(2) == 's')) {
7625 standalone = 1;
7626 SKIP(3);
7627 } else {
7628 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7629 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7630 ctxt->sax->error(ctxt->userData,
7631 "standalone accepts only 'yes' or 'no'\n");
7632 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007633 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007634 }
7635 if (RAW != '\'') {
7636 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7637 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7638 ctxt->sax->error(ctxt->userData, "String not closed\n");
7639 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007640 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007641 } else
7642 NEXT;
7643 } else if (RAW == '"'){
7644 NEXT;
7645 if ((RAW == 'n') && (NXT(1) == 'o')) {
7646 standalone = 0;
7647 SKIP(2);
7648 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7649 (NXT(2) == 's')) {
7650 standalone = 1;
7651 SKIP(3);
7652 } else {
7653 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7654 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7655 ctxt->sax->error(ctxt->userData,
7656 "standalone accepts only 'yes' or 'no'\n");
7657 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007658 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007659 }
7660 if (RAW != '"') {
7661 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7662 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7663 ctxt->sax->error(ctxt->userData, "String not closed\n");
7664 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007665 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007666 } else
7667 NEXT;
7668 } else {
7669 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7670 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7671 ctxt->sax->error(ctxt->userData,
7672 "Standalone value not found\n");
7673 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007674 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007675 }
7676 }
7677 return(standalone);
7678}
7679
7680/**
7681 * xmlParseXMLDecl:
7682 * @ctxt: an XML parser context
7683 *
7684 * parse an XML declaration header
7685 *
7686 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7687 */
7688
7689void
7690xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7691 xmlChar *version;
7692
7693 /*
7694 * We know that '<?xml' is here.
7695 */
7696 SKIP(5);
7697
7698 if (!IS_BLANK(RAW)) {
7699 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7700 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7701 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7702 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007703 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007704 }
7705 SKIP_BLANKS;
7706
7707 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007708 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007709 */
7710 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007711 if (version == NULL) {
7712 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7713 ctxt->sax->error(ctxt->userData,
7714 "Malformed declaration expecting version\n");
7715 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007716 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard19840942001-11-29 16:11:38 +00007717 } else {
7718 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7719 /*
7720 * TODO: Blueberry should be detected here
7721 */
7722 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7723 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7724 version);
7725 }
7726 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007727 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007728 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007729 }
Owen Taylor3473f882001-02-23 17:55:21 +00007730
7731 /*
7732 * We may have the encoding declaration
7733 */
7734 if (!IS_BLANK(RAW)) {
7735 if ((RAW == '?') && (NXT(1) == '>')) {
7736 SKIP(2);
7737 return;
7738 }
7739 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7740 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7741 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7742 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007743 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007744 }
7745 xmlParseEncodingDecl(ctxt);
7746 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7747 /*
7748 * The XML REC instructs us to stop parsing right here
7749 */
7750 return;
7751 }
7752
7753 /*
7754 * We may have the standalone status.
7755 */
7756 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7757 if ((RAW == '?') && (NXT(1) == '>')) {
7758 SKIP(2);
7759 return;
7760 }
7761 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7762 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7763 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7764 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007765 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007766 }
7767 SKIP_BLANKS;
7768 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7769
7770 SKIP_BLANKS;
7771 if ((RAW == '?') && (NXT(1) == '>')) {
7772 SKIP(2);
7773 } else if (RAW == '>') {
7774 /* Deprecated old WD ... */
7775 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7776 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7777 ctxt->sax->error(ctxt->userData,
7778 "XML declaration must end-up with '?>'\n");
7779 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007780 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007781 NEXT;
7782 } else {
7783 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7784 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7785 ctxt->sax->error(ctxt->userData,
7786 "parsing XML declaration: '?>' expected\n");
7787 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007788 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007789 MOVETO_ENDTAG(CUR_PTR);
7790 NEXT;
7791 }
7792}
7793
7794/**
7795 * xmlParseMisc:
7796 * @ctxt: an XML parser context
7797 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007798 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007799 *
7800 * [27] Misc ::= Comment | PI | S
7801 */
7802
7803void
7804xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00007805 while (((RAW == '<') && (NXT(1) == '?')) ||
7806 ((RAW == '<') && (NXT(1) == '!') &&
7807 (NXT(2) == '-') && (NXT(3) == '-')) ||
7808 IS_BLANK(CUR)) {
7809 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007810 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00007811 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007812 NEXT;
7813 } else
7814 xmlParseComment(ctxt);
7815 }
7816}
7817
7818/**
7819 * xmlParseDocument:
7820 * @ctxt: an XML parser context
7821 *
7822 * parse an XML document (and build a tree if using the standard SAX
7823 * interface).
7824 *
7825 * [1] document ::= prolog element Misc*
7826 *
7827 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7828 *
7829 * Returns 0, -1 in case of error. the parser context is augmented
7830 * as a result of the parsing.
7831 */
7832
7833int
7834xmlParseDocument(xmlParserCtxtPtr ctxt) {
7835 xmlChar start[4];
7836 xmlCharEncoding enc;
7837
7838 xmlInitParser();
7839
7840 GROW;
7841
7842 /*
7843 * SAX: beginning of the document processing.
7844 */
7845 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7846 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7847
Daniel Veillard50f34372001-08-03 12:06:36 +00007848 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007849 /*
7850 * Get the 4 first bytes and decode the charset
7851 * if enc != XML_CHAR_ENCODING_NONE
7852 * plug some encoding conversion routines.
7853 */
7854 start[0] = RAW;
7855 start[1] = NXT(1);
7856 start[2] = NXT(2);
7857 start[3] = NXT(3);
7858 enc = xmlDetectCharEncoding(start, 4);
7859 if (enc != XML_CHAR_ENCODING_NONE) {
7860 xmlSwitchEncoding(ctxt, enc);
7861 }
Owen Taylor3473f882001-02-23 17:55:21 +00007862 }
7863
7864
7865 if (CUR == 0) {
7866 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7867 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7868 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7869 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007870 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007871 }
7872
7873 /*
7874 * Check for the XMLDecl in the Prolog.
7875 */
7876 GROW;
7877 if ((RAW == '<') && (NXT(1) == '?') &&
7878 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7879 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7880
7881 /*
7882 * Note that we will switch encoding on the fly.
7883 */
7884 xmlParseXMLDecl(ctxt);
7885 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7886 /*
7887 * The XML REC instructs us to stop parsing right here
7888 */
7889 return(-1);
7890 }
7891 ctxt->standalone = ctxt->input->standalone;
7892 SKIP_BLANKS;
7893 } else {
7894 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7895 }
7896 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7897 ctxt->sax->startDocument(ctxt->userData);
7898
7899 /*
7900 * The Misc part of the Prolog
7901 */
7902 GROW;
7903 xmlParseMisc(ctxt);
7904
7905 /*
7906 * Then possibly doc type declaration(s) and more Misc
7907 * (doctypedecl Misc*)?
7908 */
7909 GROW;
7910 if ((RAW == '<') && (NXT(1) == '!') &&
7911 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7912 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7913 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7914 (NXT(8) == 'E')) {
7915
7916 ctxt->inSubset = 1;
7917 xmlParseDocTypeDecl(ctxt);
7918 if (RAW == '[') {
7919 ctxt->instate = XML_PARSER_DTD;
7920 xmlParseInternalSubset(ctxt);
7921 }
7922
7923 /*
7924 * Create and update the external subset.
7925 */
7926 ctxt->inSubset = 2;
7927 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7928 (!ctxt->disableSAX))
7929 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7930 ctxt->extSubSystem, ctxt->extSubURI);
7931 ctxt->inSubset = 0;
7932
7933
7934 ctxt->instate = XML_PARSER_PROLOG;
7935 xmlParseMisc(ctxt);
7936 }
7937
7938 /*
7939 * Time to start parsing the tree itself
7940 */
7941 GROW;
7942 if (RAW != '<') {
7943 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7944 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7945 ctxt->sax->error(ctxt->userData,
7946 "Start tag expected, '<' not found\n");
7947 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007948 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007949 ctxt->instate = XML_PARSER_EOF;
7950 } else {
7951 ctxt->instate = XML_PARSER_CONTENT;
7952 xmlParseElement(ctxt);
7953 ctxt->instate = XML_PARSER_EPILOG;
7954
7955
7956 /*
7957 * The Misc part at the end
7958 */
7959 xmlParseMisc(ctxt);
7960
Daniel Veillard561b7f82002-03-20 21:55:57 +00007961 if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00007962 ctxt->errNo = XML_ERR_DOCUMENT_END;
7963 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7964 ctxt->sax->error(ctxt->userData,
7965 "Extra content at the end of the document\n");
7966 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007967 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007968 }
7969 ctxt->instate = XML_PARSER_EOF;
7970 }
7971
7972 /*
7973 * SAX: end of the document processing.
7974 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007975 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007976 ctxt->sax->endDocument(ctxt->userData);
7977
Daniel Veillard5997aca2002-03-18 18:36:20 +00007978 /*
7979 * Remove locally kept entity definitions if the tree was not built
7980 */
7981 if ((ctxt->myDoc != NULL) &&
7982 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
7983 xmlFreeDoc(ctxt->myDoc);
7984 ctxt->myDoc = NULL;
7985 }
7986
Daniel Veillardc7612992002-02-17 22:47:37 +00007987 if (! ctxt->wellFormed) {
7988 ctxt->valid = 0;
7989 return(-1);
7990 }
Owen Taylor3473f882001-02-23 17:55:21 +00007991 return(0);
7992}
7993
7994/**
7995 * xmlParseExtParsedEnt:
7996 * @ctxt: an XML parser context
7997 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007998 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00007999 * An external general parsed entity is well-formed if it matches the
8000 * production labeled extParsedEnt.
8001 *
8002 * [78] extParsedEnt ::= TextDecl? content
8003 *
8004 * Returns 0, -1 in case of error. the parser context is augmented
8005 * as a result of the parsing.
8006 */
8007
8008int
8009xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
8010 xmlChar start[4];
8011 xmlCharEncoding enc;
8012
8013 xmlDefaultSAXHandlerInit();
8014
8015 GROW;
8016
8017 /*
8018 * SAX: beginning of the document processing.
8019 */
8020 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8021 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8022
8023 /*
8024 * Get the 4 first bytes and decode the charset
8025 * if enc != XML_CHAR_ENCODING_NONE
8026 * plug some encoding conversion routines.
8027 */
8028 start[0] = RAW;
8029 start[1] = NXT(1);
8030 start[2] = NXT(2);
8031 start[3] = NXT(3);
8032 enc = xmlDetectCharEncoding(start, 4);
8033 if (enc != XML_CHAR_ENCODING_NONE) {
8034 xmlSwitchEncoding(ctxt, enc);
8035 }
8036
8037
8038 if (CUR == 0) {
8039 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8040 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8041 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8042 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008043 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008044 }
8045
8046 /*
8047 * Check for the XMLDecl in the Prolog.
8048 */
8049 GROW;
8050 if ((RAW == '<') && (NXT(1) == '?') &&
8051 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8052 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8053
8054 /*
8055 * Note that we will switch encoding on the fly.
8056 */
8057 xmlParseXMLDecl(ctxt);
8058 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8059 /*
8060 * The XML REC instructs us to stop parsing right here
8061 */
8062 return(-1);
8063 }
8064 SKIP_BLANKS;
8065 } else {
8066 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8067 }
8068 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8069 ctxt->sax->startDocument(ctxt->userData);
8070
8071 /*
8072 * Doing validity checking on chunk doesn't make sense
8073 */
8074 ctxt->instate = XML_PARSER_CONTENT;
8075 ctxt->validate = 0;
8076 ctxt->loadsubset = 0;
8077 ctxt->depth = 0;
8078
8079 xmlParseContent(ctxt);
8080
8081 if ((RAW == '<') && (NXT(1) == '/')) {
8082 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8083 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8084 ctxt->sax->error(ctxt->userData,
8085 "chunk is not well balanced\n");
8086 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008087 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008088 } else if (RAW != 0) {
8089 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8090 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8091 ctxt->sax->error(ctxt->userData,
8092 "extra content at the end of well balanced chunk\n");
8093 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008094 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008095 }
8096
8097 /*
8098 * SAX: end of the document processing.
8099 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008100 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008101 ctxt->sax->endDocument(ctxt->userData);
8102
8103 if (! ctxt->wellFormed) return(-1);
8104 return(0);
8105}
8106
8107/************************************************************************
8108 * *
8109 * Progressive parsing interfaces *
8110 * *
8111 ************************************************************************/
8112
8113/**
8114 * xmlParseLookupSequence:
8115 * @ctxt: an XML parser context
8116 * @first: the first char to lookup
8117 * @next: the next char to lookup or zero
8118 * @third: the next char to lookup or zero
8119 *
8120 * Try to find if a sequence (first, next, third) or just (first next) or
8121 * (first) is available in the input stream.
8122 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8123 * to avoid rescanning sequences of bytes, it DOES change the state of the
8124 * parser, do not use liberally.
8125 *
8126 * Returns the index to the current parsing point if the full sequence
8127 * is available, -1 otherwise.
8128 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008129static int
Owen Taylor3473f882001-02-23 17:55:21 +00008130xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8131 xmlChar next, xmlChar third) {
8132 int base, len;
8133 xmlParserInputPtr in;
8134 const xmlChar *buf;
8135
8136 in = ctxt->input;
8137 if (in == NULL) return(-1);
8138 base = in->cur - in->base;
8139 if (base < 0) return(-1);
8140 if (ctxt->checkIndex > base)
8141 base = ctxt->checkIndex;
8142 if (in->buf == NULL) {
8143 buf = in->base;
8144 len = in->length;
8145 } else {
8146 buf = in->buf->buffer->content;
8147 len = in->buf->buffer->use;
8148 }
8149 /* take into account the sequence length */
8150 if (third) len -= 2;
8151 else if (next) len --;
8152 for (;base < len;base++) {
8153 if (buf[base] == first) {
8154 if (third != 0) {
8155 if ((buf[base + 1] != next) ||
8156 (buf[base + 2] != third)) continue;
8157 } else if (next != 0) {
8158 if (buf[base + 1] != next) continue;
8159 }
8160 ctxt->checkIndex = 0;
8161#ifdef DEBUG_PUSH
8162 if (next == 0)
8163 xmlGenericError(xmlGenericErrorContext,
8164 "PP: lookup '%c' found at %d\n",
8165 first, base);
8166 else if (third == 0)
8167 xmlGenericError(xmlGenericErrorContext,
8168 "PP: lookup '%c%c' found at %d\n",
8169 first, next, base);
8170 else
8171 xmlGenericError(xmlGenericErrorContext,
8172 "PP: lookup '%c%c%c' found at %d\n",
8173 first, next, third, base);
8174#endif
8175 return(base - (in->cur - in->base));
8176 }
8177 }
8178 ctxt->checkIndex = base;
8179#ifdef DEBUG_PUSH
8180 if (next == 0)
8181 xmlGenericError(xmlGenericErrorContext,
8182 "PP: lookup '%c' failed\n", first);
8183 else if (third == 0)
8184 xmlGenericError(xmlGenericErrorContext,
8185 "PP: lookup '%c%c' failed\n", first, next);
8186 else
8187 xmlGenericError(xmlGenericErrorContext,
8188 "PP: lookup '%c%c%c' failed\n", first, next, third);
8189#endif
8190 return(-1);
8191}
8192
8193/**
8194 * xmlParseTryOrFinish:
8195 * @ctxt: an XML parser context
8196 * @terminate: last chunk indicator
8197 *
8198 * Try to progress on parsing
8199 *
8200 * Returns zero if no parsing was possible
8201 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008202static int
Owen Taylor3473f882001-02-23 17:55:21 +00008203xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8204 int ret = 0;
8205 int avail;
8206 xmlChar cur, next;
8207
8208#ifdef DEBUG_PUSH
8209 switch (ctxt->instate) {
8210 case XML_PARSER_EOF:
8211 xmlGenericError(xmlGenericErrorContext,
8212 "PP: try EOF\n"); break;
8213 case XML_PARSER_START:
8214 xmlGenericError(xmlGenericErrorContext,
8215 "PP: try START\n"); break;
8216 case XML_PARSER_MISC:
8217 xmlGenericError(xmlGenericErrorContext,
8218 "PP: try MISC\n");break;
8219 case XML_PARSER_COMMENT:
8220 xmlGenericError(xmlGenericErrorContext,
8221 "PP: try COMMENT\n");break;
8222 case XML_PARSER_PROLOG:
8223 xmlGenericError(xmlGenericErrorContext,
8224 "PP: try PROLOG\n");break;
8225 case XML_PARSER_START_TAG:
8226 xmlGenericError(xmlGenericErrorContext,
8227 "PP: try START_TAG\n");break;
8228 case XML_PARSER_CONTENT:
8229 xmlGenericError(xmlGenericErrorContext,
8230 "PP: try CONTENT\n");break;
8231 case XML_PARSER_CDATA_SECTION:
8232 xmlGenericError(xmlGenericErrorContext,
8233 "PP: try CDATA_SECTION\n");break;
8234 case XML_PARSER_END_TAG:
8235 xmlGenericError(xmlGenericErrorContext,
8236 "PP: try END_TAG\n");break;
8237 case XML_PARSER_ENTITY_DECL:
8238 xmlGenericError(xmlGenericErrorContext,
8239 "PP: try ENTITY_DECL\n");break;
8240 case XML_PARSER_ENTITY_VALUE:
8241 xmlGenericError(xmlGenericErrorContext,
8242 "PP: try ENTITY_VALUE\n");break;
8243 case XML_PARSER_ATTRIBUTE_VALUE:
8244 xmlGenericError(xmlGenericErrorContext,
8245 "PP: try ATTRIBUTE_VALUE\n");break;
8246 case XML_PARSER_DTD:
8247 xmlGenericError(xmlGenericErrorContext,
8248 "PP: try DTD\n");break;
8249 case XML_PARSER_EPILOG:
8250 xmlGenericError(xmlGenericErrorContext,
8251 "PP: try EPILOG\n");break;
8252 case XML_PARSER_PI:
8253 xmlGenericError(xmlGenericErrorContext,
8254 "PP: try PI\n");break;
8255 case XML_PARSER_IGNORE:
8256 xmlGenericError(xmlGenericErrorContext,
8257 "PP: try IGNORE\n");break;
8258 }
8259#endif
8260
8261 while (1) {
Aleksey Sanine48a3182002-05-09 18:20:01 +00008262 SHRINK;
8263
Owen Taylor3473f882001-02-23 17:55:21 +00008264 /*
8265 * Pop-up of finished entities.
8266 */
8267 while ((RAW == 0) && (ctxt->inputNr > 1))
8268 xmlPopInput(ctxt);
8269
8270 if (ctxt->input ==NULL) break;
8271 if (ctxt->input->buf == NULL)
8272 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00008273 else {
8274 /*
8275 * If we are operating on converted input, try to flush
8276 * remainng chars to avoid them stalling in the non-converted
8277 * buffer.
8278 */
8279 if ((ctxt->input->buf->raw != NULL) &&
8280 (ctxt->input->buf->raw->use > 0)) {
8281 int base = ctxt->input->base -
8282 ctxt->input->buf->buffer->content;
8283 int current = ctxt->input->cur - ctxt->input->base;
8284
8285 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8286 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8287 ctxt->input->cur = ctxt->input->base + current;
8288 ctxt->input->end =
8289 &ctxt->input->buf->buffer->content[
8290 ctxt->input->buf->buffer->use];
8291 }
8292 avail = ctxt->input->buf->buffer->use -
8293 (ctxt->input->cur - ctxt->input->base);
8294 }
Owen Taylor3473f882001-02-23 17:55:21 +00008295 if (avail < 1)
8296 goto done;
8297 switch (ctxt->instate) {
8298 case XML_PARSER_EOF:
8299 /*
8300 * Document parsing is done !
8301 */
8302 goto done;
8303 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008304 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8305 xmlChar start[4];
8306 xmlCharEncoding enc;
8307
8308 /*
8309 * Very first chars read from the document flow.
8310 */
8311 if (avail < 4)
8312 goto done;
8313
8314 /*
8315 * Get the 4 first bytes and decode the charset
8316 * if enc != XML_CHAR_ENCODING_NONE
8317 * plug some encoding conversion routines.
8318 */
8319 start[0] = RAW;
8320 start[1] = NXT(1);
8321 start[2] = NXT(2);
8322 start[3] = NXT(3);
8323 enc = xmlDetectCharEncoding(start, 4);
8324 if (enc != XML_CHAR_ENCODING_NONE) {
8325 xmlSwitchEncoding(ctxt, enc);
8326 }
8327 break;
8328 }
Owen Taylor3473f882001-02-23 17:55:21 +00008329
8330 cur = ctxt->input->cur[0];
8331 next = ctxt->input->cur[1];
8332 if (cur == 0) {
8333 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8334 ctxt->sax->setDocumentLocator(ctxt->userData,
8335 &xmlDefaultSAXLocator);
8336 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8337 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8338 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8339 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008340 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008341 ctxt->instate = XML_PARSER_EOF;
8342#ifdef DEBUG_PUSH
8343 xmlGenericError(xmlGenericErrorContext,
8344 "PP: entering EOF\n");
8345#endif
8346 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8347 ctxt->sax->endDocument(ctxt->userData);
8348 goto done;
8349 }
8350 if ((cur == '<') && (next == '?')) {
8351 /* PI or XML decl */
8352 if (avail < 5) return(ret);
8353 if ((!terminate) &&
8354 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8355 return(ret);
8356 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8357 ctxt->sax->setDocumentLocator(ctxt->userData,
8358 &xmlDefaultSAXLocator);
8359 if ((ctxt->input->cur[2] == 'x') &&
8360 (ctxt->input->cur[3] == 'm') &&
8361 (ctxt->input->cur[4] == 'l') &&
8362 (IS_BLANK(ctxt->input->cur[5]))) {
8363 ret += 5;
8364#ifdef DEBUG_PUSH
8365 xmlGenericError(xmlGenericErrorContext,
8366 "PP: Parsing XML Decl\n");
8367#endif
8368 xmlParseXMLDecl(ctxt);
8369 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8370 /*
8371 * The XML REC instructs us to stop parsing right
8372 * here
8373 */
8374 ctxt->instate = XML_PARSER_EOF;
8375 return(0);
8376 }
8377 ctxt->standalone = ctxt->input->standalone;
8378 if ((ctxt->encoding == NULL) &&
8379 (ctxt->input->encoding != NULL))
8380 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8381 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8382 (!ctxt->disableSAX))
8383 ctxt->sax->startDocument(ctxt->userData);
8384 ctxt->instate = XML_PARSER_MISC;
8385#ifdef DEBUG_PUSH
8386 xmlGenericError(xmlGenericErrorContext,
8387 "PP: entering MISC\n");
8388#endif
8389 } else {
8390 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8391 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8392 (!ctxt->disableSAX))
8393 ctxt->sax->startDocument(ctxt->userData);
8394 ctxt->instate = XML_PARSER_MISC;
8395#ifdef DEBUG_PUSH
8396 xmlGenericError(xmlGenericErrorContext,
8397 "PP: entering MISC\n");
8398#endif
8399 }
8400 } else {
8401 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8402 ctxt->sax->setDocumentLocator(ctxt->userData,
8403 &xmlDefaultSAXLocator);
8404 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8405 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8406 (!ctxt->disableSAX))
8407 ctxt->sax->startDocument(ctxt->userData);
8408 ctxt->instate = XML_PARSER_MISC;
8409#ifdef DEBUG_PUSH
8410 xmlGenericError(xmlGenericErrorContext,
8411 "PP: entering MISC\n");
8412#endif
8413 }
8414 break;
8415 case XML_PARSER_MISC:
8416 SKIP_BLANKS;
8417 if (ctxt->input->buf == NULL)
8418 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8419 else
8420 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8421 if (avail < 2)
8422 goto done;
8423 cur = ctxt->input->cur[0];
8424 next = ctxt->input->cur[1];
8425 if ((cur == '<') && (next == '?')) {
8426 if ((!terminate) &&
8427 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8428 goto done;
8429#ifdef DEBUG_PUSH
8430 xmlGenericError(xmlGenericErrorContext,
8431 "PP: Parsing PI\n");
8432#endif
8433 xmlParsePI(ctxt);
8434 } else if ((cur == '<') && (next == '!') &&
8435 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8436 if ((!terminate) &&
8437 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8438 goto done;
8439#ifdef DEBUG_PUSH
8440 xmlGenericError(xmlGenericErrorContext,
8441 "PP: Parsing Comment\n");
8442#endif
8443 xmlParseComment(ctxt);
8444 ctxt->instate = XML_PARSER_MISC;
8445 } else if ((cur == '<') && (next == '!') &&
8446 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8447 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8448 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8449 (ctxt->input->cur[8] == 'E')) {
8450 if ((!terminate) &&
8451 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8452 goto done;
8453#ifdef DEBUG_PUSH
8454 xmlGenericError(xmlGenericErrorContext,
8455 "PP: Parsing internal subset\n");
8456#endif
8457 ctxt->inSubset = 1;
8458 xmlParseDocTypeDecl(ctxt);
8459 if (RAW == '[') {
8460 ctxt->instate = XML_PARSER_DTD;
8461#ifdef DEBUG_PUSH
8462 xmlGenericError(xmlGenericErrorContext,
8463 "PP: entering DTD\n");
8464#endif
8465 } else {
8466 /*
8467 * Create and update the external subset.
8468 */
8469 ctxt->inSubset = 2;
8470 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8471 (ctxt->sax->externalSubset != NULL))
8472 ctxt->sax->externalSubset(ctxt->userData,
8473 ctxt->intSubName, ctxt->extSubSystem,
8474 ctxt->extSubURI);
8475 ctxt->inSubset = 0;
8476 ctxt->instate = XML_PARSER_PROLOG;
8477#ifdef DEBUG_PUSH
8478 xmlGenericError(xmlGenericErrorContext,
8479 "PP: entering PROLOG\n");
8480#endif
8481 }
8482 } else if ((cur == '<') && (next == '!') &&
8483 (avail < 9)) {
8484 goto done;
8485 } else {
8486 ctxt->instate = XML_PARSER_START_TAG;
8487#ifdef DEBUG_PUSH
8488 xmlGenericError(xmlGenericErrorContext,
8489 "PP: entering START_TAG\n");
8490#endif
8491 }
8492 break;
8493 case XML_PARSER_IGNORE:
8494 xmlGenericError(xmlGenericErrorContext,
8495 "PP: internal error, state == IGNORE");
8496 ctxt->instate = XML_PARSER_DTD;
8497#ifdef DEBUG_PUSH
8498 xmlGenericError(xmlGenericErrorContext,
8499 "PP: entering DTD\n");
8500#endif
8501 break;
8502 case XML_PARSER_PROLOG:
8503 SKIP_BLANKS;
8504 if (ctxt->input->buf == NULL)
8505 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8506 else
8507 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8508 if (avail < 2)
8509 goto done;
8510 cur = ctxt->input->cur[0];
8511 next = ctxt->input->cur[1];
8512 if ((cur == '<') && (next == '?')) {
8513 if ((!terminate) &&
8514 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8515 goto done;
8516#ifdef DEBUG_PUSH
8517 xmlGenericError(xmlGenericErrorContext,
8518 "PP: Parsing PI\n");
8519#endif
8520 xmlParsePI(ctxt);
8521 } else if ((cur == '<') && (next == '!') &&
8522 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8523 if ((!terminate) &&
8524 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8525 goto done;
8526#ifdef DEBUG_PUSH
8527 xmlGenericError(xmlGenericErrorContext,
8528 "PP: Parsing Comment\n");
8529#endif
8530 xmlParseComment(ctxt);
8531 ctxt->instate = XML_PARSER_PROLOG;
8532 } else if ((cur == '<') && (next == '!') &&
8533 (avail < 4)) {
8534 goto done;
8535 } else {
8536 ctxt->instate = XML_PARSER_START_TAG;
8537#ifdef DEBUG_PUSH
8538 xmlGenericError(xmlGenericErrorContext,
8539 "PP: entering START_TAG\n");
8540#endif
8541 }
8542 break;
8543 case XML_PARSER_EPILOG:
8544 SKIP_BLANKS;
8545 if (ctxt->input->buf == NULL)
8546 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8547 else
8548 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8549 if (avail < 2)
8550 goto done;
8551 cur = ctxt->input->cur[0];
8552 next = ctxt->input->cur[1];
8553 if ((cur == '<') && (next == '?')) {
8554 if ((!terminate) &&
8555 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8556 goto done;
8557#ifdef DEBUG_PUSH
8558 xmlGenericError(xmlGenericErrorContext,
8559 "PP: Parsing PI\n");
8560#endif
8561 xmlParsePI(ctxt);
8562 ctxt->instate = XML_PARSER_EPILOG;
8563 } else if ((cur == '<') && (next == '!') &&
8564 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8565 if ((!terminate) &&
8566 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8567 goto done;
8568#ifdef DEBUG_PUSH
8569 xmlGenericError(xmlGenericErrorContext,
8570 "PP: Parsing Comment\n");
8571#endif
8572 xmlParseComment(ctxt);
8573 ctxt->instate = XML_PARSER_EPILOG;
8574 } else if ((cur == '<') && (next == '!') &&
8575 (avail < 4)) {
8576 goto done;
8577 } else {
8578 ctxt->errNo = XML_ERR_DOCUMENT_END;
8579 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8580 ctxt->sax->error(ctxt->userData,
8581 "Extra content at the end of the document\n");
8582 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008583 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008584 ctxt->instate = XML_PARSER_EOF;
8585#ifdef DEBUG_PUSH
8586 xmlGenericError(xmlGenericErrorContext,
8587 "PP: entering EOF\n");
8588#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008589 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008590 ctxt->sax->endDocument(ctxt->userData);
8591 goto done;
8592 }
8593 break;
8594 case XML_PARSER_START_TAG: {
8595 xmlChar *name, *oldname;
8596
8597 if ((avail < 2) && (ctxt->inputNr == 1))
8598 goto done;
8599 cur = ctxt->input->cur[0];
8600 if (cur != '<') {
8601 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8603 ctxt->sax->error(ctxt->userData,
8604 "Start tag expect, '<' not found\n");
8605 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008606 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008607 ctxt->instate = XML_PARSER_EOF;
8608#ifdef DEBUG_PUSH
8609 xmlGenericError(xmlGenericErrorContext,
8610 "PP: entering EOF\n");
8611#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008612 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008613 ctxt->sax->endDocument(ctxt->userData);
8614 goto done;
8615 }
8616 if ((!terminate) &&
8617 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8618 goto done;
8619 if (ctxt->spaceNr == 0)
8620 spacePush(ctxt, -1);
8621 else
8622 spacePush(ctxt, *ctxt->space);
8623 name = xmlParseStartTag(ctxt);
8624 if (name == NULL) {
8625 spacePop(ctxt);
8626 ctxt->instate = XML_PARSER_EOF;
8627#ifdef DEBUG_PUSH
8628 xmlGenericError(xmlGenericErrorContext,
8629 "PP: entering EOF\n");
8630#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008631 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008632 ctxt->sax->endDocument(ctxt->userData);
8633 goto done;
8634 }
8635 namePush(ctxt, xmlStrdup(name));
8636
8637 /*
8638 * [ VC: Root Element Type ]
8639 * The Name in the document type declaration must match
8640 * the element type of the root element.
8641 */
8642 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8643 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8644 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8645
8646 /*
8647 * Check for an Empty Element.
8648 */
8649 if ((RAW == '/') && (NXT(1) == '>')) {
8650 SKIP(2);
8651 if ((ctxt->sax != NULL) &&
8652 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8653 ctxt->sax->endElement(ctxt->userData, name);
8654 xmlFree(name);
8655 oldname = namePop(ctxt);
8656 spacePop(ctxt);
8657 if (oldname != NULL) {
8658#ifdef DEBUG_STACK
8659 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8660#endif
8661 xmlFree(oldname);
8662 }
8663 if (ctxt->name == NULL) {
8664 ctxt->instate = XML_PARSER_EPILOG;
8665#ifdef DEBUG_PUSH
8666 xmlGenericError(xmlGenericErrorContext,
8667 "PP: entering EPILOG\n");
8668#endif
8669 } else {
8670 ctxt->instate = XML_PARSER_CONTENT;
8671#ifdef DEBUG_PUSH
8672 xmlGenericError(xmlGenericErrorContext,
8673 "PP: entering CONTENT\n");
8674#endif
8675 }
8676 break;
8677 }
8678 if (RAW == '>') {
8679 NEXT;
8680 } else {
8681 ctxt->errNo = XML_ERR_GT_REQUIRED;
8682 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8683 ctxt->sax->error(ctxt->userData,
8684 "Couldn't find end of Start Tag %s\n",
8685 name);
8686 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008687 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008688
8689 /*
8690 * end of parsing of this node.
8691 */
8692 nodePop(ctxt);
8693 oldname = namePop(ctxt);
8694 spacePop(ctxt);
8695 if (oldname != NULL) {
8696#ifdef DEBUG_STACK
8697 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8698#endif
8699 xmlFree(oldname);
8700 }
8701 }
8702 xmlFree(name);
8703 ctxt->instate = XML_PARSER_CONTENT;
8704#ifdef DEBUG_PUSH
8705 xmlGenericError(xmlGenericErrorContext,
8706 "PP: entering CONTENT\n");
8707#endif
8708 break;
8709 }
8710 case XML_PARSER_CONTENT: {
8711 const xmlChar *test;
8712 int cons;
Owen Taylor3473f882001-02-23 17:55:21 +00008713 if ((avail < 2) && (ctxt->inputNr == 1))
8714 goto done;
8715 cur = ctxt->input->cur[0];
8716 next = ctxt->input->cur[1];
8717
8718 test = CUR_PTR;
8719 cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008720 if ((cur == '<') && (next == '?')) {
8721 if ((!terminate) &&
8722 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8723 goto done;
8724#ifdef DEBUG_PUSH
8725 xmlGenericError(xmlGenericErrorContext,
8726 "PP: Parsing PI\n");
8727#endif
8728 xmlParsePI(ctxt);
8729 } else if ((cur == '<') && (next == '!') &&
8730 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8731 if ((!terminate) &&
8732 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8733 goto done;
8734#ifdef DEBUG_PUSH
8735 xmlGenericError(xmlGenericErrorContext,
8736 "PP: Parsing Comment\n");
8737#endif
8738 xmlParseComment(ctxt);
8739 ctxt->instate = XML_PARSER_CONTENT;
8740 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8741 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8742 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8743 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8744 (ctxt->input->cur[8] == '[')) {
8745 SKIP(9);
8746 ctxt->instate = XML_PARSER_CDATA_SECTION;
8747#ifdef DEBUG_PUSH
8748 xmlGenericError(xmlGenericErrorContext,
8749 "PP: entering CDATA_SECTION\n");
8750#endif
8751 break;
8752 } else if ((cur == '<') && (next == '!') &&
8753 (avail < 9)) {
8754 goto done;
8755 } else if ((cur == '<') && (next == '/')) {
8756 ctxt->instate = XML_PARSER_END_TAG;
8757#ifdef DEBUG_PUSH
8758 xmlGenericError(xmlGenericErrorContext,
8759 "PP: entering END_TAG\n");
8760#endif
8761 break;
8762 } else if (cur == '<') {
8763 ctxt->instate = XML_PARSER_START_TAG;
8764#ifdef DEBUG_PUSH
8765 xmlGenericError(xmlGenericErrorContext,
8766 "PP: entering START_TAG\n");
8767#endif
8768 break;
8769 } else if (cur == '&') {
8770 if ((!terminate) &&
8771 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8772 goto done;
8773#ifdef DEBUG_PUSH
8774 xmlGenericError(xmlGenericErrorContext,
8775 "PP: Parsing Reference\n");
8776#endif
8777 xmlParseReference(ctxt);
8778 } else {
8779 /* TODO Avoid the extra copy, handle directly !!! */
8780 /*
8781 * Goal of the following test is:
8782 * - minimize calls to the SAX 'character' callback
8783 * when they are mergeable
8784 * - handle an problem for isBlank when we only parse
8785 * a sequence of blank chars and the next one is
8786 * not available to check against '<' presence.
8787 * - tries to homogenize the differences in SAX
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008788 * callbacks between the push and pull versions
Owen Taylor3473f882001-02-23 17:55:21 +00008789 * of the parser.
8790 */
8791 if ((ctxt->inputNr == 1) &&
8792 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8793 if ((!terminate) &&
8794 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8795 goto done;
8796 }
8797 ctxt->checkIndex = 0;
8798#ifdef DEBUG_PUSH
8799 xmlGenericError(xmlGenericErrorContext,
8800 "PP: Parsing char data\n");
8801#endif
8802 xmlParseCharData(ctxt, 0);
8803 }
8804 /*
8805 * Pop-up of finished entities.
8806 */
8807 while ((RAW == 0) && (ctxt->inputNr > 1))
8808 xmlPopInput(ctxt);
Daniel Veillardfdc91562002-07-01 21:52:03 +00008809 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008810 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8811 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8812 ctxt->sax->error(ctxt->userData,
8813 "detected an error in element content\n");
8814 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008815 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008816 ctxt->instate = XML_PARSER_EOF;
8817 break;
8818 }
8819 break;
8820 }
8821 case XML_PARSER_CDATA_SECTION: {
8822 /*
8823 * The Push mode need to have the SAX callback for
8824 * cdataBlock merge back contiguous callbacks.
8825 */
8826 int base;
8827
8828 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8829 if (base < 0) {
8830 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8831 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8832 if (ctxt->sax->cdataBlock != NULL)
8833 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8834 XML_PARSER_BIG_BUFFER_SIZE);
8835 }
8836 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8837 ctxt->checkIndex = 0;
8838 }
8839 goto done;
8840 } else {
8841 if ((ctxt->sax != NULL) && (base > 0) &&
8842 (!ctxt->disableSAX)) {
8843 if (ctxt->sax->cdataBlock != NULL)
8844 ctxt->sax->cdataBlock(ctxt->userData,
8845 ctxt->input->cur, base);
8846 }
8847 SKIP(base + 3);
8848 ctxt->checkIndex = 0;
8849 ctxt->instate = XML_PARSER_CONTENT;
8850#ifdef DEBUG_PUSH
8851 xmlGenericError(xmlGenericErrorContext,
8852 "PP: entering CONTENT\n");
8853#endif
8854 }
8855 break;
8856 }
8857 case XML_PARSER_END_TAG:
8858 if (avail < 2)
8859 goto done;
8860 if ((!terminate) &&
8861 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8862 goto done;
8863 xmlParseEndTag(ctxt);
8864 if (ctxt->name == NULL) {
8865 ctxt->instate = XML_PARSER_EPILOG;
8866#ifdef DEBUG_PUSH
8867 xmlGenericError(xmlGenericErrorContext,
8868 "PP: entering EPILOG\n");
8869#endif
8870 } else {
8871 ctxt->instate = XML_PARSER_CONTENT;
8872#ifdef DEBUG_PUSH
8873 xmlGenericError(xmlGenericErrorContext,
8874 "PP: entering CONTENT\n");
8875#endif
8876 }
8877 break;
8878 case XML_PARSER_DTD: {
8879 /*
8880 * Sorry but progressive parsing of the internal subset
8881 * is not expected to be supported. We first check that
8882 * the full content of the internal subset is available and
8883 * the parsing is launched only at that point.
8884 * Internal subset ends up with "']' S? '>'" in an unescaped
8885 * section and not in a ']]>' sequence which are conditional
8886 * sections (whoever argued to keep that crap in XML deserve
8887 * a place in hell !).
8888 */
8889 int base, i;
8890 xmlChar *buf;
8891 xmlChar quote = 0;
8892
8893 base = ctxt->input->cur - ctxt->input->base;
8894 if (base < 0) return(0);
8895 if (ctxt->checkIndex > base)
8896 base = ctxt->checkIndex;
8897 buf = ctxt->input->buf->buffer->content;
8898 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8899 base++) {
8900 if (quote != 0) {
8901 if (buf[base] == quote)
8902 quote = 0;
8903 continue;
8904 }
8905 if (buf[base] == '"') {
8906 quote = '"';
8907 continue;
8908 }
8909 if (buf[base] == '\'') {
8910 quote = '\'';
8911 continue;
8912 }
8913 if (buf[base] == ']') {
8914 if ((unsigned int) base +1 >=
8915 ctxt->input->buf->buffer->use)
8916 break;
8917 if (buf[base + 1] == ']') {
8918 /* conditional crap, skip both ']' ! */
8919 base++;
8920 continue;
8921 }
8922 for (i = 0;
8923 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8924 i++) {
8925 if (buf[base + i] == '>')
8926 goto found_end_int_subset;
8927 }
8928 break;
8929 }
8930 }
8931 /*
8932 * We didn't found the end of the Internal subset
8933 */
8934 if (quote == 0)
8935 ctxt->checkIndex = base;
8936#ifdef DEBUG_PUSH
8937 if (next == 0)
8938 xmlGenericError(xmlGenericErrorContext,
8939 "PP: lookup of int subset end filed\n");
8940#endif
8941 goto done;
8942
8943found_end_int_subset:
8944 xmlParseInternalSubset(ctxt);
8945 ctxt->inSubset = 2;
8946 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8947 (ctxt->sax->externalSubset != NULL))
8948 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8949 ctxt->extSubSystem, ctxt->extSubURI);
8950 ctxt->inSubset = 0;
8951 ctxt->instate = XML_PARSER_PROLOG;
8952 ctxt->checkIndex = 0;
8953#ifdef DEBUG_PUSH
8954 xmlGenericError(xmlGenericErrorContext,
8955 "PP: entering PROLOG\n");
8956#endif
8957 break;
8958 }
8959 case XML_PARSER_COMMENT:
8960 xmlGenericError(xmlGenericErrorContext,
8961 "PP: internal error, state == COMMENT\n");
8962 ctxt->instate = XML_PARSER_CONTENT;
8963#ifdef DEBUG_PUSH
8964 xmlGenericError(xmlGenericErrorContext,
8965 "PP: entering CONTENT\n");
8966#endif
8967 break;
8968 case XML_PARSER_PI:
8969 xmlGenericError(xmlGenericErrorContext,
8970 "PP: internal error, state == PI\n");
8971 ctxt->instate = XML_PARSER_CONTENT;
8972#ifdef DEBUG_PUSH
8973 xmlGenericError(xmlGenericErrorContext,
8974 "PP: entering CONTENT\n");
8975#endif
8976 break;
8977 case XML_PARSER_ENTITY_DECL:
8978 xmlGenericError(xmlGenericErrorContext,
8979 "PP: internal error, state == ENTITY_DECL\n");
8980 ctxt->instate = XML_PARSER_DTD;
8981#ifdef DEBUG_PUSH
8982 xmlGenericError(xmlGenericErrorContext,
8983 "PP: entering DTD\n");
8984#endif
8985 break;
8986 case XML_PARSER_ENTITY_VALUE:
8987 xmlGenericError(xmlGenericErrorContext,
8988 "PP: internal error, state == ENTITY_VALUE\n");
8989 ctxt->instate = XML_PARSER_CONTENT;
8990#ifdef DEBUG_PUSH
8991 xmlGenericError(xmlGenericErrorContext,
8992 "PP: entering DTD\n");
8993#endif
8994 break;
8995 case XML_PARSER_ATTRIBUTE_VALUE:
8996 xmlGenericError(xmlGenericErrorContext,
8997 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8998 ctxt->instate = XML_PARSER_START_TAG;
8999#ifdef DEBUG_PUSH
9000 xmlGenericError(xmlGenericErrorContext,
9001 "PP: entering START_TAG\n");
9002#endif
9003 break;
9004 case XML_PARSER_SYSTEM_LITERAL:
9005 xmlGenericError(xmlGenericErrorContext,
9006 "PP: internal error, state == SYSTEM_LITERAL\n");
9007 ctxt->instate = XML_PARSER_START_TAG;
9008#ifdef DEBUG_PUSH
9009 xmlGenericError(xmlGenericErrorContext,
9010 "PP: entering START_TAG\n");
9011#endif
9012 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00009013 case XML_PARSER_PUBLIC_LITERAL:
9014 xmlGenericError(xmlGenericErrorContext,
9015 "PP: internal error, state == PUBLIC_LITERAL\n");
9016 ctxt->instate = XML_PARSER_START_TAG;
9017#ifdef DEBUG_PUSH
9018 xmlGenericError(xmlGenericErrorContext,
9019 "PP: entering START_TAG\n");
9020#endif
9021 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009022 }
9023 }
9024done:
9025#ifdef DEBUG_PUSH
9026 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
9027#endif
9028 return(ret);
9029}
9030
9031/**
Owen Taylor3473f882001-02-23 17:55:21 +00009032 * xmlParseChunk:
9033 * @ctxt: an XML parser context
9034 * @chunk: an char array
9035 * @size: the size in byte of the chunk
9036 * @terminate: last chunk indicator
9037 *
9038 * Parse a Chunk of memory
9039 *
9040 * Returns zero if no error, the xmlParserErrors otherwise.
9041 */
9042int
9043xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9044 int terminate) {
9045 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9046 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9047 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9048 int cur = ctxt->input->cur - ctxt->input->base;
9049
9050 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9051 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9052 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009053 ctxt->input->end =
9054 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009055#ifdef DEBUG_PUSH
9056 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9057#endif
9058
9059 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
9060 xmlParseTryOrFinish(ctxt, terminate);
9061 } else if (ctxt->instate != XML_PARSER_EOF) {
9062 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
9063 xmlParserInputBufferPtr in = ctxt->input->buf;
9064 if ((in->encoder != NULL) && (in->buffer != NULL) &&
9065 (in->raw != NULL)) {
9066 int nbchars;
9067
9068 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
9069 if (nbchars < 0) {
9070 xmlGenericError(xmlGenericErrorContext,
9071 "xmlParseChunk: encoder error\n");
9072 return(XML_ERR_INVALID_ENCODING);
9073 }
9074 }
9075 }
9076 }
9077 xmlParseTryOrFinish(ctxt, terminate);
9078 if (terminate) {
9079 /*
9080 * Check for termination
9081 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009082 int avail = 0;
9083 if (ctxt->input->buf == NULL)
9084 avail = ctxt->input->length -
9085 (ctxt->input->cur - ctxt->input->base);
9086 else
9087 avail = ctxt->input->buf->buffer->use -
9088 (ctxt->input->cur - ctxt->input->base);
9089
Owen Taylor3473f882001-02-23 17:55:21 +00009090 if ((ctxt->instate != XML_PARSER_EOF) &&
9091 (ctxt->instate != XML_PARSER_EPILOG)) {
9092 ctxt->errNo = XML_ERR_DOCUMENT_END;
9093 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9094 ctxt->sax->error(ctxt->userData,
9095 "Extra content at the end of the document\n");
9096 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009097 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009098 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009099 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
9100 ctxt->errNo = XML_ERR_DOCUMENT_END;
9101 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9102 ctxt->sax->error(ctxt->userData,
9103 "Extra content at the end of the document\n");
9104 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009105 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009106
9107 }
Owen Taylor3473f882001-02-23 17:55:21 +00009108 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009109 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009110 ctxt->sax->endDocument(ctxt->userData);
9111 }
9112 ctxt->instate = XML_PARSER_EOF;
9113 }
9114 return((xmlParserErrors) ctxt->errNo);
9115}
9116
9117/************************************************************************
9118 * *
9119 * I/O front end functions to the parser *
9120 * *
9121 ************************************************************************/
9122
9123/**
9124 * xmlStopParser:
9125 * @ctxt: an XML parser context
9126 *
9127 * Blocks further parser processing
9128 */
9129void
9130xmlStopParser(xmlParserCtxtPtr ctxt) {
9131 ctxt->instate = XML_PARSER_EOF;
9132 if (ctxt->input != NULL)
9133 ctxt->input->cur = BAD_CAST"";
9134}
9135
9136/**
9137 * xmlCreatePushParserCtxt:
9138 * @sax: a SAX handler
9139 * @user_data: The user data returned on SAX callbacks
9140 * @chunk: a pointer to an array of chars
9141 * @size: number of chars in the array
9142 * @filename: an optional file name or URI
9143 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00009144 * Create a parser context for using the XML parser in push mode.
 * If @chunk is non-NULL and @size is positive, the data is used to detect
9146 * the encoding. The remaining characters will be parsed so they
9147 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00009148 * To allow content encoding detection, @size should be >= 4
9149 * The value of @filename is used for fetching external entities
9150 * and error/warning reports.
9151 *
9152 * Returns the new parser context or NULL
9153 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00009154
Owen Taylor3473f882001-02-23 17:55:21 +00009155xmlParserCtxtPtr
9156xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9157 const char *chunk, int size, const char *filename) {
9158 xmlParserCtxtPtr ctxt;
9159 xmlParserInputPtr inputStream;
9160 xmlParserInputBufferPtr buf;
9161 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9162
9163 /*
9164 * plug some encoding conversion routines
9165 */
9166 if ((chunk != NULL) && (size >= 4))
9167 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9168
9169 buf = xmlAllocParserInputBuffer(enc);
9170 if (buf == NULL) return(NULL);
9171
9172 ctxt = xmlNewParserCtxt();
9173 if (ctxt == NULL) {
9174 xmlFree(buf);
9175 return(NULL);
9176 }
9177 if (sax != NULL) {
9178 if (ctxt->sax != &xmlDefaultSAXHandler)
9179 xmlFree(ctxt->sax);
9180 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9181 if (ctxt->sax == NULL) {
9182 xmlFree(buf);
9183 xmlFree(ctxt);
9184 return(NULL);
9185 }
9186 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9187 if (user_data != NULL)
9188 ctxt->userData = user_data;
9189 }
9190 if (filename == NULL) {
9191 ctxt->directory = NULL;
9192 } else {
9193 ctxt->directory = xmlParserGetDirectory(filename);
9194 }
9195
9196 inputStream = xmlNewInputStream(ctxt);
9197 if (inputStream == NULL) {
9198 xmlFreeParserCtxt(ctxt);
Daniel Veillard77a90a72003-03-22 00:04:05 +00009199 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009200 return(NULL);
9201 }
9202
9203 if (filename == NULL)
9204 inputStream->filename = NULL;
9205 else
Daniel Veillardf4862f02002-09-10 11:13:43 +00009206 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +00009207 xmlCanonicPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +00009208 inputStream->buf = buf;
9209 inputStream->base = inputStream->buf->buffer->content;
9210 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009211 inputStream->end =
9212 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009213
9214 inputPush(ctxt, inputStream);
9215
9216 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9217 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009218 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9219 int cur = ctxt->input->cur - ctxt->input->base;
9220
Owen Taylor3473f882001-02-23 17:55:21 +00009221 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009222
9223 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9224 ctxt->input->cur = ctxt->input->base + cur;
9225 ctxt->input->end =
9226 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009227#ifdef DEBUG_PUSH
9228 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9229#endif
9230 }
9231
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009232 if (enc != XML_CHAR_ENCODING_NONE) {
9233 xmlSwitchEncoding(ctxt, enc);
9234 }
9235
Owen Taylor3473f882001-02-23 17:55:21 +00009236 return(ctxt);
9237}
9238
9239/**
9240 * xmlCreateIOParserCtxt:
9241 * @sax: a SAX handler
9242 * @user_data: The user data returned on SAX callbacks
9243 * @ioread: an I/O read function
9244 * @ioclose: an I/O close function
9245 * @ioctx: an I/O handler
9246 * @enc: the charset encoding if known
9247 *
9248 * Create a parser context for using the XML parser with an existing
9249 * I/O stream
9250 *
9251 * Returns the new parser context or NULL
9252 */
9253xmlParserCtxtPtr
9254xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9255 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9256 void *ioctx, xmlCharEncoding enc) {
9257 xmlParserCtxtPtr ctxt;
9258 xmlParserInputPtr inputStream;
9259 xmlParserInputBufferPtr buf;
9260
9261 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9262 if (buf == NULL) return(NULL);
9263
9264 ctxt = xmlNewParserCtxt();
9265 if (ctxt == NULL) {
9266 xmlFree(buf);
9267 return(NULL);
9268 }
9269 if (sax != NULL) {
9270 if (ctxt->sax != &xmlDefaultSAXHandler)
9271 xmlFree(ctxt->sax);
9272 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9273 if (ctxt->sax == NULL) {
9274 xmlFree(buf);
9275 xmlFree(ctxt);
9276 return(NULL);
9277 }
9278 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9279 if (user_data != NULL)
9280 ctxt->userData = user_data;
9281 }
9282
9283 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9284 if (inputStream == NULL) {
9285 xmlFreeParserCtxt(ctxt);
9286 return(NULL);
9287 }
9288 inputPush(ctxt, inputStream);
9289
9290 return(ctxt);
9291}
9292
9293/************************************************************************
9294 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009295 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00009296 * *
9297 ************************************************************************/
9298
9299/**
9300 * xmlIOParseDTD:
9301 * @sax: the SAX handler block or NULL
9302 * @input: an Input Buffer
9303 * @enc: the charset encoding if known
9304 *
9305 * Load and parse a DTD
9306 *
9307 * Returns the resulting xmlDtdPtr or NULL in case of error.
9308 * @input will be freed at parsing end.
9309 */
9310
9311xmlDtdPtr
9312xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9313 xmlCharEncoding enc) {
9314 xmlDtdPtr ret = NULL;
9315 xmlParserCtxtPtr ctxt;
9316 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009317 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00009318
9319 if (input == NULL)
9320 return(NULL);
9321
9322 ctxt = xmlNewParserCtxt();
9323 if (ctxt == NULL) {
9324 return(NULL);
9325 }
9326
9327 /*
9328 * Set-up the SAX context
9329 */
9330 if (sax != NULL) {
9331 if (ctxt->sax != NULL)
9332 xmlFree(ctxt->sax);
9333 ctxt->sax = sax;
9334 ctxt->userData = NULL;
9335 }
9336
9337 /*
9338 * generate a parser input from the I/O handler
9339 */
9340
9341 pinput = xmlNewIOInputStream(ctxt, input, enc);
9342 if (pinput == NULL) {
9343 if (sax != NULL) ctxt->sax = NULL;
9344 xmlFreeParserCtxt(ctxt);
9345 return(NULL);
9346 }
9347
9348 /*
9349 * plug some encoding conversion routines here.
9350 */
9351 xmlPushInput(ctxt, pinput);
9352
9353 pinput->filename = NULL;
9354 pinput->line = 1;
9355 pinput->col = 1;
9356 pinput->base = ctxt->input->cur;
9357 pinput->cur = ctxt->input->cur;
9358 pinput->free = NULL;
9359
9360 /*
9361 * let's parse that entity knowing it's an external subset.
9362 */
9363 ctxt->inSubset = 2;
9364 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9365 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9366 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009367
9368 if (enc == XML_CHAR_ENCODING_NONE) {
9369 /*
9370 * Get the 4 first bytes and decode the charset
9371 * if enc != XML_CHAR_ENCODING_NONE
9372 * plug some encoding conversion routines.
9373 */
9374 start[0] = RAW;
9375 start[1] = NXT(1);
9376 start[2] = NXT(2);
9377 start[3] = NXT(3);
9378 enc = xmlDetectCharEncoding(start, 4);
9379 if (enc != XML_CHAR_ENCODING_NONE) {
9380 xmlSwitchEncoding(ctxt, enc);
9381 }
9382 }
9383
Owen Taylor3473f882001-02-23 17:55:21 +00009384 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9385
9386 if (ctxt->myDoc != NULL) {
9387 if (ctxt->wellFormed) {
9388 ret = ctxt->myDoc->extSubset;
9389 ctxt->myDoc->extSubset = NULL;
9390 } else {
9391 ret = NULL;
9392 }
9393 xmlFreeDoc(ctxt->myDoc);
9394 ctxt->myDoc = NULL;
9395 }
9396 if (sax != NULL) ctxt->sax = NULL;
9397 xmlFreeParserCtxt(ctxt);
9398
9399 return(ret);
9400}
9401
9402/**
9403 * xmlSAXParseDTD:
9404 * @sax: the SAX handler block
9405 * @ExternalID: a NAME* containing the External ID of the DTD
9406 * @SystemID: a NAME* containing the URL to the DTD
9407 *
9408 * Load and parse an external subset.
9409 *
9410 * Returns the resulting xmlDtdPtr or NULL in case of error.
9411 */
9412
9413xmlDtdPtr
9414xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9415 const xmlChar *SystemID) {
9416 xmlDtdPtr ret = NULL;
9417 xmlParserCtxtPtr ctxt;
9418 xmlParserInputPtr input = NULL;
9419 xmlCharEncoding enc;
9420
9421 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9422
9423 ctxt = xmlNewParserCtxt();
9424 if (ctxt == NULL) {
9425 return(NULL);
9426 }
9427
9428 /*
9429 * Set-up the SAX context
9430 */
9431 if (sax != NULL) {
9432 if (ctxt->sax != NULL)
9433 xmlFree(ctxt->sax);
9434 ctxt->sax = sax;
9435 ctxt->userData = NULL;
9436 }
9437
9438 /*
9439 * Ask the Entity resolver to load the damn thing
9440 */
9441
9442 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9443 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9444 if (input == NULL) {
9445 if (sax != NULL) ctxt->sax = NULL;
9446 xmlFreeParserCtxt(ctxt);
9447 return(NULL);
9448 }
9449
9450 /*
9451 * plug some encoding conversion routines here.
9452 */
9453 xmlPushInput(ctxt, input);
9454 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9455 xmlSwitchEncoding(ctxt, enc);
9456
9457 if (input->filename == NULL)
9458 input->filename = (char *) xmlStrdup(SystemID);
9459 input->line = 1;
9460 input->col = 1;
9461 input->base = ctxt->input->cur;
9462 input->cur = ctxt->input->cur;
9463 input->free = NULL;
9464
9465 /*
9466 * let's parse that entity knowing it's an external subset.
9467 */
9468 ctxt->inSubset = 2;
9469 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9470 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9471 ExternalID, SystemID);
9472 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9473
9474 if (ctxt->myDoc != NULL) {
9475 if (ctxt->wellFormed) {
9476 ret = ctxt->myDoc->extSubset;
9477 ctxt->myDoc->extSubset = NULL;
9478 } else {
9479 ret = NULL;
9480 }
9481 xmlFreeDoc(ctxt->myDoc);
9482 ctxt->myDoc = NULL;
9483 }
9484 if (sax != NULL) ctxt->sax = NULL;
9485 xmlFreeParserCtxt(ctxt);
9486
9487 return(ret);
9488}
9489
9490/**
9491 * xmlParseDTD:
9492 * @ExternalID: a NAME* containing the External ID of the DTD
9493 * @SystemID: a NAME* containing the URL to the DTD
9494 *
9495 * Load and parse an external subset.
9496 *
9497 * Returns the resulting xmlDtdPtr or NULL in case of error.
9498 */
9499
9500xmlDtdPtr
9501xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9502 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9503}
9504
9505/************************************************************************
9506 * *
9507 * Front ends when parsing an Entity *
9508 * *
9509 ************************************************************************/
9510
9511/**
Owen Taylor3473f882001-02-23 17:55:21 +00009512 * xmlParseCtxtExternalEntity:
9513 * @ctx: the existing parsing context
9514 * @URL: the URL for the entity to load
9515 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009516 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009517 *
9518 * Parse an external general entity within an existing parsing context
9519 * An external general parsed entity is well-formed if it matches the
9520 * production labeled extParsedEnt.
9521 *
9522 * [78] extParsedEnt ::= TextDecl? content
9523 *
9524 * Returns 0 if the entity is well formed, -1 in case of args problem and
9525 * the parser error code otherwise
9526 */
9527
9528int
9529xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009530 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009531 xmlParserCtxtPtr ctxt;
9532 xmlDocPtr newDoc;
9533 xmlSAXHandlerPtr oldsax = NULL;
9534 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009535 xmlChar start[4];
9536 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009537
9538 if (ctx->depth > 40) {
9539 return(XML_ERR_ENTITY_LOOP);
9540 }
9541
Daniel Veillardcda96922001-08-21 10:56:31 +00009542 if (lst != NULL)
9543 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009544 if ((URL == NULL) && (ID == NULL))
9545 return(-1);
9546 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9547 return(-1);
9548
9549
9550 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9551 if (ctxt == NULL) return(-1);
9552 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +00009553 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +00009554 oldsax = ctxt->sax;
9555 ctxt->sax = ctx->sax;
9556 newDoc = xmlNewDoc(BAD_CAST "1.0");
9557 if (newDoc == NULL) {
9558 xmlFreeParserCtxt(ctxt);
9559 return(-1);
9560 }
9561 if (ctx->myDoc != NULL) {
9562 newDoc->intSubset = ctx->myDoc->intSubset;
9563 newDoc->extSubset = ctx->myDoc->extSubset;
9564 }
9565 if (ctx->myDoc->URL != NULL) {
9566 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9567 }
9568 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9569 if (newDoc->children == NULL) {
9570 ctxt->sax = oldsax;
9571 xmlFreeParserCtxt(ctxt);
9572 newDoc->intSubset = NULL;
9573 newDoc->extSubset = NULL;
9574 xmlFreeDoc(newDoc);
9575 return(-1);
9576 }
9577 nodePush(ctxt, newDoc->children);
9578 if (ctx->myDoc == NULL) {
9579 ctxt->myDoc = newDoc;
9580 } else {
9581 ctxt->myDoc = ctx->myDoc;
9582 newDoc->children->doc = ctx->myDoc;
9583 }
9584
Daniel Veillard87a764e2001-06-20 17:41:10 +00009585 /*
9586 * Get the 4 first bytes and decode the charset
9587 * if enc != XML_CHAR_ENCODING_NONE
9588 * plug some encoding conversion routines.
9589 */
9590 GROW
9591 start[0] = RAW;
9592 start[1] = NXT(1);
9593 start[2] = NXT(2);
9594 start[3] = NXT(3);
9595 enc = xmlDetectCharEncoding(start, 4);
9596 if (enc != XML_CHAR_ENCODING_NONE) {
9597 xmlSwitchEncoding(ctxt, enc);
9598 }
9599
Owen Taylor3473f882001-02-23 17:55:21 +00009600 /*
9601 * Parse a possible text declaration first
9602 */
Owen Taylor3473f882001-02-23 17:55:21 +00009603 if ((RAW == '<') && (NXT(1) == '?') &&
9604 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9605 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9606 xmlParseTextDecl(ctxt);
9607 }
9608
9609 /*
9610 * Doing validity checking on chunk doesn't make sense
9611 */
9612 ctxt->instate = XML_PARSER_CONTENT;
9613 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +00009614 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +00009615 ctxt->loadsubset = ctx->loadsubset;
9616 ctxt->depth = ctx->depth + 1;
9617 ctxt->replaceEntities = ctx->replaceEntities;
9618 if (ctxt->validate) {
9619 ctxt->vctxt.error = ctx->vctxt.error;
9620 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009621 } else {
9622 ctxt->vctxt.error = NULL;
9623 ctxt->vctxt.warning = NULL;
9624 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009625 ctxt->vctxt.nodeTab = NULL;
9626 ctxt->vctxt.nodeNr = 0;
9627 ctxt->vctxt.nodeMax = 0;
9628 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009629
9630 xmlParseContent(ctxt);
9631
Daniel Veillard5f8d1a32003-03-23 21:02:00 +00009632 ctx->validate = ctxt->validate;
9633 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +00009634 if ((RAW == '<') && (NXT(1) == '/')) {
9635 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9636 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9637 ctxt->sax->error(ctxt->userData,
9638 "chunk is not well balanced\n");
9639 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009640 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009641 } else if (RAW != 0) {
9642 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9643 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9644 ctxt->sax->error(ctxt->userData,
9645 "extra content at the end of well balanced chunk\n");
9646 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009647 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009648 }
9649 if (ctxt->node != newDoc->children) {
9650 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9651 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9652 ctxt->sax->error(ctxt->userData,
9653 "chunk is not well balanced\n");
9654 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009655 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009656 }
9657
9658 if (!ctxt->wellFormed) {
9659 if (ctxt->errNo == 0)
9660 ret = 1;
9661 else
9662 ret = ctxt->errNo;
9663 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009664 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009665 xmlNodePtr cur;
9666
9667 /*
9668 * Return the newly created nodeset after unlinking it from
9669 * they pseudo parent.
9670 */
9671 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009672 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009673 while (cur != NULL) {
9674 cur->parent = NULL;
9675 cur = cur->next;
9676 }
9677 newDoc->children->children = NULL;
9678 }
9679 ret = 0;
9680 }
9681 ctxt->sax = oldsax;
9682 xmlFreeParserCtxt(ctxt);
9683 newDoc->intSubset = NULL;
9684 newDoc->extSubset = NULL;
9685 xmlFreeDoc(newDoc);
9686
9687 return(ret);
9688}
9689
9690/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009691 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009692 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009693 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009694 * @sax: the SAX handler bloc (possibly NULL)
9695 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9696 * @depth: Used for loop detection, use 0
9697 * @URL: the URL for the entity to load
9698 * @ID: the System ID for the entity to load
9699 * @list: the return value for the set of parsed nodes
9700 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009701 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009702 *
9703 * Returns 0 if the entity is well formed, -1 in case of args problem and
9704 * the parser error code otherwise
9705 */
9706
Daniel Veillard257d9102001-05-08 10:41:44 +00009707static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009708xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9709 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009710 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009711 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009712 xmlParserCtxtPtr ctxt;
9713 xmlDocPtr newDoc;
9714 xmlSAXHandlerPtr oldsax = NULL;
9715 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009716 xmlChar start[4];
9717 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009718
9719 if (depth > 40) {
9720 return(XML_ERR_ENTITY_LOOP);
9721 }
9722
9723
9724
9725 if (list != NULL)
9726 *list = NULL;
9727 if ((URL == NULL) && (ID == NULL))
9728 return(-1);
9729 if (doc == NULL) /* @@ relax but check for dereferences */
9730 return(-1);
9731
9732
9733 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9734 if (ctxt == NULL) return(-1);
9735 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009736 if (oldctxt != NULL) {
9737 ctxt->_private = oldctxt->_private;
9738 ctxt->loadsubset = oldctxt->loadsubset;
9739 ctxt->validate = oldctxt->validate;
9740 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009741 ctxt->record_info = oldctxt->record_info;
9742 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
9743 ctxt->node_seq.length = oldctxt->node_seq.length;
9744 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009745 } else {
9746 /*
9747 * Doing validity checking on chunk without context
9748 * doesn't make sense
9749 */
9750 ctxt->_private = NULL;
9751 ctxt->validate = 0;
9752 ctxt->external = 2;
9753 ctxt->loadsubset = 0;
9754 }
Owen Taylor3473f882001-02-23 17:55:21 +00009755 if (sax != NULL) {
9756 oldsax = ctxt->sax;
9757 ctxt->sax = sax;
9758 if (user_data != NULL)
9759 ctxt->userData = user_data;
9760 }
9761 newDoc = xmlNewDoc(BAD_CAST "1.0");
9762 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009763 ctxt->node_seq.maximum = 0;
9764 ctxt->node_seq.length = 0;
9765 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009766 xmlFreeParserCtxt(ctxt);
9767 return(-1);
9768 }
9769 if (doc != NULL) {
9770 newDoc->intSubset = doc->intSubset;
9771 newDoc->extSubset = doc->extSubset;
9772 }
9773 if (doc->URL != NULL) {
9774 newDoc->URL = xmlStrdup(doc->URL);
9775 }
9776 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9777 if (newDoc->children == NULL) {
9778 if (sax != NULL)
9779 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009780 ctxt->node_seq.maximum = 0;
9781 ctxt->node_seq.length = 0;
9782 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009783 xmlFreeParserCtxt(ctxt);
9784 newDoc->intSubset = NULL;
9785 newDoc->extSubset = NULL;
9786 xmlFreeDoc(newDoc);
9787 return(-1);
9788 }
9789 nodePush(ctxt, newDoc->children);
9790 if (doc == NULL) {
9791 ctxt->myDoc = newDoc;
9792 } else {
9793 ctxt->myDoc = doc;
9794 newDoc->children->doc = doc;
9795 }
9796
Daniel Veillard87a764e2001-06-20 17:41:10 +00009797 /*
9798 * Get the 4 first bytes and decode the charset
9799 * if enc != XML_CHAR_ENCODING_NONE
9800 * plug some encoding conversion routines.
9801 */
9802 GROW;
9803 start[0] = RAW;
9804 start[1] = NXT(1);
9805 start[2] = NXT(2);
9806 start[3] = NXT(3);
9807 enc = xmlDetectCharEncoding(start, 4);
9808 if (enc != XML_CHAR_ENCODING_NONE) {
9809 xmlSwitchEncoding(ctxt, enc);
9810 }
9811
Owen Taylor3473f882001-02-23 17:55:21 +00009812 /*
9813 * Parse a possible text declaration first
9814 */
Owen Taylor3473f882001-02-23 17:55:21 +00009815 if ((RAW == '<') && (NXT(1) == '?') &&
9816 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9817 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9818 xmlParseTextDecl(ctxt);
9819 }
9820
Owen Taylor3473f882001-02-23 17:55:21 +00009821 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009822 ctxt->depth = depth;
9823
9824 xmlParseContent(ctxt);
9825
Daniel Veillard561b7f82002-03-20 21:55:57 +00009826 if ((RAW == '<') && (NXT(1) == '/')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009827 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9828 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9829 ctxt->sax->error(ctxt->userData,
9830 "chunk is not well balanced\n");
9831 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009832 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00009833 } else if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00009834 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9835 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9836 ctxt->sax->error(ctxt->userData,
9837 "extra content at the end of well balanced chunk\n");
9838 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009839 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009840 }
9841 if (ctxt->node != newDoc->children) {
9842 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9843 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9844 ctxt->sax->error(ctxt->userData,
9845 "chunk is not well balanced\n");
9846 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009847 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009848 }
9849
9850 if (!ctxt->wellFormed) {
9851 if (ctxt->errNo == 0)
9852 ret = 1;
9853 else
9854 ret = ctxt->errNo;
9855 } else {
9856 if (list != NULL) {
9857 xmlNodePtr cur;
9858
9859 /*
9860 * Return the newly created nodeset after unlinking it from
9861 * they pseudo parent.
9862 */
9863 cur = newDoc->children->children;
9864 *list = cur;
9865 while (cur != NULL) {
9866 cur->parent = NULL;
9867 cur = cur->next;
9868 }
9869 newDoc->children->children = NULL;
9870 }
9871 ret = 0;
9872 }
9873 if (sax != NULL)
9874 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +00009875 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
9876 oldctxt->node_seq.length = ctxt->node_seq.length;
9877 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009878 ctxt->node_seq.maximum = 0;
9879 ctxt->node_seq.length = 0;
9880 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009881 xmlFreeParserCtxt(ctxt);
9882 newDoc->intSubset = NULL;
9883 newDoc->extSubset = NULL;
9884 xmlFreeDoc(newDoc);
9885
9886 return(ret);
9887}
9888
9889/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009890 * xmlParseExternalEntity:
9891 * @doc: the document the chunk pertains to
9892 * @sax: the SAX handler bloc (possibly NULL)
9893 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9894 * @depth: Used for loop detection, use 0
9895 * @URL: the URL for the entity to load
9896 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009897 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009898 *
9899 * Parse an external general entity
9900 * An external general parsed entity is well-formed if it matches the
9901 * production labeled extParsedEnt.
9902 *
9903 * [78] extParsedEnt ::= TextDecl? content
9904 *
9905 * Returns 0 if the entity is well formed, -1 in case of args problem and
9906 * the parser error code otherwise
9907 */
9908
9909int
9910xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009911 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009912 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009913 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009914}
9915
9916/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009917 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009918 * @doc: the document the chunk pertains to
9919 * @sax: the SAX handler bloc (possibly NULL)
9920 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9921 * @depth: Used for loop detection, use 0
9922 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009923 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009924 *
9925 * Parse a well-balanced chunk of an XML document
9926 * called by the parser
9927 * The allowed sequence for the Well Balanced Chunk is the one defined by
9928 * the content production in the XML grammar:
9929 *
9930 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9931 *
9932 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9933 * the parser error code otherwise
9934 */
9935
9936int
9937xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009938 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +00009939 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
9940 depth, string, lst, 0 );
9941}
9942
9943/**
Daniel Veillard328f48c2002-11-15 15:24:34 +00009944 * xmlParseBalancedChunkMemoryInternal:
9945 * @oldctxt: the existing parsing context
9946 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9947 * @user_data: the user data field for the parser context
9948 * @lst: the return value for the set of parsed nodes
9949 *
9950 *
9951 * Parse a well-balanced chunk of an XML document
9952 * called by the parser
9953 * The allowed sequence for the Well Balanced Chunk is the one defined by
9954 * the content production in the XML grammar:
9955 *
9956 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9957 *
9958 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9959 * the parser error code otherwise
9960 *
9961 * In case recover is set to 1, the nodelist will not be empty even if
9962 * the parsed chunk is not well balanced.
9963 */
9964static int
9965xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
9966 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
9967 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009968 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009969 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009970 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009971 int size;
9972 int ret = 0;
9973
9974 if (oldctxt->depth > 40) {
9975 return(XML_ERR_ENTITY_LOOP);
9976 }
9977
9978
9979 if (lst != NULL)
9980 *lst = NULL;
9981 if (string == NULL)
9982 return(-1);
9983
9984 size = xmlStrlen(string);
9985
9986 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9987 if (ctxt == NULL) return(-1);
9988 if (user_data != NULL)
9989 ctxt->userData = user_data;
9990 else
9991 ctxt->userData = ctxt;
9992
9993 oldsax = ctxt->sax;
9994 ctxt->sax = oldctxt->sax;
Daniel Veillarde1ca5032002-12-09 14:13:43 +00009995 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009996 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +00009997 newDoc = xmlNewDoc(BAD_CAST "1.0");
9998 if (newDoc == NULL) {
9999 ctxt->sax = oldsax;
10000 xmlFreeParserCtxt(ctxt);
10001 return(-1);
10002 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010003 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010004 } else {
10005 ctxt->myDoc = oldctxt->myDoc;
10006 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010007 }
Daniel Veillard9bc53102002-11-25 13:20:04 +000010008 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +000010009 BAD_CAST "pseudoroot", NULL);
10010 if (ctxt->myDoc->children == NULL) {
10011 ctxt->sax = oldsax;
10012 xmlFreeParserCtxt(ctxt);
10013 if (newDoc != NULL)
10014 xmlFreeDoc(newDoc);
10015 return(-1);
10016 }
10017 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010018 ctxt->instate = XML_PARSER_CONTENT;
10019 ctxt->depth = oldctxt->depth + 1;
10020
Daniel Veillard328f48c2002-11-15 15:24:34 +000010021 ctxt->validate = 0;
10022 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000010023 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
10024 /*
10025 * ID/IDREF registration will be done in xmlValidateElement below
10026 */
10027 ctxt->loadsubset |= XML_SKIP_IDS;
10028 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010029
Daniel Veillard68e9e742002-11-16 15:35:11 +000010030 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010031 if ((RAW == '<') && (NXT(1) == '/')) {
10032 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10033 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10034 ctxt->sax->error(ctxt->userData,
10035 "chunk is not well balanced\n");
10036 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010037 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010038 } else if (RAW != 0) {
10039 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10040 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10041 ctxt->sax->error(ctxt->userData,
10042 "extra content at the end of well balanced chunk\n");
10043 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010044 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010045 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010046 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000010047 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10048 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10049 ctxt->sax->error(ctxt->userData,
10050 "chunk is not well balanced\n");
10051 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010052 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010053 }
10054
10055 if (!ctxt->wellFormed) {
10056 if (ctxt->errNo == 0)
10057 ret = 1;
10058 else
10059 ret = ctxt->errNo;
10060 } else {
10061 ret = 0;
10062 }
10063
10064 if ((lst != NULL) && (ret == 0)) {
10065 xmlNodePtr cur;
10066
10067 /*
10068 * Return the newly created nodeset after unlinking it from
10069 * they pseudo parent.
10070 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000010071 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010072 *lst = cur;
10073 while (cur != NULL) {
Daniel Veillard8d589042003-02-04 15:07:21 +000010074 if (oldctxt->validate && oldctxt->wellFormed &&
10075 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
10076 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
10077 oldctxt->myDoc, cur);
10078 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010079 cur->parent = NULL;
10080 cur = cur->next;
10081 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010082 ctxt->myDoc->children->children = NULL;
10083 }
10084 if (ctxt->myDoc != NULL) {
10085 xmlFreeNode(ctxt->myDoc->children);
10086 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010087 }
10088
10089 ctxt->sax = oldsax;
10090 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010091 if (newDoc != NULL)
10092 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010093
10094 return(ret);
10095}
10096
10097/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000010098 * xmlParseBalancedChunkMemoryRecover:
10099 * @doc: the document the chunk pertains to
10100 * @sax: the SAX handler bloc (possibly NULL)
10101 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10102 * @depth: Used for loop detection, use 0
10103 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10104 * @lst: the return value for the set of parsed nodes
10105 * @recover: return nodes even if the data is broken (use 0)
10106 *
10107 *
10108 * Parse a well-balanced chunk of an XML document
10109 * called by the parser
10110 * The allowed sequence for the Well Balanced Chunk is the one defined by
10111 * the content production in the XML grammar:
10112 *
10113 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10114 *
10115 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10116 * the parser error code otherwise
10117 *
10118 * In case recover is set to 1, the nodelist will not be empty even if
10119 * the parsed chunk is not well balanced.
10120 */
10121int
10122xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
10123 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
10124 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000010125 xmlParserCtxtPtr ctxt;
10126 xmlDocPtr newDoc;
10127 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000010128 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000010129 int size;
10130 int ret = 0;
10131
10132 if (depth > 40) {
10133 return(XML_ERR_ENTITY_LOOP);
10134 }
10135
10136
Daniel Veillardcda96922001-08-21 10:56:31 +000010137 if (lst != NULL)
10138 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010139 if (string == NULL)
10140 return(-1);
10141
10142 size = xmlStrlen(string);
10143
10144 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10145 if (ctxt == NULL) return(-1);
10146 ctxt->userData = ctxt;
10147 if (sax != NULL) {
10148 oldsax = ctxt->sax;
10149 ctxt->sax = sax;
10150 if (user_data != NULL)
10151 ctxt->userData = user_data;
10152 }
10153 newDoc = xmlNewDoc(BAD_CAST "1.0");
10154 if (newDoc == NULL) {
10155 xmlFreeParserCtxt(ctxt);
10156 return(-1);
10157 }
10158 if (doc != NULL) {
10159 newDoc->intSubset = doc->intSubset;
10160 newDoc->extSubset = doc->extSubset;
10161 }
10162 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10163 if (newDoc->children == NULL) {
10164 if (sax != NULL)
10165 ctxt->sax = oldsax;
10166 xmlFreeParserCtxt(ctxt);
10167 newDoc->intSubset = NULL;
10168 newDoc->extSubset = NULL;
10169 xmlFreeDoc(newDoc);
10170 return(-1);
10171 }
10172 nodePush(ctxt, newDoc->children);
10173 if (doc == NULL) {
10174 ctxt->myDoc = newDoc;
10175 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000010176 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000010177 newDoc->children->doc = doc;
10178 }
10179 ctxt->instate = XML_PARSER_CONTENT;
10180 ctxt->depth = depth;
10181
10182 /*
10183 * Doing validity checking on chunk doesn't make sense
10184 */
10185 ctxt->validate = 0;
10186 ctxt->loadsubset = 0;
10187
Daniel Veillardb39bc392002-10-26 19:29:51 +000010188 if ( doc != NULL ){
10189 content = doc->children;
10190 doc->children = NULL;
10191 xmlParseContent(ctxt);
10192 doc->children = content;
10193 }
10194 else {
10195 xmlParseContent(ctxt);
10196 }
Owen Taylor3473f882001-02-23 17:55:21 +000010197 if ((RAW == '<') && (NXT(1) == '/')) {
10198 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10199 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10200 ctxt->sax->error(ctxt->userData,
10201 "chunk is not well balanced\n");
10202 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010203 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010204 } else if (RAW != 0) {
10205 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10206 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10207 ctxt->sax->error(ctxt->userData,
10208 "extra content at the end of well balanced chunk\n");
10209 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010210 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010211 }
10212 if (ctxt->node != newDoc->children) {
10213 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10214 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10215 ctxt->sax->error(ctxt->userData,
10216 "chunk is not well balanced\n");
10217 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010218 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010219 }
10220
10221 if (!ctxt->wellFormed) {
10222 if (ctxt->errNo == 0)
10223 ret = 1;
10224 else
10225 ret = ctxt->errNo;
10226 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010227 ret = 0;
10228 }
10229
10230 if (lst != NULL && (ret == 0 || recover == 1)) {
10231 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010232
10233 /*
10234 * Return the newly created nodeset after unlinking it from
10235 * they pseudo parent.
10236 */
10237 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010238 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010239 while (cur != NULL) {
10240 cur->parent = NULL;
10241 cur = cur->next;
10242 }
10243 newDoc->children->children = NULL;
10244 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000010245
Owen Taylor3473f882001-02-23 17:55:21 +000010246 if (sax != NULL)
10247 ctxt->sax = oldsax;
10248 xmlFreeParserCtxt(ctxt);
10249 newDoc->intSubset = NULL;
10250 newDoc->extSubset = NULL;
10251 xmlFreeDoc(newDoc);
10252
10253 return(ret);
10254}
10255
10256/**
10257 * xmlSAXParseEntity:
10258 * @sax: the SAX handler block
10259 * @filename: the filename
10260 *
10261 * parse an XML external entity out of context and build a tree.
10262 * It use the given SAX function block to handle the parsing callback.
10263 * If sax is NULL, fallback to the default DOM tree building routines.
10264 *
10265 * [78] extParsedEnt ::= TextDecl? content
10266 *
10267 * This correspond to a "Well Balanced" chunk
10268 *
10269 * Returns the resulting document tree
10270 */
10271
10272xmlDocPtr
10273xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
10274 xmlDocPtr ret;
10275 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010276
10277 ctxt = xmlCreateFileParserCtxt(filename);
10278 if (ctxt == NULL) {
10279 return(NULL);
10280 }
10281 if (sax != NULL) {
10282 if (ctxt->sax != NULL)
10283 xmlFree(ctxt->sax);
10284 ctxt->sax = sax;
10285 ctxt->userData = NULL;
10286 }
10287
Owen Taylor3473f882001-02-23 17:55:21 +000010288 xmlParseExtParsedEnt(ctxt);
10289
10290 if (ctxt->wellFormed)
10291 ret = ctxt->myDoc;
10292 else {
10293 ret = NULL;
10294 xmlFreeDoc(ctxt->myDoc);
10295 ctxt->myDoc = NULL;
10296 }
10297 if (sax != NULL)
10298 ctxt->sax = NULL;
10299 xmlFreeParserCtxt(ctxt);
10300
10301 return(ret);
10302}
10303
10304/**
10305 * xmlParseEntity:
10306 * @filename: the filename
10307 *
10308 * parse an XML external entity out of context and build a tree.
10309 *
10310 * [78] extParsedEnt ::= TextDecl? content
10311 *
10312 * This correspond to a "Well Balanced" chunk
10313 *
10314 * Returns the resulting document tree
10315 */
10316
10317xmlDocPtr
10318xmlParseEntity(const char *filename) {
10319 return(xmlSAXParseEntity(NULL, filename));
10320}
10321
10322/**
10323 * xmlCreateEntityParserCtxt:
10324 * @URL: the entity URL
10325 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010326 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000010327 *
10328 * Create a parser context for an external entity
10329 * Automatic support for ZLIB/Compress compressed document is provided
10330 * by default if found at compile-time.
10331 *
10332 * Returns the new parser context or NULL
10333 */
10334xmlParserCtxtPtr
10335xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10336 const xmlChar *base) {
10337 xmlParserCtxtPtr ctxt;
10338 xmlParserInputPtr inputStream;
10339 char *directory = NULL;
10340 xmlChar *uri;
10341
10342 ctxt = xmlNewParserCtxt();
10343 if (ctxt == NULL) {
10344 return(NULL);
10345 }
10346
10347 uri = xmlBuildURI(URL, base);
10348
10349 if (uri == NULL) {
10350 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
10351 if (inputStream == NULL) {
10352 xmlFreeParserCtxt(ctxt);
10353 return(NULL);
10354 }
10355
10356 inputPush(ctxt, inputStream);
10357
10358 if ((ctxt->directory == NULL) && (directory == NULL))
10359 directory = xmlParserGetDirectory((char *)URL);
10360 if ((ctxt->directory == NULL) && (directory != NULL))
10361 ctxt->directory = directory;
10362 } else {
10363 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
10364 if (inputStream == NULL) {
10365 xmlFree(uri);
10366 xmlFreeParserCtxt(ctxt);
10367 return(NULL);
10368 }
10369
10370 inputPush(ctxt, inputStream);
10371
10372 if ((ctxt->directory == NULL) && (directory == NULL))
10373 directory = xmlParserGetDirectory((char *)uri);
10374 if ((ctxt->directory == NULL) && (directory != NULL))
10375 ctxt->directory = directory;
10376 xmlFree(uri);
10377 }
10378
10379 return(ctxt);
10380}
10381
10382/************************************************************************
10383 * *
10384 * Front ends when parsing from a file *
10385 * *
10386 ************************************************************************/
10387
10388/**
10389 * xmlCreateFileParserCtxt:
10390 * @filename: the filename
10391 *
10392 * Create a parser context for a file content.
10393 * Automatic support for ZLIB/Compress compressed document is provided
10394 * by default if found at compile-time.
10395 *
10396 * Returns the new parser context or NULL
10397 */
10398xmlParserCtxtPtr
10399xmlCreateFileParserCtxt(const char *filename)
10400{
10401 xmlParserCtxtPtr ctxt;
10402 xmlParserInputPtr inputStream;
Igor Zlatkovicce076162003-02-23 13:39:39 +000010403 char *canonicFilename;
Owen Taylor3473f882001-02-23 17:55:21 +000010404 char *directory = NULL;
10405
Owen Taylor3473f882001-02-23 17:55:21 +000010406 ctxt = xmlNewParserCtxt();
10407 if (ctxt == NULL) {
10408 if (xmlDefaultSAXHandler.error != NULL) {
10409 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
10410 }
10411 return(NULL);
10412 }
10413
Daniel Veillardc64b8e92003-02-24 11:47:13 +000010414 canonicFilename = (char *) xmlCanonicPath((const xmlChar *) filename);
Igor Zlatkovicce076162003-02-23 13:39:39 +000010415 if (canonicFilename == NULL) {
10416 if (xmlDefaultSAXHandler.error != NULL) {
10417 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
10418 }
10419 return(NULL);
10420 }
10421
10422 inputStream = xmlLoadExternalEntity(canonicFilename, NULL, ctxt);
10423 xmlFree(canonicFilename);
Owen Taylor3473f882001-02-23 17:55:21 +000010424 if (inputStream == NULL) {
10425 xmlFreeParserCtxt(ctxt);
10426 return(NULL);
10427 }
10428
Owen Taylor3473f882001-02-23 17:55:21 +000010429 inputPush(ctxt, inputStream);
10430 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010431 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000010432 if ((ctxt->directory == NULL) && (directory != NULL))
10433 ctxt->directory = directory;
10434
10435 return(ctxt);
10436}
10437
10438/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010439 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000010440 * @sax: the SAX handler block
10441 * @filename: the filename
10442 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10443 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000010444 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000010445 *
10446 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10447 * compressed document is provided by default if found at compile-time.
10448 * It use the given SAX function block to handle the parsing callback.
10449 * If sax is NULL, fallback to the default DOM tree building routines.
10450 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000010451 * User data (void *) is stored within the parser context in the
10452 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000010453 *
Owen Taylor3473f882001-02-23 17:55:21 +000010454 * Returns the resulting document tree
10455 */
10456
10457xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000010458xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
10459 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000010460 xmlDocPtr ret;
10461 xmlParserCtxtPtr ctxt;
10462 char *directory = NULL;
10463
Daniel Veillard635ef722001-10-29 11:48:19 +000010464 xmlInitParser();
10465
Owen Taylor3473f882001-02-23 17:55:21 +000010466 ctxt = xmlCreateFileParserCtxt(filename);
10467 if (ctxt == NULL) {
10468 return(NULL);
10469 }
10470 if (sax != NULL) {
10471 if (ctxt->sax != NULL)
10472 xmlFree(ctxt->sax);
10473 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010474 }
Daniel Veillarda293c322001-10-02 13:54:14 +000010475 if (data!=NULL) {
10476 ctxt->_private=data;
10477 }
Owen Taylor3473f882001-02-23 17:55:21 +000010478
10479 if ((ctxt->directory == NULL) && (directory == NULL))
10480 directory = xmlParserGetDirectory(filename);
10481 if ((ctxt->directory == NULL) && (directory != NULL))
10482 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
10483
Daniel Veillarddad3f682002-11-17 16:47:27 +000010484 ctxt->recovery = recovery;
10485
Owen Taylor3473f882001-02-23 17:55:21 +000010486 xmlParseDocument(ctxt);
10487
10488 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10489 else {
10490 ret = NULL;
10491 xmlFreeDoc(ctxt->myDoc);
10492 ctxt->myDoc = NULL;
10493 }
10494 if (sax != NULL)
10495 ctxt->sax = NULL;
10496 xmlFreeParserCtxt(ctxt);
10497
10498 return(ret);
10499}
10500
10501/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010502 * xmlSAXParseFile:
10503 * @sax: the SAX handler block
10504 * @filename: the filename
10505 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10506 * documents
10507 *
10508 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10509 * compressed document is provided by default if found at compile-time.
10510 * It use the given SAX function block to handle the parsing callback.
10511 * If sax is NULL, fallback to the default DOM tree building routines.
10512 *
10513 * Returns the resulting document tree
10514 */
10515
10516xmlDocPtr
10517xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
10518 int recovery) {
10519 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
10520}
10521
10522/**
Owen Taylor3473f882001-02-23 17:55:21 +000010523 * xmlRecoverDoc:
10524 * @cur: a pointer to an array of xmlChar
10525 *
10526 * parse an XML in-memory document and build a tree.
10527 * In the case the document is not Well Formed, a tree is built anyway
10528 *
10529 * Returns the resulting document tree
10530 */
10531
10532xmlDocPtr
10533xmlRecoverDoc(xmlChar *cur) {
10534 return(xmlSAXParseDoc(NULL, cur, 1));
10535}
10536
10537/**
10538 * xmlParseFile:
10539 * @filename: the filename
10540 *
10541 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10542 * compressed document is provided by default if found at compile-time.
10543 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000010544 * Returns the resulting document tree if the file was wellformed,
10545 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000010546 */
10547
10548xmlDocPtr
10549xmlParseFile(const char *filename) {
10550 return(xmlSAXParseFile(NULL, filename, 0));
10551}
10552
10553/**
10554 * xmlRecoverFile:
10555 * @filename: the filename
10556 *
10557 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10558 * compressed document is provided by default if found at compile-time.
10559 * In the case the document is not Well Formed, a tree is built anyway
10560 *
10561 * Returns the resulting document tree
10562 */
10563
10564xmlDocPtr
10565xmlRecoverFile(const char *filename) {
10566 return(xmlSAXParseFile(NULL, filename, 1));
10567}
10568
10569
10570/**
10571 * xmlSetupParserForBuffer:
10572 * @ctxt: an XML parser context
10573 * @buffer: a xmlChar * buffer
10574 * @filename: a file name
10575 *
10576 * Setup the parser context to parse a new buffer; Clears any prior
10577 * contents from the parser context. The buffer parameter must not be
10578 * NULL, but the filename parameter can be
10579 */
10580void
10581xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
10582 const char* filename)
10583{
10584 xmlParserInputPtr input;
10585
10586 input = xmlNewInputStream(ctxt);
10587 if (input == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +000010588 xmlGenericError(xmlGenericErrorContext,
10589 "malloc");
Owen Taylor3473f882001-02-23 17:55:21 +000010590 xmlFree(ctxt);
10591 return;
10592 }
10593
10594 xmlClearParserCtxt(ctxt);
10595 if (filename != NULL)
10596 input->filename = xmlMemStrdup(filename);
10597 input->base = buffer;
10598 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010599 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000010600 inputPush(ctxt, input);
10601}
10602
10603/**
10604 * xmlSAXUserParseFile:
10605 * @sax: a SAX handler
10606 * @user_data: The user data returned on SAX callbacks
10607 * @filename: a file name
10608 *
10609 * parse an XML file and call the given SAX handler routines.
10610 * Automatic support for ZLIB/Compress compressed document is provided
10611 *
10612 * Returns 0 in case of success or a error number otherwise
10613 */
10614int
10615xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10616 const char *filename) {
10617 int ret = 0;
10618 xmlParserCtxtPtr ctxt;
10619
10620 ctxt = xmlCreateFileParserCtxt(filename);
10621 if (ctxt == NULL) return -1;
10622 if (ctxt->sax != &xmlDefaultSAXHandler)
10623 xmlFree(ctxt->sax);
10624 ctxt->sax = sax;
10625 if (user_data != NULL)
10626 ctxt->userData = user_data;
10627
10628 xmlParseDocument(ctxt);
10629
10630 if (ctxt->wellFormed)
10631 ret = 0;
10632 else {
10633 if (ctxt->errNo != 0)
10634 ret = ctxt->errNo;
10635 else
10636 ret = -1;
10637 }
10638 if (sax != NULL)
10639 ctxt->sax = NULL;
10640 xmlFreeParserCtxt(ctxt);
10641
10642 return ret;
10643}
10644
10645/************************************************************************
10646 * *
10647 * Front ends when parsing from memory *
10648 * *
10649 ************************************************************************/
10650
10651/**
10652 * xmlCreateMemoryParserCtxt:
10653 * @buffer: a pointer to a char array
10654 * @size: the size of the array
10655 *
10656 * Create a parser context for an XML in-memory document.
10657 *
10658 * Returns the new parser context or NULL
10659 */
10660xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010661xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010662 xmlParserCtxtPtr ctxt;
10663 xmlParserInputPtr input;
10664 xmlParserInputBufferPtr buf;
10665
10666 if (buffer == NULL)
10667 return(NULL);
10668 if (size <= 0)
10669 return(NULL);
10670
10671 ctxt = xmlNewParserCtxt();
10672 if (ctxt == NULL)
10673 return(NULL);
10674
10675 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010676 if (buf == NULL) {
10677 xmlFreeParserCtxt(ctxt);
10678 return(NULL);
10679 }
Owen Taylor3473f882001-02-23 17:55:21 +000010680
10681 input = xmlNewInputStream(ctxt);
10682 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010683 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010684 xmlFreeParserCtxt(ctxt);
10685 return(NULL);
10686 }
10687
10688 input->filename = NULL;
10689 input->buf = buf;
10690 input->base = input->buf->buffer->content;
10691 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010692 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010693
10694 inputPush(ctxt, input);
10695 return(ctxt);
10696}
10697
10698/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010699 * xmlSAXParseMemoryWithData:
10700 * @sax: the SAX handler block
10701 * @buffer: an pointer to a char array
10702 * @size: the size of the array
10703 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10704 * documents
10705 * @data: the userdata
10706 *
10707 * parse an XML in-memory block and use the given SAX function block
10708 * to handle the parsing callback. If sax is NULL, fallback to the default
10709 * DOM tree building routines.
10710 *
10711 * User data (void *) is stored within the parser context in the
10712 * context's _private member, so it is available nearly everywhere in libxml
10713 *
10714 * Returns the resulting document tree
10715 */
10716
10717xmlDocPtr
10718xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
10719 int size, int recovery, void *data) {
10720 xmlDocPtr ret;
10721 xmlParserCtxtPtr ctxt;
10722
10723 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10724 if (ctxt == NULL) return(NULL);
10725 if (sax != NULL) {
10726 if (ctxt->sax != NULL)
10727 xmlFree(ctxt->sax);
10728 ctxt->sax = sax;
10729 }
10730 if (data!=NULL) {
10731 ctxt->_private=data;
10732 }
10733
Daniel Veillardadba5f12003-04-04 16:09:01 +000010734 ctxt->recovery = recovery;
10735
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010736 xmlParseDocument(ctxt);
10737
10738 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10739 else {
10740 ret = NULL;
10741 xmlFreeDoc(ctxt->myDoc);
10742 ctxt->myDoc = NULL;
10743 }
10744 if (sax != NULL)
10745 ctxt->sax = NULL;
10746 xmlFreeParserCtxt(ctxt);
10747
10748 return(ret);
10749}
10750
10751/**
Owen Taylor3473f882001-02-23 17:55:21 +000010752 * xmlSAXParseMemory:
10753 * @sax: the SAX handler block
10754 * @buffer: an pointer to a char array
10755 * @size: the size of the array
10756 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10757 * documents
10758 *
10759 * parse an XML in-memory block and use the given SAX function block
10760 * to handle the parsing callback. If sax is NULL, fallback to the default
10761 * DOM tree building routines.
10762 *
10763 * Returns the resulting document tree
10764 */
10765xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010766xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10767 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010768 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010769}
10770
10771/**
10772 * xmlParseMemory:
10773 * @buffer: an pointer to a char array
10774 * @size: the size of the array
10775 *
10776 * parse an XML in-memory block and build a tree.
10777 *
10778 * Returns the resulting document tree
10779 */
10780
Daniel Veillard50822cb2001-07-26 20:05:51 +000010781xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010782 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10783}
10784
10785/**
10786 * xmlRecoverMemory:
10787 * @buffer: an pointer to a char array
10788 * @size: the size of the array
10789 *
10790 * parse an XML in-memory block and build a tree.
10791 * In the case the document is not Well Formed, a tree is built anyway
10792 *
10793 * Returns the resulting document tree
10794 */
10795
Daniel Veillard50822cb2001-07-26 20:05:51 +000010796xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010797 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10798}
10799
10800/**
10801 * xmlSAXUserParseMemory:
10802 * @sax: a SAX handler
10803 * @user_data: The user data returned on SAX callbacks
10804 * @buffer: an in-memory XML document input
10805 * @size: the length of the XML document in bytes
10806 *
10807 * A better SAX parsing routine.
10808 * parse an XML in-memory buffer and call the given SAX handler routines.
10809 *
10810 * Returns 0 in case of success or a error number otherwise
10811 */
10812int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010813 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010814 int ret = 0;
10815 xmlParserCtxtPtr ctxt;
10816 xmlSAXHandlerPtr oldsax = NULL;
10817
Daniel Veillard9e923512002-08-14 08:48:52 +000010818 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000010819 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10820 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000010821 oldsax = ctxt->sax;
10822 ctxt->sax = sax;
Daniel Veillard30211a02001-04-26 09:33:18 +000010823 if (user_data != NULL)
10824 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010825
10826 xmlParseDocument(ctxt);
10827
10828 if (ctxt->wellFormed)
10829 ret = 0;
10830 else {
10831 if (ctxt->errNo != 0)
10832 ret = ctxt->errNo;
10833 else
10834 ret = -1;
10835 }
Daniel Veillard9e923512002-08-14 08:48:52 +000010836 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000010837 xmlFreeParserCtxt(ctxt);
10838
10839 return ret;
10840}
10841
10842/**
10843 * xmlCreateDocParserCtxt:
10844 * @cur: a pointer to an array of xmlChar
10845 *
10846 * Creates a parser context for an XML in-memory document.
10847 *
10848 * Returns the new parser context or NULL
10849 */
10850xmlParserCtxtPtr
10851xmlCreateDocParserCtxt(xmlChar *cur) {
10852 int len;
10853
10854 if (cur == NULL)
10855 return(NULL);
10856 len = xmlStrlen(cur);
10857 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10858}
10859
10860/**
10861 * xmlSAXParseDoc:
10862 * @sax: the SAX handler block
10863 * @cur: a pointer to an array of xmlChar
10864 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10865 * documents
10866 *
10867 * parse an XML in-memory document and build a tree.
10868 * It use the given SAX function block to handle the parsing callback.
10869 * If sax is NULL, fallback to the default DOM tree building routines.
10870 *
10871 * Returns the resulting document tree
10872 */
10873
10874xmlDocPtr
10875xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10876 xmlDocPtr ret;
10877 xmlParserCtxtPtr ctxt;
10878
10879 if (cur == NULL) return(NULL);
10880
10881
10882 ctxt = xmlCreateDocParserCtxt(cur);
10883 if (ctxt == NULL) return(NULL);
10884 if (sax != NULL) {
10885 ctxt->sax = sax;
10886 ctxt->userData = NULL;
10887 }
10888
10889 xmlParseDocument(ctxt);
10890 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10891 else {
10892 ret = NULL;
10893 xmlFreeDoc(ctxt->myDoc);
10894 ctxt->myDoc = NULL;
10895 }
10896 if (sax != NULL)
10897 ctxt->sax = NULL;
10898 xmlFreeParserCtxt(ctxt);
10899
10900 return(ret);
10901}
10902
10903/**
10904 * xmlParseDoc:
10905 * @cur: a pointer to an array of xmlChar
10906 *
10907 * parse an XML in-memory document and build a tree.
10908 *
10909 * Returns the resulting document tree
10910 */
10911
10912xmlDocPtr
10913xmlParseDoc(xmlChar *cur) {
10914 return(xmlSAXParseDoc(NULL, cur, 0));
10915}
10916
Daniel Veillard8107a222002-01-13 14:10:10 +000010917/************************************************************************
10918 * *
10919 * Specific function to keep track of entities references *
10920 * and used by the XSLT debugger *
10921 * *
10922 ************************************************************************/
10923
10924static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
10925
10926/**
10927 * xmlAddEntityReference:
10928 * @ent : A valid entity
10929 * @firstNode : A valid first node for children of entity
10930 * @lastNode : A valid last node of children entity
10931 *
10932 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
10933 */
10934static void
10935xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
10936 xmlNodePtr lastNode)
10937{
10938 if (xmlEntityRefFunc != NULL) {
10939 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
10940 }
10941}
10942
10943
10944/**
10945 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000010946 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000010947 *
10948 * Set the function to call call back when a xml reference has been made
10949 */
10950void
10951xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
10952{
10953 xmlEntityRefFunc = func;
10954}
Owen Taylor3473f882001-02-23 17:55:21 +000010955
10956/************************************************************************
10957 * *
10958 * Miscellaneous *
10959 * *
10960 ************************************************************************/
10961
10962#ifdef LIBXML_XPATH_ENABLED
10963#include <libxml/xpath.h>
10964#endif
10965
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010966extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000010967static int xmlParserInitialized = 0;
10968
10969/**
10970 * xmlInitParser:
10971 *
10972 * Initialization function for the XML parser.
10973 * This is not reentrant. Call once before processing in case of
10974 * use in multithreaded programs.
10975 */
10976
10977void
10978xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000010979 if (xmlParserInitialized != 0)
10980 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010981
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010982 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
10983 (xmlGenericError == NULL))
10984 initGenericErrorDefaultFunc(NULL);
Daniel Veillardd0463562001-10-13 09:15:48 +000010985 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000010986 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000010987 xmlInitCharEncodingHandlers();
10988 xmlInitializePredefinedEntities();
10989 xmlDefaultSAXHandlerInit();
10990 xmlRegisterDefaultInputCallbacks();
10991 xmlRegisterDefaultOutputCallbacks();
10992#ifdef LIBXML_HTML_ENABLED
10993 htmlInitAutoClose();
10994 htmlDefaultSAXHandlerInit();
10995#endif
10996#ifdef LIBXML_XPATH_ENABLED
10997 xmlXPathInit();
10998#endif
10999 xmlParserInitialized = 1;
11000}
11001
11002/**
11003 * xmlCleanupParser:
11004 *
11005 * Cleanup function for the XML parser. It tries to reclaim all
11006 * parsing related global memory allocated for the parser processing.
11007 * It doesn't deallocate any document related memory. Calling this
11008 * function should not prevent reusing the parser.
Daniel Veillard7424eb62003-01-24 14:14:52 +000011009 * One should call xmlCleanupParser() only when the process has
11010 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000011011 */
11012
11013void
11014xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000011015 xmlCleanupCharEncodingHandlers();
11016 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000011017#ifdef LIBXML_CATALOG_ENABLED
11018 xmlCatalogCleanup();
11019#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000011020 xmlCleanupThreads();
11021 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011022}