blob: 535df1e0b28562fd43c8a2cd1450a734b43def8d [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
44#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000045#include <libxml/threads.h>
46#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000056#ifdef LIBXML_CATALOG_ENABLED
57#include <libxml/catalog.h>
58#endif
Owen Taylor3473f882001-02-23 17:55:21 +000059
60#ifdef HAVE_CTYPE_H
61#include <ctype.h>
62#endif
63#ifdef HAVE_STDLIB_H
64#include <stdlib.h>
65#endif
66#ifdef HAVE_SYS_STAT_H
67#include <sys/stat.h>
68#endif
69#ifdef HAVE_FCNTL_H
70#include <fcntl.h>
71#endif
72#ifdef HAVE_UNISTD_H
73#include <unistd.h>
74#endif
75#ifdef HAVE_ZLIB_H
76#include <zlib.h>
77#endif
78
Daniel Veillard3b2e4e12003-02-03 08:52:58 +000079/**
80 * MAX_DEPTH:
81 *
82 * arbitrary depth limit for the XML documents that we allow to
83 * process. This is not a limitation of the parser but a safety
84 * boundary feature.
85 */
86#define MAX_DEPTH 1024
Owen Taylor3473f882001-02-23 17:55:21 +000087
Daniel Veillard21a0f912001-02-25 19:54:14 +000088#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000089#define XML_PARSER_BUFFER_SIZE 100
90
Daniel Veillard5997aca2002-03-18 18:36:20 +000091#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
92
Owen Taylor3473f882001-02-23 17:55:21 +000093/*
Owen Taylor3473f882001-02-23 17:55:21 +000094 * List of XML prefixed PI allowed by W3C specs
95 */
96
Daniel Veillardb44025c2001-10-11 22:55:55 +000097static const char *xmlW3CPIs[] = {
Owen Taylor3473f882001-02-23 17:55:21 +000098 "xml-stylesheet",
99 NULL
100};
101
102/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000103xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
104 const xmlChar **str);
105
Daniel Veillard257d9102001-05-08 10:41:44 +0000106static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000107xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
108 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000109 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000110 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000111
Daniel Veillard8107a222002-01-13 14:10:10 +0000112static void
113xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
114 xmlNodePtr lastNode);
115
Daniel Veillard328f48c2002-11-15 15:24:34 +0000116static int
117xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
118 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Owen Taylor3473f882001-02-23 17:55:21 +0000119/************************************************************************
120 * *
121 * Parser stacks related functions and macros *
122 * *
123 ************************************************************************/
124
125xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
126 const xmlChar ** str);
127
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000128/**
129 * inputPush:
130 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000131 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000132 *
133 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000134 *
135 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000136 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000137extern int
138inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
139{
140 if (ctxt->inputNr >= ctxt->inputMax) {
141 ctxt->inputMax *= 2;
142 ctxt->inputTab =
143 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
144 ctxt->inputMax *
145 sizeof(ctxt->inputTab[0]));
146 if (ctxt->inputTab == NULL) {
147 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
148 return (0);
149 }
150 }
151 ctxt->inputTab[ctxt->inputNr] = value;
152 ctxt->input = value;
153 return (ctxt->inputNr++);
154}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000155/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000156 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000157 * @ctxt: an XML parser context
158 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000159 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000160 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000161 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000162 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000163extern xmlParserInputPtr
164inputPop(xmlParserCtxtPtr ctxt)
165{
166 xmlParserInputPtr ret;
167
168 if (ctxt->inputNr <= 0)
169 return (0);
170 ctxt->inputNr--;
171 if (ctxt->inputNr > 0)
172 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
173 else
174 ctxt->input = NULL;
175 ret = ctxt->inputTab[ctxt->inputNr];
176 ctxt->inputTab[ctxt->inputNr] = 0;
177 return (ret);
178}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000179/**
180 * nodePush:
181 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000182 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000183 *
184 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000185 *
186 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000187 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000188extern int
189nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
190{
191 if (ctxt->nodeNr >= ctxt->nodeMax) {
192 ctxt->nodeMax *= 2;
193 ctxt->nodeTab =
194 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
195 ctxt->nodeMax *
196 sizeof(ctxt->nodeTab[0]));
197 if (ctxt->nodeTab == NULL) {
198 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
199 return (0);
200 }
201 }
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000202#ifdef MAX_DEPTH
203 if (ctxt->nodeNr > MAX_DEPTH) {
204 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
205 ctxt->sax->error(ctxt->userData,
206 "Excessive depth in document: change MAX_DEPTH = %d\n",
207 MAX_DEPTH);
208 ctxt->wellFormed = 0;
209 ctxt->instate = XML_PARSER_EOF;
210 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
211 return(0);
212 }
213#endif
Daniel Veillard1c732d22002-11-30 11:22:59 +0000214 ctxt->nodeTab[ctxt->nodeNr] = value;
215 ctxt->node = value;
216 return (ctxt->nodeNr++);
217}
218/**
219 * nodePop:
220 * @ctxt: an XML parser context
221 *
222 * Pops the top element node from the node stack
223 *
224 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +0000225 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000226extern xmlNodePtr
227nodePop(xmlParserCtxtPtr ctxt)
228{
229 xmlNodePtr ret;
230
231 if (ctxt->nodeNr <= 0)
232 return (0);
233 ctxt->nodeNr--;
234 if (ctxt->nodeNr > 0)
235 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
236 else
237 ctxt->node = NULL;
238 ret = ctxt->nodeTab[ctxt->nodeNr];
239 ctxt->nodeTab[ctxt->nodeNr] = 0;
240 return (ret);
241}
242/**
243 * namePush:
244 * @ctxt: an XML parser context
245 * @value: the element name
246 *
247 * Pushes a new element name on top of the name stack
248 *
249 * Returns 0 in case of error, the index in the stack otherwise
250 */
251extern int
Daniel Veillard2fdbd322003-08-18 12:15:38 +0000252namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +0000253{
254 if (ctxt->nameNr >= ctxt->nameMax) {
255 ctxt->nameMax *= 2;
Daniel Veillard2fdbd322003-08-18 12:15:38 +0000256 ctxt->nameTab = (const xmlChar * *)
257 xmlRealloc(ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +0000258 ctxt->nameMax *
259 sizeof(ctxt->nameTab[0]));
260 if (ctxt->nameTab == NULL) {
261 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
262 return (0);
263 }
264 }
265 ctxt->nameTab[ctxt->nameNr] = value;
266 ctxt->name = value;
267 return (ctxt->nameNr++);
268}
269/**
270 * namePop:
271 * @ctxt: an XML parser context
272 *
273 * Pops the top element name from the name stack
274 *
275 * Returns the name just removed
276 */
Daniel Veillard2fdbd322003-08-18 12:15:38 +0000277extern const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000278namePop(xmlParserCtxtPtr ctxt)
279{
Daniel Veillard2fdbd322003-08-18 12:15:38 +0000280 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +0000281
282 if (ctxt->nameNr <= 0)
283 return (0);
284 ctxt->nameNr--;
285 if (ctxt->nameNr > 0)
286 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
287 else
288 ctxt->name = NULL;
289 ret = ctxt->nameTab[ctxt->nameNr];
290 ctxt->nameTab[ctxt->nameNr] = 0;
291 return (ret);
292}
Owen Taylor3473f882001-02-23 17:55:21 +0000293
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000294static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000295 if (ctxt->spaceNr >= ctxt->spaceMax) {
296 ctxt->spaceMax *= 2;
297 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
298 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
299 if (ctxt->spaceTab == NULL) {
300 xmlGenericError(xmlGenericErrorContext,
301 "realloc failed !\n");
302 return(0);
303 }
304 }
305 ctxt->spaceTab[ctxt->spaceNr] = val;
306 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
307 return(ctxt->spaceNr++);
308}
309
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000310static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000311 int ret;
312 if (ctxt->spaceNr <= 0) return(0);
313 ctxt->spaceNr--;
314 if (ctxt->spaceNr > 0)
315 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
316 else
317 ctxt->space = NULL;
318 ret = ctxt->spaceTab[ctxt->spaceNr];
319 ctxt->spaceTab[ctxt->spaceNr] = -1;
320 return(ret);
321}
322
323/*
324 * Macros for accessing the content. Those should be used only by the parser,
325 * and not exported.
326 *
327 * Dirty macros, i.e. one often need to make assumption on the context to
328 * use them
329 *
330 * CUR_PTR return the current pointer to the xmlChar to be parsed.
331 * To be used with extreme caution since operations consuming
332 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
337 * RAW same as CUR but in the input buffer, bypass any token
338 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare against ASCII based substrings.
341 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +0000342 * strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +0000345 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
346 *
347 * NEXT Skip to the next character, this does the proper decoding
348 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +0000349 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +0000350 * CUR_CHAR(l) returns the current unicode character (int), set l
351 * to the number of xmlChars used for the encoding [0-5].
352 * CUR_SCHAR same but operate on a string instead of the context
353 * COPY_BUF copy the current unicode char to the target buffer, increment
354 * the index
355 * GROW, SHRINK handling of input buffers
356 */
357
/* Byte at the current read position of the current input. */
#define RAW (*ctxt->input->cur)
/* Same expansion as RAW; only meant for comparisons with ASCII values. */
#define CUR (*ctxt->input->cur)
/* The xmlChar located (val) positions after the current one. */
#define NXT(val) ctxt->input->cur[(val)]
/* Current read pointer; consuming operations may move the buffer. */
#define CUR_PTR ctxt->input->cur

/*
 * Advance by (val) xmlChars, updating the character and column counters,
 * then handle a parameter-entity reference found at the new position and
 * pop the input when it is exhausted and cannot be grown.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
370
Daniel Veillarda880b122003-04-21 21:36:41 +0000371#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillardb19ba832003-08-14 00:33:46 +0000372 (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) && \
373 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +0000374 xmlSHRINK (ctxt);
375
376static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
377 xmlParserInputShrink(ctxt->input);
378 if ((*ctxt->input->cur == 0) &&
379 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
380 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000381 }
Owen Taylor3473f882001-02-23 17:55:21 +0000382
Daniel Veillarda880b122003-04-21 21:36:41 +0000383#define GROW if ((ctxt->progressive == 0) && \
384 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +0000385 xmlGROW (ctxt);
386
387static void xmlGROW (xmlParserCtxtPtr ctxt) {
388 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
389 if ((*ctxt->input->cur == 0) &&
390 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
391 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +0000392}
Owen Taylor3473f882001-02-23 17:55:21 +0000393
/* Skip blank characters at the current position. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Advance to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * Advance by exactly one xmlChar, bumping the column and character
 * counters, and try to grow the input when a zero byte is reached.
 */
#define NEXT1 {								\
        ctxt->input->col++;						\
        ctxt->input->cur++;						\
        ctxt->nbChars++;						\
        if (*ctxt->input->cur == 0)					\
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * Advance over the current character, which is l xmlChars long, keeping
 * the line/column counters up to date, then handle a parameter-entity
 * reference found at the new position.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
        ctxt->input->line++;						\
        ctxt->input->col = 1;						\
    } else								\
        ctxt->input->col++;						\
    ctxt->input->cur += l;						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
  } while (0)

/* Current character of the context; l receives its length in xmlChars. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
/* Same as CUR_CHAR but reading from the string s. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * Append character v (l xmlChars long) to buffer b at index i and
 * advance i accordingly.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +0000420
421/**
422 * xmlSkipBlankChars:
423 * @ctxt: the XML parser context
424 *
425 * skip all blanks character found at that point in the input streams.
426 * It pops up finished entities in the process if allowable at that point.
427 *
428 * Returns the number of space chars skipped
429 */
430
431int
432xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000433 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000434
435 /*
436 * It's Okay to use CUR/NEXT here since all the blanks are on
437 * the ASCII range.
438 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000439 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
440 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000441 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000442 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000443 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000444 cur = ctxt->input->cur;
445 while (IS_BLANK(*cur)) {
446 if (*cur == '\n') {
447 ctxt->input->line++; ctxt->input->col = 1;
448 }
449 cur++;
450 res++;
451 if (*cur == 0) {
452 ctxt->input->cur = cur;
453 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
454 cur = ctxt->input->cur;
455 }
456 }
457 ctxt->input->cur = cur;
458 } else {
459 int cur;
460 do {
461 cur = CUR;
462 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
463 NEXT;
464 cur = CUR;
465 res++;
466 }
467 while ((cur == 0) && (ctxt->inputNr > 1) &&
468 (ctxt->instate != XML_PARSER_COMMENT)) {
469 xmlPopInput(ctxt);
470 cur = CUR;
471 }
472 /*
473 * Need to handle support of entities branching here
474 */
475 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
476 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
477 }
Owen Taylor3473f882001-02-23 17:55:21 +0000478 return(res);
479}
480
481/************************************************************************
482 * *
483 * Commodity functions to handle entities *
484 * *
485 ************************************************************************/
486
487/**
488 * xmlPopInput:
489 * @ctxt: an XML parser context
490 *
491 * xmlPopInput: the current input pointed by ctxt->input came to an end
492 * pop it and return the next char.
493 *
494 * Returns the current xmlChar in the parser context
495 */
496xmlChar
497xmlPopInput(xmlParserCtxtPtr ctxt) {
498 if (ctxt->inputNr == 1) return(0); /* End of main Input */
499 if (xmlParserDebugEntities)
500 xmlGenericError(xmlGenericErrorContext,
501 "Popping input %d\n", ctxt->inputNr);
502 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +0000503 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000504 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
505 return(xmlPopInput(ctxt));
506 return(CUR);
507}
508
509/**
510 * xmlPushInput:
511 * @ctxt: an XML parser context
512 * @input: an XML parser input fragment (entity, XML fragment ...).
513 *
514 * xmlPushInput: switch to a new input stream which is stacked on top
515 * of the previous one(s).
516 */
517void
518xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
519 if (input == NULL) return;
520
521 if (xmlParserDebugEntities) {
522 if ((ctxt->input != NULL) && (ctxt->input->filename))
523 xmlGenericError(xmlGenericErrorContext,
524 "%s(%d): ", ctxt->input->filename,
525 ctxt->input->line);
526 xmlGenericError(xmlGenericErrorContext,
527 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
528 }
529 inputPush(ctxt, input);
530 GROW;
531}
532
533/**
534 * xmlParseCharRef:
535 * @ctxt: an XML parser context
536 *
537 * parse Reference declarations
538 *
539 * [66] CharRef ::= '&#' [0-9]+ ';' |
540 * '&#x' [0-9a-fA-F]+ ';'
541 *
542 * [ WFC: Legal Character ]
543 * Characters referred to using character references must match the
544 * production for Char.
545 *
546 * Returns the value parsed (as an int), 0 in case of error
547 */
548int
549xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000550 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000551 int count = 0;
552
Owen Taylor3473f882001-02-23 17:55:21 +0000553 /*
554 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
555 */
Daniel Veillard561b7f82002-03-20 21:55:57 +0000556 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +0000557 (NXT(2) == 'x')) {
558 SKIP(3);
559 GROW;
560 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000561 if (count++ > 20) {
562 count = 0;
563 GROW;
564 }
565 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000566 val = val * 16 + (CUR - '0');
567 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
568 val = val * 16 + (CUR - 'a') + 10;
569 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
570 val = val * 16 + (CUR - 'A') + 10;
571 else {
572 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
573 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
574 ctxt->sax->error(ctxt->userData,
575 "xmlParseCharRef: invalid hexadecimal value\n");
576 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000577 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000578 val = 0;
579 break;
580 }
581 NEXT;
582 count++;
583 }
584 if (RAW == ';') {
585 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +0000586 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +0000587 ctxt->nbChars ++;
588 ctxt->input->cur++;
589 }
Daniel Veillard561b7f82002-03-20 21:55:57 +0000590 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +0000591 SKIP(2);
592 GROW;
593 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000594 if (count++ > 20) {
595 count = 0;
596 GROW;
597 }
598 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000599 val = val * 10 + (CUR - '0');
600 else {
601 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
603 ctxt->sax->error(ctxt->userData,
604 "xmlParseCharRef: invalid decimal value\n");
605 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000606 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000607 val = 0;
608 break;
609 }
610 NEXT;
611 count++;
612 }
613 if (RAW == ';') {
614 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +0000615 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +0000616 ctxt->nbChars ++;
617 ctxt->input->cur++;
618 }
619 } else {
620 ctxt->errNo = XML_ERR_INVALID_CHARREF;
621 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
622 ctxt->sax->error(ctxt->userData,
623 "xmlParseCharRef: invalid value\n");
624 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000625 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000626 }
627
628 /*
629 * [ WFC: Legal Character ]
630 * Characters referred to using character references must match the
631 * production for Char.
632 */
633 if (IS_CHAR(val)) {
634 return(val);
635 } else {
636 ctxt->errNo = XML_ERR_INVALID_CHAR;
637 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000638 ctxt->sax->error(ctxt->userData,
639 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000640 val);
641 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000642 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000643 }
644 return(0);
645}
646
647/**
648 * xmlParseStringCharRef:
649 * @ctxt: an XML parser context
650 * @str: a pointer to an index in the string
651 *
652 * parse Reference declarations, variant parsing from a string rather
653 * than an an input flow.
654 *
655 * [66] CharRef ::= '&#' [0-9]+ ';' |
656 * '&#x' [0-9a-fA-F]+ ';'
657 *
658 * [ WFC: Legal Character ]
659 * Characters referred to using character references must match the
660 * production for Char.
661 *
662 * Returns the value parsed (as an int), 0 in case of error, str will be
663 * updated to the current value of the index
664 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000665static int
Owen Taylor3473f882001-02-23 17:55:21 +0000666xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
667 const xmlChar *ptr;
668 xmlChar cur;
669 int val = 0;
670
671 if ((str == NULL) || (*str == NULL)) return(0);
672 ptr = *str;
673 cur = *ptr;
674 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
675 ptr += 3;
676 cur = *ptr;
677 while (cur != ';') { /* Non input consuming loop */
678 if ((cur >= '0') && (cur <= '9'))
679 val = val * 16 + (cur - '0');
680 else if ((cur >= 'a') && (cur <= 'f'))
681 val = val * 16 + (cur - 'a') + 10;
682 else if ((cur >= 'A') && (cur <= 'F'))
683 val = val * 16 + (cur - 'A') + 10;
684 else {
685 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
686 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
687 ctxt->sax->error(ctxt->userData,
688 "xmlParseStringCharRef: invalid hexadecimal value\n");
689 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000690 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000691 val = 0;
692 break;
693 }
694 ptr++;
695 cur = *ptr;
696 }
697 if (cur == ';')
698 ptr++;
699 } else if ((cur == '&') && (ptr[1] == '#')){
700 ptr += 2;
701 cur = *ptr;
702 while (cur != ';') { /* Non input consuming loops */
703 if ((cur >= '0') && (cur <= '9'))
704 val = val * 10 + (cur - '0');
705 else {
706 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
707 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
708 ctxt->sax->error(ctxt->userData,
709 "xmlParseStringCharRef: invalid decimal value\n");
710 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000711 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000712 val = 0;
713 break;
714 }
715 ptr++;
716 cur = *ptr;
717 }
718 if (cur == ';')
719 ptr++;
720 } else {
721 ctxt->errNo = XML_ERR_INVALID_CHARREF;
722 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
723 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000724 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000725 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000726 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000727 return(0);
728 }
729 *str = ptr;
730
731 /*
732 * [ WFC: Legal Character ]
733 * Characters referred to using character references must match the
734 * production for Char.
735 */
736 if (IS_CHAR(val)) {
737 return(val);
738 } else {
739 ctxt->errNo = XML_ERR_INVALID_CHAR;
740 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
741 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000742 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000743 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000744 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000745 }
746 return(0);
747}
748
749/**
Daniel Veillardf5582f12002-06-11 10:08:16 +0000750 * xmlNewBlanksWrapperInputStream:
751 * @ctxt: an XML parser context
752 * @entity: an Entity pointer
753 *
754 * Create a new input stream for wrapping
755 * blanks around a PEReference
756 *
757 * Returns the new input stream or NULL
758 */
759
760static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
761
Daniel Veillardf4862f02002-09-10 11:13:43 +0000762static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +0000763xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
764 xmlParserInputPtr input;
765 xmlChar *buffer;
766 size_t length;
767 if (entity == NULL) {
768 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
769 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
770 ctxt->sax->error(ctxt->userData,
771 "internal: xmlNewBlanksWrapperInputStream entity = NULL\n");
772 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
773 return(NULL);
774 }
775 if (xmlParserDebugEntities)
776 xmlGenericError(xmlGenericErrorContext,
777 "new blanks wrapper for entity: %s\n", entity->name);
778 input = xmlNewInputStream(ctxt);
779 if (input == NULL) {
780 return(NULL);
781 }
782 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +0000783 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +0000784 if (buffer == NULL) {
785 return(NULL);
786 }
787 buffer [0] = ' ';
788 buffer [1] = '%';
789 buffer [length-3] = ';';
790 buffer [length-2] = ' ';
791 buffer [length-1] = 0;
792 memcpy(buffer + 2, entity->name, length - 5);
793 input->free = deallocblankswrapper;
794 input->base = buffer;
795 input->cur = buffer;
796 input->length = length;
797 input->end = &buffer[length];
798 return(input);
799}
800
801/**
Owen Taylor3473f882001-02-23 17:55:21 +0000802 * xmlParserHandlePEReference:
803 * @ctxt: the parser context
804 *
805 * [69] PEReference ::= '%' Name ';'
806 *
807 * [ WFC: No Recursion ]
808 * A parsed entity must not contain a recursive
809 * reference to itself, either directly or indirectly.
810 *
811 * [ WFC: Entity Declared ]
812 * In a document without any DTD, a document with only an internal DTD
813 * subset which contains no parameter entity references, or a document
814 * with "standalone='yes'", ... ... The declaration of a parameter
815 * entity must precede any reference to it...
816 *
817 * [ VC: Entity Declared ]
818 * In a document with an external subset or external parameter entities
819 * with "standalone='no'", ... ... The declaration of a parameter entity
820 * must precede any reference to it...
821 *
822 * [ WFC: In DTD ]
823 * Parameter-entity references may only appear in the DTD.
824 * NOTE: misleading but this is handled.
825 *
826 * A PEReference may have been detected in the current input stream
827 * the handling is done accordingly to
828 * http://www.w3.org/TR/REC-xml#entproc
829 * i.e.
830 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000831 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000832 */
833void
834xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +0000835 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +0000836 xmlEntityPtr entity = NULL;
837 xmlParserInputPtr input;
838
Owen Taylor3473f882001-02-23 17:55:21 +0000839 if (RAW != '%') return;
840 switch(ctxt->instate) {
841 case XML_PARSER_CDATA_SECTION:
842 return;
843 case XML_PARSER_COMMENT:
844 return;
845 case XML_PARSER_START_TAG:
846 return;
847 case XML_PARSER_END_TAG:
848 return;
849 case XML_PARSER_EOF:
850 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
851 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
852 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
853 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000854 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000855 return;
856 case XML_PARSER_PROLOG:
857 case XML_PARSER_START:
858 case XML_PARSER_MISC:
859 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
860 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
861 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
862 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000863 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000864 return;
865 case XML_PARSER_ENTITY_DECL:
866 case XML_PARSER_CONTENT:
867 case XML_PARSER_ATTRIBUTE_VALUE:
868 case XML_PARSER_PI:
869 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +0000870 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +0000871 /* we just ignore it there */
872 return;
873 case XML_PARSER_EPILOG:
874 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
875 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
876 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
877 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000878 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000879 return;
880 case XML_PARSER_ENTITY_VALUE:
881 /*
882 * NOTE: in the case of entity values, we don't do the
883 * substitution here since we need the literal
884 * entity value to be able to save the internal
885 * subset of the document.
886 * This will be handled by xmlStringDecodeEntities
887 */
888 return;
889 case XML_PARSER_DTD:
890 /*
891 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
892 * In the internal DTD subset, parameter-entity references
893 * can occur only where markup declarations can occur, not
894 * within markup declarations.
895 * In that case this is handled in xmlParseMarkupDecl
896 */
897 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
898 return;
Daniel Veillardf5582f12002-06-11 10:08:16 +0000899 if (IS_BLANK(NXT(1)) || NXT(1) == 0)
900 return;
Owen Taylor3473f882001-02-23 17:55:21 +0000901 break;
902 case XML_PARSER_IGNORE:
903 return;
904 }
905
906 NEXT;
907 name = xmlParseName(ctxt);
908 if (xmlParserDebugEntities)
909 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000910 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000911 if (name == NULL) {
912 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
913 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000914 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000915 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000916 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000917 } else {
918 if (RAW == ';') {
919 NEXT;
920 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
921 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
922 if (entity == NULL) {
923
924 /*
925 * [ WFC: Entity Declared ]
926 * In a document without any DTD, a document with only an
927 * internal DTD subset which contains no parameter entity
928 * references, or a document with "standalone='yes'", ...
929 * ... The declaration of a parameter entity must precede
930 * any reference to it...
931 */
932 if ((ctxt->standalone == 1) ||
933 ((ctxt->hasExternalSubset == 0) &&
934 (ctxt->hasPErefs == 0))) {
935 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
936 ctxt->sax->error(ctxt->userData,
937 "PEReference: %%%s; not found\n", name);
938 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000939 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000940 } else {
941 /*
942 * [ VC: Entity Declared ]
943 * In a document with an external subset or external
944 * parameter entities with "standalone='no'", ...
945 * ... The declaration of a parameter entity must precede
946 * any reference to it...
947 */
948 if ((!ctxt->disableSAX) &&
949 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
950 ctxt->vctxt.error(ctxt->vctxt.userData,
951 "PEReference: %%%s; not found\n", name);
952 } else if ((!ctxt->disableSAX) &&
953 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
954 ctxt->sax->warning(ctxt->userData,
955 "PEReference: %%%s; not found\n", name);
956 ctxt->valid = 0;
957 }
Daniel Veillardf5582f12002-06-11 10:08:16 +0000958 } else if (ctxt->input->free != deallocblankswrapper) {
959 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
960 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +0000961 } else {
962 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
963 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000964 xmlChar start[4];
965 xmlCharEncoding enc;
966
Owen Taylor3473f882001-02-23 17:55:21 +0000967 /*
968 * handle the extra spaces added before and after
969 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000970 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000971 */
972 input = xmlNewEntityInputStream(ctxt, entity);
973 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000974
975 /*
976 * Get the 4 first bytes and decode the charset
977 * if enc != XML_CHAR_ENCODING_NONE
978 * plug some encoding conversion routines.
979 */
980 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +0000981 if (entity->length >= 4) {
982 start[0] = RAW;
983 start[1] = NXT(1);
984 start[2] = NXT(2);
985 start[3] = NXT(3);
986 enc = xmlDetectCharEncoding(start, 4);
987 if (enc != XML_CHAR_ENCODING_NONE) {
988 xmlSwitchEncoding(ctxt, enc);
989 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000990 }
991
Owen Taylor3473f882001-02-23 17:55:21 +0000992 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
993 (RAW == '<') && (NXT(1) == '?') &&
994 (NXT(2) == 'x') && (NXT(3) == 'm') &&
995 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
996 xmlParseTextDecl(ctxt);
997 }
Owen Taylor3473f882001-02-23 17:55:21 +0000998 } else {
999 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1000 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001001 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +00001002 name);
1003 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001004 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001005 }
1006 }
1007 } else {
1008 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1009 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1010 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001011 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001012 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001013 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001014 }
Owen Taylor3473f882001-02-23 17:55:21 +00001015 }
1016}
1017
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates @buffer accordingly. It must be
 * used inside a function returning a pointer: when the reallocation
 * fails the old buffer is released, an error is reported and the
 * enclosing function returns NULL.
 *
 * The result of xmlRealloc() goes through a temporary so the original
 * block is still reachable (and can be freed) on failure.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *growBuffer_tmp;                                            \
    buffer##_size *= 2;                                                 \
    growBuffer_tmp = (xmlChar *)                                        \
        xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));            \
    if (growBuffer_tmp == NULL) {                                       \
        xmlGenericError(xmlGenericErrorContext, "realloc failed");      \
        xmlFree(buffer);                                                \
        return(NULL);                                                   \
    }                                                                   \
    buffer = growBuffer_tmp;                                            \
}
1030
1031/**
1032 * xmlStringDecodeEntities:
1033 * @ctxt: the parser context
1034 * @str: the input string
1035 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1036 * @end: an end marker xmlChar, 0 if none
1037 * @end2: an end marker xmlChar, 0 if none
1038 * @end3: an end marker xmlChar, 0 if none
1039 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001040 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001041 *
1042 * [67] Reference ::= EntityRef | CharRef
1043 *
1044 * [69] PEReference ::= '%' Name ';'
1045 *
1046 * Returns A newly allocated string with the substitution done. The caller
1047 * must deallocate it !
1048 */
1049xmlChar *
1050xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1051 xmlChar end, xmlChar end2, xmlChar end3) {
1052 xmlChar *buffer = NULL;
1053 int buffer_size = 0;
1054
1055 xmlChar *current = NULL;
1056 xmlEntityPtr ent;
1057 int c,l;
1058 int nbchars = 0;
1059
1060 if (str == NULL)
1061 return(NULL);
1062
1063 if (ctxt->depth > 40) {
1064 ctxt->errNo = XML_ERR_ENTITY_LOOP;
1065 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1066 ctxt->sax->error(ctxt->userData,
1067 "Detected entity reference loop\n");
1068 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001069 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001070 return(NULL);
1071 }
1072
1073 /*
1074 * allocate a translation buffer.
1075 */
1076 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001077 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001078 if (buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00001079 xmlGenericError(xmlGenericErrorContext,
1080 "xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00001081 return(NULL);
1082 }
1083
1084 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001085 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001086 * we are operating on already parsed values.
1087 */
1088 c = CUR_SCHAR(str, l);
1089 while ((c != 0) && (c != end) && /* non input consuming loop */
1090 (c != end2) && (c != end3)) {
1091
1092 if (c == 0) break;
1093 if ((c == '&') && (str[1] == '#')) {
1094 int val = xmlParseStringCharRef(ctxt, &str);
1095 if (val != 0) {
1096 COPY_BUF(0,buffer,nbchars,val);
1097 }
1098 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1099 if (xmlParserDebugEntities)
1100 xmlGenericError(xmlGenericErrorContext,
1101 "String decoding Entity Reference: %.30s\n",
1102 str);
1103 ent = xmlParseStringEntityRef(ctxt, &str);
1104 if ((ent != NULL) &&
1105 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1106 if (ent->content != NULL) {
1107 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1108 } else {
1109 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1110 ctxt->sax->error(ctxt->userData,
1111 "internal error entity has no content\n");
1112 }
1113 } else if ((ent != NULL) && (ent->content != NULL)) {
1114 xmlChar *rep;
1115
1116 ctxt->depth++;
1117 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1118 0, 0, 0);
1119 ctxt->depth--;
1120 if (rep != NULL) {
1121 current = rep;
1122 while (*current != 0) { /* non input consuming loop */
1123 buffer[nbchars++] = *current++;
1124 if (nbchars >
1125 buffer_size - XML_PARSER_BUFFER_SIZE) {
1126 growBuffer(buffer);
1127 }
1128 }
1129 xmlFree(rep);
1130 }
1131 } else if (ent != NULL) {
1132 int i = xmlStrlen(ent->name);
1133 const xmlChar *cur = ent->name;
1134
1135 buffer[nbchars++] = '&';
1136 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1137 growBuffer(buffer);
1138 }
1139 for (;i > 0;i--)
1140 buffer[nbchars++] = *cur++;
1141 buffer[nbchars++] = ';';
1142 }
1143 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1144 if (xmlParserDebugEntities)
1145 xmlGenericError(xmlGenericErrorContext,
1146 "String decoding PE Reference: %.30s\n", str);
1147 ent = xmlParseStringPEReference(ctxt, &str);
1148 if (ent != NULL) {
1149 xmlChar *rep;
1150
1151 ctxt->depth++;
1152 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1153 0, 0, 0);
1154 ctxt->depth--;
1155 if (rep != NULL) {
1156 current = rep;
1157 while (*current != 0) { /* non input consuming loop */
1158 buffer[nbchars++] = *current++;
1159 if (nbchars >
1160 buffer_size - XML_PARSER_BUFFER_SIZE) {
1161 growBuffer(buffer);
1162 }
1163 }
1164 xmlFree(rep);
1165 }
1166 }
1167 } else {
1168 COPY_BUF(l,buffer,nbchars,c);
1169 str += l;
1170 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1171 growBuffer(buffer);
1172 }
1173 }
1174 c = CUR_SCHAR(str, l);
1175 }
1176 buffer[nbchars++] = 0;
1177 return(buffer);
1178}
1179
1180
1181/************************************************************************
1182 * *
1183 * Commodity functions to handle xmlChars *
1184 * *
1185 ************************************************************************/
1186
1187/**
1188 * xmlStrndup:
1189 * @cur: the input xmlChar *
1190 * @len: the len of @cur
1191 *
1192 * a strndup for array of xmlChar's
1193 *
1194 * Returns a new xmlChar * or NULL
1195 */
1196xmlChar *
1197xmlStrndup(const xmlChar *cur, int len) {
1198 xmlChar *ret;
1199
1200 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001201 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001202 if (ret == NULL) {
1203 xmlGenericError(xmlGenericErrorContext,
1204 "malloc of %ld byte failed\n",
1205 (len + 1) * (long)sizeof(xmlChar));
1206 return(NULL);
1207 }
1208 memcpy(ret, cur, len * sizeof(xmlChar));
1209 ret[len] = 0;
1210 return(ret);
1211}
1212
1213/**
1214 * xmlStrdup:
1215 * @cur: the input xmlChar *
1216 *
1217 * a strdup for array of xmlChar's. Since they are supposed to be
1218 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1219 * a termination mark of '0'.
1220 *
1221 * Returns a new xmlChar * or NULL
1222 */
1223xmlChar *
1224xmlStrdup(const xmlChar *cur) {
1225 const xmlChar *p = cur;
1226
1227 if (cur == NULL) return(NULL);
1228 while (*p != 0) p++; /* non input consuming */
1229 return(xmlStrndup(cur, p - cur));
1230}
1231
1232/**
1233 * xmlCharStrndup:
1234 * @cur: the input char *
1235 * @len: the len of @cur
1236 *
1237 * a strndup for char's to xmlChar's
1238 *
1239 * Returns a new xmlChar * or NULL
1240 */
1241
1242xmlChar *
1243xmlCharStrndup(const char *cur, int len) {
1244 int i;
1245 xmlChar *ret;
1246
1247 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001248 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001249 if (ret == NULL) {
1250 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1251 (len + 1) * (long)sizeof(xmlChar));
1252 return(NULL);
1253 }
1254 for (i = 0;i < len;i++)
1255 ret[i] = (xmlChar) cur[i];
1256 ret[len] = 0;
1257 return(ret);
1258}
1259
1260/**
1261 * xmlCharStrdup:
1262 * @cur: the input char *
Owen Taylor3473f882001-02-23 17:55:21 +00001263 *
1264 * a strdup for char's to xmlChar's
1265 *
1266 * Returns a new xmlChar * or NULL
1267 */
1268
1269xmlChar *
1270xmlCharStrdup(const char *cur) {
1271 const char *p = cur;
1272
1273 if (cur == NULL) return(NULL);
1274 while (*p != '\0') p++; /* non input consuming */
1275 return(xmlCharStrndup(cur, p - cur));
1276}
1277
1278/**
1279 * xmlStrcmp:
1280 * @str1: the first xmlChar *
1281 * @str2: the second xmlChar *
1282 *
1283 * a strcmp for xmlChar's
1284 *
1285 * Returns the integer result of the comparison
1286 */
1287
1288int
1289xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1290 register int tmp;
1291
1292 if (str1 == str2) return(0);
1293 if (str1 == NULL) return(-1);
1294 if (str2 == NULL) return(1);
1295 do {
1296 tmp = *str1++ - *str2;
1297 if (tmp != 0) return(tmp);
1298 } while (*str2++ != 0);
1299 return 0;
1300}
1301
1302/**
1303 * xmlStrEqual:
1304 * @str1: the first xmlChar *
1305 * @str2: the second xmlChar *
1306 *
1307 * Check if both string are equal of have same content
1308 * Should be a bit more readable and faster than xmlStrEqual()
1309 *
1310 * Returns 1 if they are equal, 0 if they are different
1311 */
1312
1313int
1314xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1315 if (str1 == str2) return(1);
1316 if (str1 == NULL) return(0);
1317 if (str2 == NULL) return(0);
1318 do {
1319 if (*str1++ != *str2) return(0);
1320 } while (*str2++);
1321 return(1);
1322}
1323
1324/**
1325 * xmlStrncmp:
1326 * @str1: the first xmlChar *
1327 * @str2: the second xmlChar *
1328 * @len: the max comparison length
1329 *
1330 * a strncmp for xmlChar's
1331 *
1332 * Returns the integer result of the comparison
1333 */
1334
1335int
1336xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1337 register int tmp;
1338
1339 if (len <= 0) return(0);
1340 if (str1 == str2) return(0);
1341 if (str1 == NULL) return(-1);
1342 if (str2 == NULL) return(1);
1343 do {
1344 tmp = *str1++ - *str2;
1345 if (tmp != 0 || --len == 0) return(tmp);
1346 } while (*str2++ != 0);
1347 return 0;
1348}
1349
Daniel Veillardb44025c2001-10-11 22:55:55 +00001350static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001351 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1352 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1353 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1354 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1355 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1356 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1357 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1358 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1359 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1360 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1361 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1362 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1363 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1364 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1365 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1366 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1367 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1368 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1369 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1370 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1371 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1372 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1373 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1374 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1375 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1376 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1377 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1378 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1379 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1380 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1381 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1382 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1383};
1384
1385/**
1386 * xmlStrcasecmp:
1387 * @str1: the first xmlChar *
1388 * @str2: the second xmlChar *
1389 *
1390 * a strcasecmp for xmlChar's
1391 *
1392 * Returns the integer result of the comparison
1393 */
1394
1395int
1396xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1397 register int tmp;
1398
1399 if (str1 == str2) return(0);
1400 if (str1 == NULL) return(-1);
1401 if (str2 == NULL) return(1);
1402 do {
1403 tmp = casemap[*str1++] - casemap[*str2];
1404 if (tmp != 0) return(tmp);
1405 } while (*str2++ != 0);
1406 return 0;
1407}
1408
1409/**
1410 * xmlStrncasecmp:
1411 * @str1: the first xmlChar *
1412 * @str2: the second xmlChar *
1413 * @len: the max comparison length
1414 *
1415 * a strncasecmp for xmlChar's
1416 *
1417 * Returns the integer result of the comparison
1418 */
1419
1420int
1421xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1422 register int tmp;
1423
1424 if (len <= 0) return(0);
1425 if (str1 == str2) return(0);
1426 if (str1 == NULL) return(-1);
1427 if (str2 == NULL) return(1);
1428 do {
1429 tmp = casemap[*str1++] - casemap[*str2];
1430 if (tmp != 0 || --len == 0) return(tmp);
1431 } while (*str2++ != 0);
1432 return 0;
1433}
1434
1435/**
1436 * xmlStrchr:
1437 * @str: the xmlChar * array
1438 * @val: the xmlChar to search
1439 *
1440 * a strchr for xmlChar's
1441 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001442 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001443 */
1444
1445const xmlChar *
1446xmlStrchr(const xmlChar *str, xmlChar val) {
1447 if (str == NULL) return(NULL);
1448 while (*str != 0) { /* non input consuming */
1449 if (*str == val) return((xmlChar *) str);
1450 str++;
1451 }
1452 return(NULL);
1453}
1454
1455/**
1456 * xmlStrstr:
1457 * @str: the xmlChar * array (haystack)
1458 * @val: the xmlChar to search (needle)
1459 *
1460 * a strstr for xmlChar's
1461 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001462 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001463 */
1464
1465const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001466xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001467 int n;
1468
1469 if (str == NULL) return(NULL);
1470 if (val == NULL) return(NULL);
1471 n = xmlStrlen(val);
1472
1473 if (n == 0) return(str);
1474 while (*str != 0) { /* non input consuming */
1475 if (*str == *val) {
1476 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1477 }
1478 str++;
1479 }
1480 return(NULL);
1481}
1482
1483/**
1484 * xmlStrcasestr:
1485 * @str: the xmlChar * array (haystack)
1486 * @val: the xmlChar to search (needle)
1487 *
1488 * a case-ignoring strstr for xmlChar's
1489 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001490 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001491 */
1492
1493const xmlChar *
1494xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1495 int n;
1496
1497 if (str == NULL) return(NULL);
1498 if (val == NULL) return(NULL);
1499 n = xmlStrlen(val);
1500
1501 if (n == 0) return(str);
1502 while (*str != 0) { /* non input consuming */
1503 if (casemap[*str] == casemap[*val])
1504 if (!xmlStrncasecmp(str, val, n)) return(str);
1505 str++;
1506 }
1507 return(NULL);
1508}
1509
1510/**
1511 * xmlStrsub:
1512 * @str: the xmlChar * array (haystack)
1513 * @start: the index of the first char (zero based)
1514 * @len: the length of the substring
1515 *
1516 * Extract a substring of a given string
1517 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001518 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001519 */
1520
1521xmlChar *
1522xmlStrsub(const xmlChar *str, int start, int len) {
1523 int i;
1524
1525 if (str == NULL) return(NULL);
1526 if (start < 0) return(NULL);
1527 if (len < 0) return(NULL);
1528
1529 for (i = 0;i < start;i++) {
1530 if (*str == 0) return(NULL);
1531 str++;
1532 }
1533 if (*str == 0) return(NULL);
1534 return(xmlStrndup(str, len));
1535}
1536
1537/**
1538 * xmlStrlen:
1539 * @str: the xmlChar * array
1540 *
1541 * length of a xmlChar's string
1542 *
1543 * Returns the number of xmlChar contained in the ARRAY.
1544 */
1545
1546int
1547xmlStrlen(const xmlChar *str) {
1548 int len = 0;
1549
1550 if (str == NULL) return(0);
1551 while (*str != 0) { /* non input consuming */
1552 str++;
1553 len++;
1554 }
1555 return(len);
1556}
1557
1558/**
1559 * xmlStrncat:
1560 * @cur: the original xmlChar * array
1561 * @add: the xmlChar * array added
1562 * @len: the length of @add
1563 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001564 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001565 * first bytes of @add.
1566 *
1567 * Returns a new xmlChar *, the original @cur is reallocated if needed
1568 * and should not be freed
1569 */
1570
1571xmlChar *
1572xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1573 int size;
1574 xmlChar *ret;
1575
1576 if ((add == NULL) || (len == 0))
1577 return(cur);
1578 if (cur == NULL)
1579 return(xmlStrndup(add, len));
1580
1581 size = xmlStrlen(cur);
1582 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1583 if (ret == NULL) {
1584 xmlGenericError(xmlGenericErrorContext,
1585 "xmlStrncat: realloc of %ld byte failed\n",
1586 (size + len + 1) * (long)sizeof(xmlChar));
1587 return(cur);
1588 }
1589 memcpy(&ret[size], add, len * sizeof(xmlChar));
1590 ret[size + len] = 0;
1591 return(ret);
1592}
1593
1594/**
1595 * xmlStrcat:
1596 * @cur: the original xmlChar * array
1597 * @add: the xmlChar * array added
1598 *
1599 * a strcat for array of xmlChar's. Since they are supposed to be
1600 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1601 * a termination mark of '0'.
1602 *
1603 * Returns a new xmlChar * containing the concatenated string.
1604 */
1605xmlChar *
1606xmlStrcat(xmlChar *cur, const xmlChar *add) {
1607 const xmlChar *p = add;
1608
1609 if (add == NULL) return(cur);
1610 if (cur == NULL)
1611 return(xmlStrdup(add));
1612
1613 while (*p != 0) p++; /* non input consuming */
1614 return(xmlStrncat(cur, add, p - add));
1615}
1616
1617/************************************************************************
1618 * *
1619 * Commodity functions, cleanup needed ? *
1620 * *
1621 ************************************************************************/
1622
1623/**
1624 * areBlanks:
1625 * @ctxt: an XML parser context
1626 * @str: a xmlChar *
1627 * @len: the size of @str
1628 *
1629 * Is this a sequence of blank chars that one can ignore ?
1630 *
1631 * Returns 1 if ignorable 0 otherwise.
1632 */
1633
1634static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1635 int i, ret;
1636 xmlNodePtr lastChild;
1637
Daniel Veillard05c13a22001-09-09 08:38:09 +00001638 /*
1639 * Don't spend time trying to differentiate them, the same callback is
1640 * used !
1641 */
1642 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001643 return(0);
1644
Owen Taylor3473f882001-02-23 17:55:21 +00001645 /*
1646 * Check for xml:space value.
1647 */
1648 if (*(ctxt->space) == 1)
1649 return(0);
1650
1651 /*
1652 * Check that the string is made of blanks
1653 */
1654 for (i = 0;i < len;i++)
1655 if (!(IS_BLANK(str[i]))) return(0);
1656
1657 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001658 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001659 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001660 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001661 if (ctxt->myDoc != NULL) {
1662 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1663 if (ret == 0) return(1);
1664 if (ret == 1) return(0);
1665 }
1666
1667 /*
1668 * Otherwise, heuristic :-\
1669 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001670 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001671 if ((ctxt->node->children == NULL) &&
1672 (RAW == '<') && (NXT(1) == '/')) return(0);
1673
1674 lastChild = xmlGetLastChild(ctxt->node);
1675 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001676 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1677 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001678 } else if (xmlNodeIsText(lastChild))
1679 return(0);
1680 else if ((ctxt->node->children != NULL) &&
1681 (xmlNodeIsText(ctxt->node->children)))
1682 return(0);
1683 return(1);
1684}
1685
Owen Taylor3473f882001-02-23 17:55:21 +00001686/************************************************************************
1687 * *
1688 * Extra stuff for namespace support *
1689 * Relates to http://www.w3.org/TR/WD-xml-names *
1690 * *
1691 ************************************************************************/
1692
1693/**
1694 * xmlSplitQName:
1695 * @ctxt: an XML parser context
1696 * @name: an XML parser context
1697 * @prefix: a xmlChar **
1698 *
1699 * parse an UTF8 encoded XML qualified name string
1700 *
1701 * [NS 5] QName ::= (Prefix ':')? LocalPart
1702 *
1703 * [NS 6] Prefix ::= NCName
1704 *
1705 * [NS 7] LocalPart ::= NCName
1706 *
1707 * Returns the local part, and prefix is updated
1708 * to get the Prefix if any.
1709 */
1710
1711xmlChar *
1712xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1713 xmlChar buf[XML_MAX_NAMELEN + 5];
1714 xmlChar *buffer = NULL;
1715 int len = 0;
1716 int max = XML_MAX_NAMELEN;
1717 xmlChar *ret = NULL;
1718 const xmlChar *cur = name;
1719 int c;
1720
1721 *prefix = NULL;
1722
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00001723 if (cur == NULL) return(NULL);
1724
Owen Taylor3473f882001-02-23 17:55:21 +00001725#ifndef XML_XML_NAMESPACE
1726 /* xml: prefix is not really a namespace */
1727 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1728 (cur[2] == 'l') && (cur[3] == ':'))
1729 return(xmlStrdup(name));
1730#endif
1731
Daniel Veillard597bc482003-07-24 16:08:28 +00001732 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00001733 if (cur[0] == ':')
1734 return(xmlStrdup(name));
1735
1736 c = *cur++;
1737 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1738 buf[len++] = c;
1739 c = *cur++;
1740 }
1741 if (len >= max) {
1742 /*
1743 * Okay someone managed to make a huge name, so he's ready to pay
1744 * for the processing speed.
1745 */
1746 max = len * 2;
1747
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001748 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001749 if (buffer == NULL) {
1750 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1751 ctxt->sax->error(ctxt->userData,
1752 "xmlSplitQName: out of memory\n");
1753 return(NULL);
1754 }
1755 memcpy(buffer, buf, len);
1756 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1757 if (len + 10 > max) {
1758 max *= 2;
1759 buffer = (xmlChar *) xmlRealloc(buffer,
1760 max * sizeof(xmlChar));
1761 if (buffer == NULL) {
1762 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1763 ctxt->sax->error(ctxt->userData,
1764 "xmlSplitQName: out of memory\n");
1765 return(NULL);
1766 }
1767 }
1768 buffer[len++] = c;
1769 c = *cur++;
1770 }
1771 buffer[len] = 0;
1772 }
1773
Daniel Veillard597bc482003-07-24 16:08:28 +00001774 /* nasty but well=formed
1775 if ((c == ':') && (*cur == 0)) {
1776 return(xmlStrdup(name));
1777 } */
1778
Owen Taylor3473f882001-02-23 17:55:21 +00001779 if (buffer == NULL)
1780 ret = xmlStrndup(buf, len);
1781 else {
1782 ret = buffer;
1783 buffer = NULL;
1784 max = XML_MAX_NAMELEN;
1785 }
1786
1787
1788 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00001789 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001790 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00001791 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00001792 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00001793 }
Owen Taylor3473f882001-02-23 17:55:21 +00001794 len = 0;
1795
Daniel Veillardbb284f42002-10-16 18:02:47 +00001796 /*
1797 * Check that the first character is proper to start
1798 * a new name
1799 */
1800 if (!(((c >= 0x61) && (c <= 0x7A)) ||
1801 ((c >= 0x41) && (c <= 0x5A)) ||
1802 (c == '_') || (c == ':'))) {
1803 int l;
1804 int first = CUR_SCHAR(cur, l);
1805
1806 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001807 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1808 (ctxt->sax->error != NULL))
Daniel Veillardbb284f42002-10-16 18:02:47 +00001809 ctxt->sax->error(ctxt->userData,
1810 "Name %s is not XML Namespace compliant\n",
1811 name);
1812 }
1813 }
1814 cur++;
1815
Owen Taylor3473f882001-02-23 17:55:21 +00001816 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1817 buf[len++] = c;
1818 c = *cur++;
1819 }
1820 if (len >= max) {
1821 /*
1822 * Okay someone managed to make a huge name, so he's ready to pay
1823 * for the processing speed.
1824 */
1825 max = len * 2;
1826
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001827 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001828 if (buffer == NULL) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001829 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1830 (ctxt->sax->error != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00001831 ctxt->sax->error(ctxt->userData,
1832 "xmlSplitQName: out of memory\n");
1833 return(NULL);
1834 }
1835 memcpy(buffer, buf, len);
1836 while (c != 0) { /* tested bigname2.xml */
1837 if (len + 10 > max) {
1838 max *= 2;
1839 buffer = (xmlChar *) xmlRealloc(buffer,
1840 max * sizeof(xmlChar));
1841 if (buffer == NULL) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001842 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1843 (ctxt->sax->error != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00001844 ctxt->sax->error(ctxt->userData,
1845 "xmlSplitQName: out of memory\n");
1846 return(NULL);
1847 }
1848 }
1849 buffer[len++] = c;
1850 c = *cur++;
1851 }
1852 buffer[len] = 0;
1853 }
1854
1855 if (buffer == NULL)
1856 ret = xmlStrndup(buf, len);
1857 else {
1858 ret = buffer;
1859 }
1860 }
1861
1862 return(ret);
1863}
1864
1865/************************************************************************
1866 * *
1867 * The parser itself *
1868 * Relates to http://www.w3.org/TR/REC-xml *
1869 * *
1870 ************************************************************************/
1871
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001872static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001873/**
1874 * xmlParseName:
1875 * @ctxt: an XML parser context
1876 *
1877 * parse an XML name.
1878 *
1879 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1880 * CombiningChar | Extender
1881 *
1882 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1883 *
1884 * [6] Names ::= Name (S Name)*
1885 *
1886 * Returns the Name parsed or NULL
1887 */
1888
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001889const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001890xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001891 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001892 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001893 int count = 0;
1894
1895 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001896
1897 /*
1898 * Accelerator for simple ASCII names
1899 */
1900 in = ctxt->input->cur;
1901 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1902 ((*in >= 0x41) && (*in <= 0x5A)) ||
1903 (*in == '_') || (*in == ':')) {
1904 in++;
1905 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1906 ((*in >= 0x41) && (*in <= 0x5A)) ||
1907 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001908 (*in == '_') || (*in == '-') ||
1909 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001910 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001911 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001912 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001913 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001914 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00001915 ctxt->nbChars += count;
1916 ctxt->input->col += count;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00001917 if (ret == NULL) {
1918 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1919 ctxt->sax->error(ctxt->userData,
1920 "XML parser: out of memory\n");
1921 ctxt->errNo = XML_ERR_NO_MEMORY;
1922 ctxt->instate = XML_PARSER_EOF;
1923 ctxt->disableSAX = 1;
1924 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001925 return(ret);
1926 }
1927 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001928 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001929}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001930
Daniel Veillard46de64e2002-05-29 08:21:33 +00001931/**
1932 * xmlParseNameAndCompare:
1933 * @ctxt: an XML parser context
1934 *
1935 * parse an XML name and compares for match
1936 * (specialized for endtag parsing)
1937 *
1938 *
1939 * Returns NULL for an illegal name, (xmlChar*) 1 for success
1940 * and the name for mismatch
1941 */
1942
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001943static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00001944xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
1945 const xmlChar *cmp = other;
1946 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001947 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00001948
1949 GROW;
1950
1951 in = ctxt->input->cur;
1952 while (*in != 0 && *in == *cmp) {
1953 ++in;
1954 ++cmp;
1955 }
1956 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
1957 /* success */
1958 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001959 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00001960 }
1961 /* failure (or end of input buffer), check with full function */
1962 ret = xmlParseName (ctxt);
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001963 if ((ret != NULL) && (xmlStrEqual (ret, other))) {
1964 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00001965 }
1966 return ret;
1967}
1968
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001969static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001970xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00001971 int len = 0, l;
1972 int c;
1973 int count = 0;
1974
1975 /*
1976 * Handler for more complex cases
1977 */
1978 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001979 c = CUR_CHAR(l);
1980 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1981 (!IS_LETTER(c) && (c != '_') &&
1982 (c != ':'))) {
1983 return(NULL);
1984 }
1985
1986 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1987 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1988 (c == '.') || (c == '-') ||
1989 (c == '_') || (c == ':') ||
1990 (IS_COMBINING(c)) ||
1991 (IS_EXTENDER(c)))) {
1992 if (count++ > 100) {
1993 count = 0;
1994 GROW;
1995 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001996 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00001997 NEXTL(l);
1998 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00001999 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002000 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002001}
2002
2003/**
2004 * xmlParseStringName:
2005 * @ctxt: an XML parser context
2006 * @str: a pointer to the string pointer (IN/OUT)
2007 *
2008 * parse an XML name.
2009 *
2010 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2011 * CombiningChar | Extender
2012 *
2013 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2014 *
2015 * [6] Names ::= Name (S Name)*
2016 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002017 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002018 * is updated to the current location in the string.
2019 */
2020
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002021static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002022xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2023 xmlChar buf[XML_MAX_NAMELEN + 5];
2024 const xmlChar *cur = *str;
2025 int len = 0, l;
2026 int c;
2027
2028 c = CUR_SCHAR(cur, l);
2029 if (!IS_LETTER(c) && (c != '_') &&
2030 (c != ':')) {
2031 return(NULL);
2032 }
2033
2034 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2035 (c == '.') || (c == '-') ||
2036 (c == '_') || (c == ':') ||
2037 (IS_COMBINING(c)) ||
2038 (IS_EXTENDER(c))) {
2039 COPY_BUF(l,buf,len,c);
2040 cur += l;
2041 c = CUR_SCHAR(cur, l);
2042 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2043 /*
2044 * Okay someone managed to make a huge name, so he's ready to pay
2045 * for the processing speed.
2046 */
2047 xmlChar *buffer;
2048 int max = len * 2;
2049
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002050 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002051 if (buffer == NULL) {
2052 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2053 ctxt->sax->error(ctxt->userData,
2054 "xmlParseStringName: out of memory\n");
2055 return(NULL);
2056 }
2057 memcpy(buffer, buf, len);
2058 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2059 (c == '.') || (c == '-') ||
2060 (c == '_') || (c == ':') ||
2061 (IS_COMBINING(c)) ||
2062 (IS_EXTENDER(c))) {
2063 if (len + 10 > max) {
2064 max *= 2;
2065 buffer = (xmlChar *) xmlRealloc(buffer,
2066 max * sizeof(xmlChar));
2067 if (buffer == NULL) {
2068 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2069 ctxt->sax->error(ctxt->userData,
2070 "xmlParseStringName: out of memory\n");
2071 return(NULL);
2072 }
2073 }
2074 COPY_BUF(l,buffer,len,c);
2075 cur += l;
2076 c = CUR_SCHAR(cur, l);
2077 }
2078 buffer[len] = 0;
2079 *str = cur;
2080 return(buffer);
2081 }
2082 }
2083 *str = cur;
2084 return(xmlStrndup(buf, len));
2085}
2086
2087/**
2088 * xmlParseNmtoken:
2089 * @ctxt: an XML parser context
2090 *
2091 * parse an XML Nmtoken.
2092 *
2093 * [7] Nmtoken ::= (NameChar)+
2094 *
2095 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2096 *
2097 * Returns the Nmtoken parsed or NULL
2098 */
2099
2100xmlChar *
2101xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2102 xmlChar buf[XML_MAX_NAMELEN + 5];
2103 int len = 0, l;
2104 int c;
2105 int count = 0;
2106
2107 GROW;
2108 c = CUR_CHAR(l);
2109
2110 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2111 (c == '.') || (c == '-') ||
2112 (c == '_') || (c == ':') ||
2113 (IS_COMBINING(c)) ||
2114 (IS_EXTENDER(c))) {
2115 if (count++ > 100) {
2116 count = 0;
2117 GROW;
2118 }
2119 COPY_BUF(l,buf,len,c);
2120 NEXTL(l);
2121 c = CUR_CHAR(l);
2122 if (len >= XML_MAX_NAMELEN) {
2123 /*
2124 * Okay someone managed to make a huge token, so he's ready to pay
2125 * for the processing speed.
2126 */
2127 xmlChar *buffer;
2128 int max = len * 2;
2129
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002130 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002131 if (buffer == NULL) {
2132 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2133 ctxt->sax->error(ctxt->userData,
2134 "xmlParseNmtoken: out of memory\n");
2135 return(NULL);
2136 }
2137 memcpy(buffer, buf, len);
2138 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2139 (c == '.') || (c == '-') ||
2140 (c == '_') || (c == ':') ||
2141 (IS_COMBINING(c)) ||
2142 (IS_EXTENDER(c))) {
2143 if (count++ > 100) {
2144 count = 0;
2145 GROW;
2146 }
2147 if (len + 10 > max) {
2148 max *= 2;
2149 buffer = (xmlChar *) xmlRealloc(buffer,
2150 max * sizeof(xmlChar));
2151 if (buffer == NULL) {
2152 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2153 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002154 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002155 return(NULL);
2156 }
2157 }
2158 COPY_BUF(l,buffer,len,c);
2159 NEXTL(l);
2160 c = CUR_CHAR(l);
2161 }
2162 buffer[len] = 0;
2163 return(buffer);
2164 }
2165 }
2166 if (len == 0)
2167 return(NULL);
2168 return(xmlStrndup(buf, len));
2169}
2170
2171/**
2172 * xmlParseEntityValue:
2173 * @ctxt: an XML parser context
2174 * @orig: if non-NULL store a copy of the original entity value
2175 *
2176 * parse a value for ENTITY declarations
2177 *
2178 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2179 * "'" ([^%&'] | PEReference | Reference)* "'"
2180 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002181 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002182 */
2183
2184xmlChar *
2185xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2186 xmlChar *buf = NULL;
2187 int len = 0;
2188 int size = XML_PARSER_BUFFER_SIZE;
2189 int c, l;
2190 xmlChar stop;
2191 xmlChar *ret = NULL;
2192 const xmlChar *cur = NULL;
2193 xmlParserInputPtr input;
2194
2195 if (RAW == '"') stop = '"';
2196 else if (RAW == '\'') stop = '\'';
2197 else {
2198 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2199 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2200 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2201 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002202 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002203 return(NULL);
2204 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002205 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002206 if (buf == NULL) {
2207 xmlGenericError(xmlGenericErrorContext,
2208 "malloc of %d byte failed\n", size);
2209 return(NULL);
2210 }
2211
2212 /*
2213 * The content of the entity definition is copied in a buffer.
2214 */
2215
2216 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2217 input = ctxt->input;
2218 GROW;
2219 NEXT;
2220 c = CUR_CHAR(l);
2221 /*
2222 * NOTE: 4.4.5 Included in Literal
2223 * When a parameter entity reference appears in a literal entity
2224 * value, ... a single or double quote character in the replacement
2225 * text is always treated as a normal data character and will not
2226 * terminate the literal.
2227 * In practice it means we stop the loop only when back at parsing
2228 * the initial entity and the quote is found
2229 */
2230 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2231 (ctxt->input != input))) {
2232 if (len + 5 >= size) {
2233 size *= 2;
2234 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2235 if (buf == NULL) {
2236 xmlGenericError(xmlGenericErrorContext,
2237 "realloc of %d byte failed\n", size);
2238 return(NULL);
2239 }
2240 }
2241 COPY_BUF(l,buf,len,c);
2242 NEXTL(l);
2243 /*
2244 * Pop-up of finished entities.
2245 */
2246 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2247 xmlPopInput(ctxt);
2248
2249 GROW;
2250 c = CUR_CHAR(l);
2251 if (c == 0) {
2252 GROW;
2253 c = CUR_CHAR(l);
2254 }
2255 }
2256 buf[len] = 0;
2257
2258 /*
2259 * Raise problem w.r.t. '&' and '%' being used in non-entities
2260 * reference constructs. Note Charref will be handled in
2261 * xmlStringDecodeEntities()
2262 */
2263 cur = buf;
2264 while (*cur != 0) { /* non input consuming */
2265 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2266 xmlChar *name;
2267 xmlChar tmp = *cur;
2268
2269 cur++;
2270 name = xmlParseStringName(ctxt, &cur);
2271 if ((name == NULL) || (*cur != ';')) {
2272 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2273 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2274 ctxt->sax->error(ctxt->userData,
2275 "EntityValue: '%c' forbidden except for entities references\n",
2276 tmp);
2277 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002278 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002279 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002280 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2281 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002282 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2283 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2284 ctxt->sax->error(ctxt->userData,
2285 "EntityValue: PEReferences forbidden in internal subset\n",
2286 tmp);
2287 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002288 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002289 }
2290 if (name != NULL)
2291 xmlFree(name);
2292 }
2293 cur++;
2294 }
2295
2296 /*
2297 * Then PEReference entities are substituted.
2298 */
2299 if (c != stop) {
2300 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2301 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2302 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2303 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002304 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002305 xmlFree(buf);
2306 } else {
2307 NEXT;
2308 /*
2309 * NOTE: 4.4.7 Bypassed
2310 * When a general entity reference appears in the EntityValue in
2311 * an entity declaration, it is bypassed and left as is.
2312 * so XML_SUBSTITUTE_REF is not set here.
2313 */
2314 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2315 0, 0, 0);
2316 if (orig != NULL)
2317 *orig = buf;
2318 else
2319 xmlFree(buf);
2320 }
2321
2322 return(ret);
2323}
2324
2325/**
2326 * xmlParseAttValue:
2327 * @ctxt: an XML parser context
2328 *
2329 * parse a value for an attribute
2330 * Note: the parser won't do substitution of entities here, this
2331 * will be handled later in xmlStringGetNodeList
2332 *
2333 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2334 * "'" ([^<&'] | Reference)* "'"
2335 *
2336 * 3.3.3 Attribute-Value Normalization:
2337 * Before the value of an attribute is passed to the application or
2338 * checked for validity, the XML processor must normalize it as follows:
2339 * - a character reference is processed by appending the referenced
2340 * character to the attribute value
2341 * - an entity reference is processed by recursively processing the
2342 * replacement text of the entity
2343 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2344 * appending #x20 to the normalized value, except that only a single
2345 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2346 * parsed entity or the literal entity value of an internal parsed entity
2347 * - other characters are processed by appending them to the normalized value
2348 * If the declared value is not CDATA, then the XML processor must further
2349 * process the normalized attribute value by discarding any leading and
2350 * trailing space (#x20) characters, and by replacing sequences of space
2351 * (#x20) characters by a single space (#x20) character.
2352 * All attributes for which no declaration has been read should be treated
2353 * by a non-validating parser as if declared CDATA.
2354 *
2355 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2356 */
2357
2358xmlChar *
Daniel Veillarde72c7562002-05-31 09:47:30 +00002359xmlParseAttValueComplex(xmlParserCtxtPtr ctxt);
2360
2361xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002362xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2363 xmlChar limit = 0;
Daniel Veillardf4862f02002-09-10 11:13:43 +00002364 const xmlChar *in = NULL;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002365 xmlChar *ret = NULL;
2366 SHRINK;
2367 GROW;
Daniel Veillarde645e8c2002-10-22 17:35:37 +00002368 in = (xmlChar *) CUR_PTR;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002369 if (*in != '"' && *in != '\'') {
2370 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2371 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2372 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2373 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002374 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002375 return(NULL);
2376 }
2377 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2378 limit = *in;
2379 ++in;
2380
2381 while (*in != limit && *in >= 0x20 && *in <= 0x7f &&
2382 *in != '&' && *in != '<'
2383 ) {
2384 ++in;
2385 }
2386 if (*in != limit) {
2387 return xmlParseAttValueComplex(ctxt);
2388 }
2389 ++in;
2390 ret = xmlStrndup (CUR_PTR + 1, in - CUR_PTR - 2);
2391 CUR_PTR = in;
2392 return ret;
2393}
2394
Daniel Veillard01c13b52002-12-10 15:19:08 +00002395/**
2396 * xmlParseAttValueComplex:
2397 * @ctxt: an XML parser context
2398 *
2399 * parse a value for an attribute, this is the fallback function
2400 * of xmlParseAttValue() when the attribute parsing requires handling
2401 * of non-ASCII characters.
2402 *
2403 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2404 */
Daniel Veillarde72c7562002-05-31 09:47:30 +00002405xmlChar *
2406xmlParseAttValueComplex(xmlParserCtxtPtr ctxt) {
2407 xmlChar limit = 0;
2408 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002409 int len = 0;
2410 int buf_size = 0;
2411 int c, l;
2412 xmlChar *current = NULL;
2413 xmlEntityPtr ent;
2414
2415
2416 SHRINK;
2417 if (NXT(0) == '"') {
2418 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2419 limit = '"';
2420 NEXT;
2421 } else if (NXT(0) == '\'') {
2422 limit = '\'';
2423 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2424 NEXT;
2425 } else {
2426 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2427 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2428 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2429 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002430 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002431 return(NULL);
2432 }
2433
2434 /*
2435 * allocate a translation buffer.
2436 */
2437 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002438 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002439 if (buf == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002440 xmlGenericError(xmlGenericErrorContext,
2441 "xmlParseAttValue: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002442 return(NULL);
2443 }
2444
2445 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002446 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002447 */
2448 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002449 while ((NXT(0) != limit) && /* checked */
2450 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002451 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002452 if (c == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00002453 if (NXT(1) == '#') {
2454 int val = xmlParseCharRef(ctxt);
2455 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002456 if (ctxt->replaceEntities) {
2457 if (len > buf_size - 10) {
2458 growBuffer(buf);
2459 }
2460 buf[len++] = '&';
2461 } else {
2462 /*
2463 * The reparsing will be done in xmlStringGetNodeList()
2464 * called by the attribute() function in SAX.c
2465 */
2466 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002467
Daniel Veillard319a7422001-09-11 09:27:09 +00002468 if (len > buf_size - 10) {
2469 growBuffer(buf);
2470 }
2471 current = &buffer[0];
2472 while (*current != 0) { /* non input consuming */
2473 buf[len++] = *current++;
2474 }
Owen Taylor3473f882001-02-23 17:55:21 +00002475 }
2476 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002477 if (len > buf_size - 10) {
2478 growBuffer(buf);
2479 }
Owen Taylor3473f882001-02-23 17:55:21 +00002480 len += xmlCopyChar(0, &buf[len], val);
2481 }
2482 } else {
2483 ent = xmlParseEntityRef(ctxt);
2484 if ((ent != NULL) &&
2485 (ctxt->replaceEntities != 0)) {
2486 xmlChar *rep;
2487
2488 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2489 rep = xmlStringDecodeEntities(ctxt, ent->content,
2490 XML_SUBSTITUTE_REF, 0, 0, 0);
2491 if (rep != NULL) {
2492 current = rep;
2493 while (*current != 0) { /* non input consuming */
2494 buf[len++] = *current++;
2495 if (len > buf_size - 10) {
2496 growBuffer(buf);
2497 }
2498 }
2499 xmlFree(rep);
2500 }
2501 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002502 if (len > buf_size - 10) {
2503 growBuffer(buf);
2504 }
Owen Taylor3473f882001-02-23 17:55:21 +00002505 if (ent->content != NULL)
2506 buf[len++] = ent->content[0];
2507 }
2508 } else if (ent != NULL) {
2509 int i = xmlStrlen(ent->name);
2510 const xmlChar *cur = ent->name;
2511
2512 /*
2513 * This may look absurd but is needed to detect
2514 * entities problems
2515 */
2516 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2517 (ent->content != NULL)) {
2518 xmlChar *rep;
2519 rep = xmlStringDecodeEntities(ctxt, ent->content,
2520 XML_SUBSTITUTE_REF, 0, 0, 0);
2521 if (rep != NULL)
2522 xmlFree(rep);
2523 }
2524
2525 /*
2526 * Just output the reference
2527 */
2528 buf[len++] = '&';
2529 if (len > buf_size - i - 10) {
2530 growBuffer(buf);
2531 }
2532 for (;i > 0;i--)
2533 buf[len++] = *cur++;
2534 buf[len++] = ';';
2535 }
2536 }
2537 } else {
2538 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2539 COPY_BUF(l,buf,len,0x20);
2540 if (len > buf_size - 10) {
2541 growBuffer(buf);
2542 }
2543 } else {
2544 COPY_BUF(l,buf,len,c);
2545 if (len > buf_size - 10) {
2546 growBuffer(buf);
2547 }
2548 }
2549 NEXTL(l);
2550 }
2551 GROW;
2552 c = CUR_CHAR(l);
2553 }
2554 buf[len++] = 0;
2555 if (RAW == '<') {
2556 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2557 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2558 ctxt->sax->error(ctxt->userData,
2559 "Unescaped '<' not allowed in attributes values\n");
2560 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002561 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002562 } else if (RAW != limit) {
2563 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2564 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2565 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2566 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002567 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002568 } else
2569 NEXT;
2570 return(buf);
2571}
2572
2573/**
2574 * xmlParseSystemLiteral:
2575 * @ctxt: an XML parser context
2576 *
2577 * parse an XML Literal
2578 *
2579 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2580 *
2581 * Returns the SystemLiteral parsed or NULL
2582 */
2583
2584xmlChar *
2585xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2586 xmlChar *buf = NULL;
2587 int len = 0;
2588 int size = XML_PARSER_BUFFER_SIZE;
2589 int cur, l;
2590 xmlChar stop;
2591 int state = ctxt->instate;
2592 int count = 0;
2593
2594 SHRINK;
2595 if (RAW == '"') {
2596 NEXT;
2597 stop = '"';
2598 } else if (RAW == '\'') {
2599 NEXT;
2600 stop = '\'';
2601 } else {
2602 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2603 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2604 ctxt->sax->error(ctxt->userData,
2605 "SystemLiteral \" or ' expected\n");
2606 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002607 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002608 return(NULL);
2609 }
2610
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002611 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002612 if (buf == NULL) {
2613 xmlGenericError(xmlGenericErrorContext,
2614 "malloc of %d byte failed\n", size);
2615 return(NULL);
2616 }
2617 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2618 cur = CUR_CHAR(l);
2619 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2620 if (len + 5 >= size) {
2621 size *= 2;
2622 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2623 if (buf == NULL) {
2624 xmlGenericError(xmlGenericErrorContext,
2625 "realloc of %d byte failed\n", size);
2626 ctxt->instate = (xmlParserInputState) state;
2627 return(NULL);
2628 }
2629 }
2630 count++;
2631 if (count > 50) {
2632 GROW;
2633 count = 0;
2634 }
2635 COPY_BUF(l,buf,len,cur);
2636 NEXTL(l);
2637 cur = CUR_CHAR(l);
2638 if (cur == 0) {
2639 GROW;
2640 SHRINK;
2641 cur = CUR_CHAR(l);
2642 }
2643 }
2644 buf[len] = 0;
2645 ctxt->instate = (xmlParserInputState) state;
2646 if (!IS_CHAR(cur)) {
2647 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2648 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2649 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2650 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002651 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002652 } else {
2653 NEXT;
2654 }
2655 return(buf);
2656}
2657
2658/**
2659 * xmlParsePubidLiteral:
2660 * @ctxt: an XML parser context
2661 *
2662 * parse an XML public literal
2663 *
2664 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2665 *
2666 * Returns the PubidLiteral parsed or NULL.
2667 */
2668
2669xmlChar *
2670xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2671 xmlChar *buf = NULL;
2672 int len = 0;
2673 int size = XML_PARSER_BUFFER_SIZE;
2674 xmlChar cur;
2675 xmlChar stop;
2676 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002677 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00002678
2679 SHRINK;
2680 if (RAW == '"') {
2681 NEXT;
2682 stop = '"';
2683 } else if (RAW == '\'') {
2684 NEXT;
2685 stop = '\'';
2686 } else {
2687 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2688 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2689 ctxt->sax->error(ctxt->userData,
2690 "SystemLiteral \" or ' expected\n");
2691 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002692 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002693 return(NULL);
2694 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002695 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002696 if (buf == NULL) {
2697 xmlGenericError(xmlGenericErrorContext,
2698 "malloc of %d byte failed\n", size);
2699 return(NULL);
2700 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002701 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00002702 cur = CUR;
2703 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2704 if (len + 1 >= size) {
2705 size *= 2;
2706 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2707 if (buf == NULL) {
2708 xmlGenericError(xmlGenericErrorContext,
2709 "realloc of %d byte failed\n", size);
2710 return(NULL);
2711 }
2712 }
2713 buf[len++] = cur;
2714 count++;
2715 if (count > 50) {
2716 GROW;
2717 count = 0;
2718 }
2719 NEXT;
2720 cur = CUR;
2721 if (cur == 0) {
2722 GROW;
2723 SHRINK;
2724 cur = CUR;
2725 }
2726 }
2727 buf[len] = 0;
2728 if (cur != stop) {
2729 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2730 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2731 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2732 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002733 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002734 } else {
2735 NEXT;
2736 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002737 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00002738 return(buf);
2739}
2740
Daniel Veillard48b2f892001-02-25 16:11:03 +00002741void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002742/**
2743 * xmlParseCharData:
2744 * @ctxt: an XML parser context
2745 * @cdata: int indicating whether we are within a CDATA section
2746 *
2747 * parse a CharData section.
2748 * if we are within a CDATA section ']]>' marks an end of section.
2749 *
2750 * The right angle bracket (>) may be represented using the string "&gt;",
2751 * and must, for compatibility, be escaped using "&gt;" or a character
2752 * reference when it appears in the string "]]>" in content, when that
2753 * string is not marking the end of a CDATA section.
2754 *
2755 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2756 */
2757
2758void
2759xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00002760 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002761 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002762 int line = ctxt->input->line;
2763 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002764
2765 SHRINK;
2766 GROW;
2767 /*
2768 * Accelerated common case where input don't need to be
2769 * modified before passing it to the handler.
2770 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00002771 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002772 in = ctxt->input->cur;
2773 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002774get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00002775 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
2776 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002777 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002778 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002779 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002780 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002781 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002782 ctxt->input->line++;
2783 in++;
2784 }
2785 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002786 }
2787 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002788 if ((in[1] == ']') && (in[2] == '>')) {
2789 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2790 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2791 ctxt->sax->error(ctxt->userData,
2792 "Sequence ']]>' not allowed in content\n");
2793 ctxt->input->cur = in;
2794 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002795 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002796 return;
2797 }
2798 in++;
2799 goto get_more;
2800 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002801 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002802 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002803 if (IS_BLANK(*ctxt->input->cur)) {
2804 const xmlChar *tmp = ctxt->input->cur;
2805 ctxt->input->cur = in;
2806 if (areBlanks(ctxt, tmp, nbchar)) {
2807 if (ctxt->sax->ignorableWhitespace != NULL)
2808 ctxt->sax->ignorableWhitespace(ctxt->userData,
2809 tmp, nbchar);
2810 } else {
2811 if (ctxt->sax->characters != NULL)
2812 ctxt->sax->characters(ctxt->userData,
2813 tmp, nbchar);
2814 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002815 line = ctxt->input->line;
2816 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002817 } else {
2818 if (ctxt->sax->characters != NULL)
2819 ctxt->sax->characters(ctxt->userData,
2820 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002821 line = ctxt->input->line;
2822 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002823 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002824 }
2825 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002826 if (*in == 0xD) {
2827 in++;
2828 if (*in == 0xA) {
2829 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002830 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002831 ctxt->input->line++;
2832 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002833 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002834 in--;
2835 }
2836 if (*in == '<') {
2837 return;
2838 }
2839 if (*in == '&') {
2840 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002841 }
2842 SHRINK;
2843 GROW;
2844 in = ctxt->input->cur;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002845 } while ((*in >= 0x20) && (*in <= 0x7F));
Daniel Veillard48b2f892001-02-25 16:11:03 +00002846 nbchar = 0;
2847 }
Daniel Veillard50582112001-03-26 22:52:16 +00002848 ctxt->input->line = line;
2849 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002850 xmlParseCharDataComplex(ctxt, cdata);
2851}
2852
Daniel Veillard01c13b52002-12-10 15:19:08 +00002853/**
2854 * xmlParseCharDataComplex:
2855 * @ctxt: an XML parser context
2856 * @cdata: int indicating whether we are within a CDATA section
2857 *
2858 * parse a CharData section.this is the fallback function
2859 * of xmlParseCharData() when the parsing requires handling
2860 * of non-ASCII characters.
2861 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002862void
2863xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002864 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2865 int nbchar = 0;
2866 int cur, l;
2867 int count = 0;
2868
2869 SHRINK;
2870 GROW;
2871 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002872 while ((cur != '<') && /* checked */
2873 (cur != '&') &&
2874 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00002875 if ((cur == ']') && (NXT(1) == ']') &&
2876 (NXT(2) == '>')) {
2877 if (cdata) break;
2878 else {
2879 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2880 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2881 ctxt->sax->error(ctxt->userData,
2882 "Sequence ']]>' not allowed in content\n");
2883 /* Should this be relaxed ??? I see a "must here */
2884 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002885 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002886 }
2887 }
2888 COPY_BUF(l,buf,nbchar,cur);
2889 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2890 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002891 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002892 */
2893 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2894 if (areBlanks(ctxt, buf, nbchar)) {
2895 if (ctxt->sax->ignorableWhitespace != NULL)
2896 ctxt->sax->ignorableWhitespace(ctxt->userData,
2897 buf, nbchar);
2898 } else {
2899 if (ctxt->sax->characters != NULL)
2900 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2901 }
2902 }
2903 nbchar = 0;
2904 }
2905 count++;
2906 if (count > 50) {
2907 GROW;
2908 count = 0;
2909 }
2910 NEXTL(l);
2911 cur = CUR_CHAR(l);
2912 }
2913 if (nbchar != 0) {
2914 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002915 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002916 */
2917 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2918 if (areBlanks(ctxt, buf, nbchar)) {
2919 if (ctxt->sax->ignorableWhitespace != NULL)
2920 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2921 } else {
2922 if (ctxt->sax->characters != NULL)
2923 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2924 }
2925 }
2926 }
2927}
2928
2929/**
2930 * xmlParseExternalID:
2931 * @ctxt: an XML parser context
2932 * @publicID: a xmlChar** receiving PubidLiteral
2933 * @strict: indicate whether we should restrict parsing to only
2934 * production [75], see NOTE below
2935 *
2936 * Parse an External ID or a Public ID
2937 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002938 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002939 * 'PUBLIC' S PubidLiteral S SystemLiteral
2940 *
2941 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2942 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2943 *
2944 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2945 *
2946 * Returns the function returns SystemLiteral and in the second
2947 * case publicID receives PubidLiteral, is strict is off
2948 * it is possible to return NULL and have publicID set.
2949 */
2950
2951xmlChar *
2952xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2953 xmlChar *URI = NULL;
2954
2955 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002956
2957 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002958 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2959 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2960 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2961 SKIP(6);
2962 if (!IS_BLANK(CUR)) {
2963 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2964 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2965 ctxt->sax->error(ctxt->userData,
2966 "Space required after 'SYSTEM'\n");
2967 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002968 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002969 }
2970 SKIP_BLANKS;
2971 URI = xmlParseSystemLiteral(ctxt);
2972 if (URI == NULL) {
2973 ctxt->errNo = XML_ERR_URI_REQUIRED;
2974 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2975 ctxt->sax->error(ctxt->userData,
2976 "xmlParseExternalID: SYSTEM, no URI\n");
2977 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002978 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002979 }
2980 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2981 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2982 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2983 SKIP(6);
2984 if (!IS_BLANK(CUR)) {
2985 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2987 ctxt->sax->error(ctxt->userData,
2988 "Space required after 'PUBLIC'\n");
2989 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002990 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002991 }
2992 SKIP_BLANKS;
2993 *publicID = xmlParsePubidLiteral(ctxt);
2994 if (*publicID == NULL) {
2995 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2996 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2997 ctxt->sax->error(ctxt->userData,
2998 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2999 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003000 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003001 }
3002 if (strict) {
3003 /*
3004 * We don't handle [83] so "S SystemLiteral" is required.
3005 */
3006 if (!IS_BLANK(CUR)) {
3007 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3008 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3009 ctxt->sax->error(ctxt->userData,
3010 "Space required after the Public Identifier\n");
3011 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003012 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003013 }
3014 } else {
3015 /*
3016 * We handle [83] so we return immediately, if
3017 * "S SystemLiteral" is not detected. From a purely parsing
3018 * point of view that's a nice mess.
3019 */
3020 const xmlChar *ptr;
3021 GROW;
3022
3023 ptr = CUR_PTR;
3024 if (!IS_BLANK(*ptr)) return(NULL);
3025
3026 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3027 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3028 }
3029 SKIP_BLANKS;
3030 URI = xmlParseSystemLiteral(ctxt);
3031 if (URI == NULL) {
3032 ctxt->errNo = XML_ERR_URI_REQUIRED;
3033 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3034 ctxt->sax->error(ctxt->userData,
3035 "xmlParseExternalID: PUBLIC, no URI\n");
3036 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003037 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003038 }
3039 }
3040 return(URI);
3041}
3042
3043/**
3044 * xmlParseComment:
3045 * @ctxt: an XML parser context
3046 *
3047 * Skip an XML (SGML) comment <!-- .... -->
3048 * The spec says that "For compatibility, the string "--" (double-hyphen)
3049 * must not occur within comments. "
3050 *
3051 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3052 */
3053void
3054xmlParseComment(xmlParserCtxtPtr ctxt) {
3055 xmlChar *buf = NULL;
3056 int len;
3057 int size = XML_PARSER_BUFFER_SIZE;
3058 int q, ql;
3059 int r, rl;
3060 int cur, l;
3061 xmlParserInputState state;
3062 xmlParserInputPtr input = ctxt->input;
3063 int count = 0;
3064
3065 /*
3066 * Check that there is a comment right here.
3067 */
3068 if ((RAW != '<') || (NXT(1) != '!') ||
3069 (NXT(2) != '-') || (NXT(3) != '-')) return;
3070
3071 state = ctxt->instate;
3072 ctxt->instate = XML_PARSER_COMMENT;
3073 SHRINK;
3074 SKIP(4);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003075 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003076 if (buf == NULL) {
3077 xmlGenericError(xmlGenericErrorContext,
3078 "malloc of %d byte failed\n", size);
3079 ctxt->instate = state;
3080 return;
3081 }
3082 q = CUR_CHAR(ql);
3083 NEXTL(ql);
3084 r = CUR_CHAR(rl);
3085 NEXTL(rl);
3086 cur = CUR_CHAR(l);
3087 len = 0;
3088 while (IS_CHAR(cur) && /* checked */
3089 ((cur != '>') ||
3090 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003091 if ((r == '-') && (q == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003092 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
3093 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3094 ctxt->sax->error(ctxt->userData,
3095 "Comment must not contain '--' (double-hyphen)`\n");
3096 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003097 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003098 }
3099 if (len + 5 >= size) {
3100 size *= 2;
3101 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3102 if (buf == NULL) {
3103 xmlGenericError(xmlGenericErrorContext,
3104 "realloc of %d byte failed\n", size);
3105 ctxt->instate = state;
3106 return;
3107 }
3108 }
3109 COPY_BUF(ql,buf,len,q);
3110 q = r;
3111 ql = rl;
3112 r = cur;
3113 rl = l;
3114
3115 count++;
3116 if (count > 50) {
3117 GROW;
3118 count = 0;
3119 }
3120 NEXTL(l);
3121 cur = CUR_CHAR(l);
3122 if (cur == 0) {
3123 SHRINK;
3124 GROW;
3125 cur = CUR_CHAR(l);
3126 }
3127 }
3128 buf[len] = 0;
3129 if (!IS_CHAR(cur)) {
3130 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
3131 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3132 ctxt->sax->error(ctxt->userData,
3133 "Comment not terminated \n<!--%.50s\n", buf);
3134 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003135 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003136 xmlFree(buf);
3137 } else {
3138 if (input != ctxt->input) {
3139 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3140 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3141 ctxt->sax->error(ctxt->userData,
3142"Comment doesn't start and stop in the same entity\n");
3143 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003144 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003145 }
3146 NEXT;
3147 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3148 (!ctxt->disableSAX))
3149 ctxt->sax->comment(ctxt->userData, buf);
3150 xmlFree(buf);
3151 }
3152 ctxt->instate = state;
3153}
3154
3155/**
3156 * xmlParsePITarget:
3157 * @ctxt: an XML parser context
3158 *
3159 * parse the name of a PI
3160 *
3161 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3162 *
3163 * Returns the PITarget name or NULL
3164 */
3165
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003166const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003167xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003168 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003169
3170 name = xmlParseName(ctxt);
3171 if ((name != NULL) &&
3172 ((name[0] == 'x') || (name[0] == 'X')) &&
3173 ((name[1] == 'm') || (name[1] == 'M')) &&
3174 ((name[2] == 'l') || (name[2] == 'L'))) {
3175 int i;
3176 if ((name[0] == 'x') && (name[1] == 'm') &&
3177 (name[2] == 'l') && (name[3] == 0)) {
3178 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3179 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3180 ctxt->sax->error(ctxt->userData,
3181 "XML declaration allowed only at the start of the document\n");
3182 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003183 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003184 return(name);
3185 } else if (name[3] == 0) {
3186 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3187 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3188 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
3189 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003190 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003191 return(name);
3192 }
3193 for (i = 0;;i++) {
3194 if (xmlW3CPIs[i] == NULL) break;
3195 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3196 return(name);
3197 }
3198 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3199 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3200 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003201 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003202 }
3203 }
3204 return(name);
3205}
3206
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * A malformed value raises an XML_WAR_CATALOG_PI warning, except when
 * the "catalog" keyword is not followed by '=', in which case the PI is
 * ignored without any message.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* catalog */
    for (; IS_BLANK(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto syntax_error;
    p += 7;

    /* = */
    for (; IS_BLANK(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;

    /* opening quote, either ' or " */
    for (; IS_BLANK(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto syntax_error;
    p++;

    /* everything up to the matching quote is the catalog URL */
    for (start = p; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto syntax_error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK(*p); p++)
        ;
    if (*p != 0)
        goto syntax_error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

syntax_error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3269
Owen Taylor3473f882001-02-23 17:55:21 +00003270/**
3271 * xmlParsePI:
3272 * @ctxt: an XML parser context
3273 *
3274 * parse an XML Processing Instruction.
3275 *
3276 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3277 *
3278 * The processing is transfered to SAX once parsed.
3279 */
3280
3281void
3282xmlParsePI(xmlParserCtxtPtr ctxt) {
3283 xmlChar *buf = NULL;
3284 int len = 0;
3285 int size = XML_PARSER_BUFFER_SIZE;
3286 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003287 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00003288 xmlParserInputState state;
3289 int count = 0;
3290
3291 if ((RAW == '<') && (NXT(1) == '?')) {
3292 xmlParserInputPtr input = ctxt->input;
3293 state = ctxt->instate;
3294 ctxt->instate = XML_PARSER_PI;
3295 /*
3296 * this is a Processing Instruction.
3297 */
3298 SKIP(2);
3299 SHRINK;
3300
3301 /*
3302 * Parse the target name and check for special support like
3303 * namespace.
3304 */
3305 target = xmlParsePITarget(ctxt);
3306 if (target != NULL) {
3307 if ((RAW == '?') && (NXT(1) == '>')) {
3308 if (input != ctxt->input) {
3309 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3310 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3311 ctxt->sax->error(ctxt->userData,
3312 "PI declaration doesn't start and stop in the same entity\n");
3313 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003314 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003315 }
3316 SKIP(2);
3317
3318 /*
3319 * SAX: PI detected.
3320 */
3321 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3322 (ctxt->sax->processingInstruction != NULL))
3323 ctxt->sax->processingInstruction(ctxt->userData,
3324 target, NULL);
3325 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00003326 return;
3327 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003328 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003329 if (buf == NULL) {
3330 xmlGenericError(xmlGenericErrorContext,
3331 "malloc of %d byte failed\n", size);
3332 ctxt->instate = state;
3333 return;
3334 }
3335 cur = CUR;
3336 if (!IS_BLANK(cur)) {
3337 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3338 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3339 ctxt->sax->error(ctxt->userData,
3340 "xmlParsePI: PI %s space expected\n", target);
3341 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003342 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003343 }
3344 SKIP_BLANKS;
3345 cur = CUR_CHAR(l);
3346 while (IS_CHAR(cur) && /* checked */
3347 ((cur != '?') || (NXT(1) != '>'))) {
3348 if (len + 5 >= size) {
3349 size *= 2;
3350 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3351 if (buf == NULL) {
3352 xmlGenericError(xmlGenericErrorContext,
3353 "realloc of %d byte failed\n", size);
3354 ctxt->instate = state;
3355 return;
3356 }
3357 }
3358 count++;
3359 if (count > 50) {
3360 GROW;
3361 count = 0;
3362 }
3363 COPY_BUF(l,buf,len,cur);
3364 NEXTL(l);
3365 cur = CUR_CHAR(l);
3366 if (cur == 0) {
3367 SHRINK;
3368 GROW;
3369 cur = CUR_CHAR(l);
3370 }
3371 }
3372 buf[len] = 0;
3373 if (cur != '?') {
3374 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3375 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3376 ctxt->sax->error(ctxt->userData,
3377 "xmlParsePI: PI %s never end ...\n", target);
3378 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003379 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003380 } else {
3381 if (input != ctxt->input) {
3382 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3383 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3384 ctxt->sax->error(ctxt->userData,
3385 "PI declaration doesn't start and stop in the same entity\n");
3386 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003387 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003388 }
3389 SKIP(2);
3390
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003391#ifdef LIBXML_CATALOG_ENABLED
3392 if (((state == XML_PARSER_MISC) ||
3393 (state == XML_PARSER_START)) &&
3394 (xmlStrEqual(target, XML_CATALOG_PI))) {
3395 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3396 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3397 (allow == XML_CATA_ALLOW_ALL))
3398 xmlParseCatalogPI(ctxt, buf);
3399 }
3400#endif
3401
3402
Owen Taylor3473f882001-02-23 17:55:21 +00003403 /*
3404 * SAX: PI detected.
3405 */
3406 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3407 (ctxt->sax->processingInstruction != NULL))
3408 ctxt->sax->processingInstruction(ctxt->userData,
3409 target, buf);
3410 }
3411 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003412 } else {
3413 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3414 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3415 ctxt->sax->error(ctxt->userData,
3416 "xmlParsePI : no target name\n");
3417 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003418 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003419 }
3420 ctxt->instate = state;
3421 }
3422}
3423
3424/**
3425 * xmlParseNotationDecl:
3426 * @ctxt: an XML parser context
3427 *
3428 * parse a notation declaration
3429 *
3430 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3431 *
3432 * Hence there is actually 3 choices:
3433 * 'PUBLIC' S PubidLiteral
3434 * 'PUBLIC' S PubidLiteral S SystemLiteral
3435 * and 'SYSTEM' S SystemLiteral
3436 *
3437 * See the NOTE on xmlParseExternalID().
3438 */
3439
3440void
3441xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003442 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003443 xmlChar *Pubid;
3444 xmlChar *Systemid;
3445
3446 if ((RAW == '<') && (NXT(1) == '!') &&
3447 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3448 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3449 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3450 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3451 xmlParserInputPtr input = ctxt->input;
3452 SHRINK;
3453 SKIP(10);
3454 if (!IS_BLANK(CUR)) {
3455 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3456 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3457 ctxt->sax->error(ctxt->userData,
3458 "Space required after '<!NOTATION'\n");
3459 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003460 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003461 return;
3462 }
3463 SKIP_BLANKS;
3464
Daniel Veillard76d66f42001-05-16 21:05:17 +00003465 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003466 if (name == NULL) {
3467 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3468 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3469 ctxt->sax->error(ctxt->userData,
3470 "NOTATION: Name expected here\n");
3471 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003472 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003473 return;
3474 }
3475 if (!IS_BLANK(CUR)) {
3476 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3477 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3478 ctxt->sax->error(ctxt->userData,
3479 "Space required after the NOTATION name'\n");
3480 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003481 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003482 return;
3483 }
3484 SKIP_BLANKS;
3485
3486 /*
3487 * Parse the IDs.
3488 */
3489 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3490 SKIP_BLANKS;
3491
3492 if (RAW == '>') {
3493 if (input != ctxt->input) {
3494 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3495 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3496 ctxt->sax->error(ctxt->userData,
3497"Notation declaration doesn't start and stop in the same entity\n");
3498 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003499 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003500 }
3501 NEXT;
3502 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3503 (ctxt->sax->notationDecl != NULL))
3504 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3505 } else {
3506 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3507 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3508 ctxt->sax->error(ctxt->userData,
3509 "'>' required to close NOTATION declaration\n");
3510 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003511 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003512 }
Owen Taylor3473f882001-02-23 17:55:21 +00003513 if (Systemid != NULL) xmlFree(Systemid);
3514 if (Pubid != NULL) xmlFree(Pubid);
3515 }
3516}
3517
3518/**
3519 * xmlParseEntityDecl:
3520 * @ctxt: an XML parser context
3521 *
3522 * parse <!ENTITY declarations
3523 *
3524 * [70] EntityDecl ::= GEDecl | PEDecl
3525 *
3526 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3527 *
3528 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3529 *
3530 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3531 *
3532 * [74] PEDef ::= EntityValue | ExternalID
3533 *
3534 * [76] NDataDecl ::= S 'NDATA' S Name
3535 *
3536 * [ VC: Notation Declared ]
3537 * The Name must match the declared name of a notation.
3538 */
3539
3540void
3541xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003542 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003543 xmlChar *value = NULL;
3544 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003545 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003546 int isParameter = 0;
3547 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003548 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003549
3550 GROW;
3551 if ((RAW == '<') && (NXT(1) == '!') &&
3552 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3553 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3554 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3555 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003556 SHRINK;
3557 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003558 skipped = SKIP_BLANKS;
3559 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003560 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3561 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3562 ctxt->sax->error(ctxt->userData,
3563 "Space required after '<!ENTITY'\n");
3564 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003565 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003566 }
Owen Taylor3473f882001-02-23 17:55:21 +00003567
3568 if (RAW == '%') {
3569 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003570 skipped = SKIP_BLANKS;
3571 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003572 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3573 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3574 ctxt->sax->error(ctxt->userData,
3575 "Space required after '%'\n");
3576 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003577 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003578 }
Owen Taylor3473f882001-02-23 17:55:21 +00003579 isParameter = 1;
3580 }
3581
Daniel Veillard76d66f42001-05-16 21:05:17 +00003582 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003583 if (name == NULL) {
3584 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3585 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3586 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3587 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003588 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003589 return;
3590 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003591 skipped = SKIP_BLANKS;
3592 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003593 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3594 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3595 ctxt->sax->error(ctxt->userData,
3596 "Space required after the entity name\n");
3597 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003598 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003599 }
Owen Taylor3473f882001-02-23 17:55:21 +00003600
Daniel Veillardf5582f12002-06-11 10:08:16 +00003601 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003602 /*
3603 * handle the various case of definitions...
3604 */
3605 if (isParameter) {
3606 if ((RAW == '"') || (RAW == '\'')) {
3607 value = xmlParseEntityValue(ctxt, &orig);
3608 if (value) {
3609 if ((ctxt->sax != NULL) &&
3610 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3611 ctxt->sax->entityDecl(ctxt->userData, name,
3612 XML_INTERNAL_PARAMETER_ENTITY,
3613 NULL, NULL, value);
3614 }
3615 } else {
3616 URI = xmlParseExternalID(ctxt, &literal, 1);
3617 if ((URI == NULL) && (literal == NULL)) {
3618 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3619 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3620 ctxt->sax->error(ctxt->userData,
3621 "Entity value required\n");
3622 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003623 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003624 }
3625 if (URI) {
3626 xmlURIPtr uri;
3627
3628 uri = xmlParseURI((const char *) URI);
3629 if (uri == NULL) {
3630 ctxt->errNo = XML_ERR_INVALID_URI;
3631 if ((ctxt->sax != NULL) &&
3632 (!ctxt->disableSAX) &&
3633 (ctxt->sax->error != NULL))
3634 ctxt->sax->error(ctxt->userData,
3635 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003636 /*
3637 * This really ought to be a well formedness error
3638 * but the XML Core WG decided otherwise c.f. issue
3639 * E26 of the XML erratas.
3640 */
Owen Taylor3473f882001-02-23 17:55:21 +00003641 } else {
3642 if (uri->fragment != NULL) {
3643 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3644 if ((ctxt->sax != NULL) &&
3645 (!ctxt->disableSAX) &&
3646 (ctxt->sax->error != NULL))
3647 ctxt->sax->error(ctxt->userData,
3648 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003649 /*
3650 * Okay this is foolish to block those but not
3651 * invalid URIs.
3652 */
Owen Taylor3473f882001-02-23 17:55:21 +00003653 ctxt->wellFormed = 0;
3654 } else {
3655 if ((ctxt->sax != NULL) &&
3656 (!ctxt->disableSAX) &&
3657 (ctxt->sax->entityDecl != NULL))
3658 ctxt->sax->entityDecl(ctxt->userData, name,
3659 XML_EXTERNAL_PARAMETER_ENTITY,
3660 literal, URI, NULL);
3661 }
3662 xmlFreeURI(uri);
3663 }
3664 }
3665 }
3666 } else {
3667 if ((RAW == '"') || (RAW == '\'')) {
3668 value = xmlParseEntityValue(ctxt, &orig);
3669 if ((ctxt->sax != NULL) &&
3670 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3671 ctxt->sax->entityDecl(ctxt->userData, name,
3672 XML_INTERNAL_GENERAL_ENTITY,
3673 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003674 /*
3675 * For expat compatibility in SAX mode.
3676 */
3677 if ((ctxt->myDoc == NULL) ||
3678 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3679 if (ctxt->myDoc == NULL) {
3680 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3681 }
3682 if (ctxt->myDoc->intSubset == NULL)
3683 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3684 BAD_CAST "fake", NULL, NULL);
3685
3686 entityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3687 NULL, NULL, value);
3688 }
Owen Taylor3473f882001-02-23 17:55:21 +00003689 } else {
3690 URI = xmlParseExternalID(ctxt, &literal, 1);
3691 if ((URI == NULL) && (literal == NULL)) {
3692 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3693 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3694 ctxt->sax->error(ctxt->userData,
3695 "Entity value required\n");
3696 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003697 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003698 }
3699 if (URI) {
3700 xmlURIPtr uri;
3701
3702 uri = xmlParseURI((const char *)URI);
3703 if (uri == NULL) {
3704 ctxt->errNo = XML_ERR_INVALID_URI;
3705 if ((ctxt->sax != NULL) &&
3706 (!ctxt->disableSAX) &&
3707 (ctxt->sax->error != NULL))
3708 ctxt->sax->error(ctxt->userData,
3709 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003710 /*
3711 * This really ought to be a well formedness error
3712 * but the XML Core WG decided otherwise c.f. issue
3713 * E26 of the XML erratas.
3714 */
Owen Taylor3473f882001-02-23 17:55:21 +00003715 } else {
3716 if (uri->fragment != NULL) {
3717 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3718 if ((ctxt->sax != NULL) &&
3719 (!ctxt->disableSAX) &&
3720 (ctxt->sax->error != NULL))
3721 ctxt->sax->error(ctxt->userData,
3722 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003723 /*
3724 * Okay this is foolish to block those but not
3725 * invalid URIs.
3726 */
Owen Taylor3473f882001-02-23 17:55:21 +00003727 ctxt->wellFormed = 0;
3728 }
3729 xmlFreeURI(uri);
3730 }
3731 }
3732 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3733 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3734 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3735 ctxt->sax->error(ctxt->userData,
3736 "Space required before 'NDATA'\n");
3737 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003738 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003739 }
3740 SKIP_BLANKS;
3741 if ((RAW == 'N') && (NXT(1) == 'D') &&
3742 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3743 (NXT(4) == 'A')) {
3744 SKIP(5);
3745 if (!IS_BLANK(CUR)) {
3746 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3747 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3748 ctxt->sax->error(ctxt->userData,
3749 "Space required after 'NDATA'\n");
3750 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003751 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003752 }
3753 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003754 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003755 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3756 (ctxt->sax->unparsedEntityDecl != NULL))
3757 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3758 literal, URI, ndata);
3759 } else {
3760 if ((ctxt->sax != NULL) &&
3761 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3762 ctxt->sax->entityDecl(ctxt->userData, name,
3763 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3764 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003765 /*
3766 * For expat compatibility in SAX mode.
3767 * assuming the entity repalcement was asked for
3768 */
3769 if ((ctxt->replaceEntities != 0) &&
3770 ((ctxt->myDoc == NULL) ||
3771 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
3772 if (ctxt->myDoc == NULL) {
3773 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3774 }
3775
3776 if (ctxt->myDoc->intSubset == NULL)
3777 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3778 BAD_CAST "fake", NULL, NULL);
3779 entityDecl(ctxt, name,
3780 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3781 literal, URI, NULL);
3782 }
Owen Taylor3473f882001-02-23 17:55:21 +00003783 }
3784 }
3785 }
3786 SKIP_BLANKS;
3787 if (RAW != '>') {
3788 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3789 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3790 ctxt->sax->error(ctxt->userData,
3791 "xmlParseEntityDecl: entity %s not terminated\n", name);
3792 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003793 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003794 } else {
3795 if (input != ctxt->input) {
3796 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3797 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3798 ctxt->sax->error(ctxt->userData,
3799"Entity declaration doesn't start and stop in the same entity\n");
3800 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003801 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003802 }
3803 NEXT;
3804 }
3805 if (orig != NULL) {
3806 /*
3807 * Ugly mechanism to save the raw entity value.
3808 */
3809 xmlEntityPtr cur = NULL;
3810
3811 if (isParameter) {
3812 if ((ctxt->sax != NULL) &&
3813 (ctxt->sax->getParameterEntity != NULL))
3814 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3815 } else {
3816 if ((ctxt->sax != NULL) &&
3817 (ctxt->sax->getEntity != NULL))
3818 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003819 if ((cur == NULL) && (ctxt->userData==ctxt)) {
3820 cur = getEntity(ctxt, name);
3821 }
Owen Taylor3473f882001-02-23 17:55:21 +00003822 }
3823 if (cur != NULL) {
3824 if (cur->orig != NULL)
3825 xmlFree(orig);
3826 else
3827 cur->orig = orig;
3828 } else
3829 xmlFree(orig);
3830 }
Owen Taylor3473f882001-02-23 17:55:21 +00003831 if (value != NULL) xmlFree(value);
3832 if (URI != NULL) xmlFree(URI);
3833 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00003834 }
3835}
3836
3837/**
3838 * xmlParseDefaultDecl:
3839 * @ctxt: an XML parser context
3840 * @value: Receive a possible fixed default value for the attribute
3841 *
3842 * Parse an attribute default declaration
3843 *
3844 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3845 *
3846 * [ VC: Required Attribute ]
3847 * if the default declaration is the keyword #REQUIRED, then the
3848 * attribute must be specified for all elements of the type in the
3849 * attribute-list declaration.
3850 *
3851 * [ VC: Attribute Default Legal ]
3852 * The declared default value must meet the lexical constraints of
3853 * the declared attribute type c.f. xmlValidateAttributeDecl()
3854 *
3855 * [ VC: Fixed Attribute Default ]
3856 * if an attribute has a default value declared with the #FIXED
3857 * keyword, instances of that attribute must match the default value.
3858 *
3859 * [ WFC: No < in Attribute Values ]
3860 * handled in xmlParseAttValue()
3861 *
3862 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3863 * or XML_ATTRIBUTE_FIXED.
3864 */
3865
3866int
3867xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3868 int val;
3869 xmlChar *ret;
3870
3871 *value = NULL;
3872 if ((RAW == '#') && (NXT(1) == 'R') &&
3873 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3874 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3875 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3876 (NXT(8) == 'D')) {
3877 SKIP(9);
3878 return(XML_ATTRIBUTE_REQUIRED);
3879 }
3880 if ((RAW == '#') && (NXT(1) == 'I') &&
3881 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3882 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3883 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3884 SKIP(8);
3885 return(XML_ATTRIBUTE_IMPLIED);
3886 }
3887 val = XML_ATTRIBUTE_NONE;
3888 if ((RAW == '#') && (NXT(1) == 'F') &&
3889 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3890 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3891 SKIP(6);
3892 val = XML_ATTRIBUTE_FIXED;
3893 if (!IS_BLANK(CUR)) {
3894 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3895 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3896 ctxt->sax->error(ctxt->userData,
3897 "Space required after '#FIXED'\n");
3898 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003899 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003900 }
3901 SKIP_BLANKS;
3902 }
3903 ret = xmlParseAttValue(ctxt);
3904 ctxt->instate = XML_PARSER_DTD;
3905 if (ret == NULL) {
3906 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3907 ctxt->sax->error(ctxt->userData,
3908 "Attribute default value declaration error\n");
3909 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003910 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003911 } else
3912 *value = ret;
3913 return(val);
3914}
3915
3916/**
3917 * xmlParseNotationType:
3918 * @ctxt: an XML parser context
3919 *
3920 * parse an Notation attribute type.
3921 *
3922 * Note: the leading 'NOTATION' S part has already being parsed...
3923 *
3924 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3925 *
3926 * [ VC: Notation Attributes ]
3927 * Values of this type must match one of the notation names included
3928 * in the declaration; all notation names in the declaration must be declared.
3929 *
3930 * Returns: the notation attribute tree built while parsing
3931 */
3932
3933xmlEnumerationPtr
3934xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003935 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003936 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3937
3938 if (RAW != '(') {
3939 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3940 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3941 ctxt->sax->error(ctxt->userData,
3942 "'(' required to start 'NOTATION'\n");
3943 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003944 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003945 return(NULL);
3946 }
3947 SHRINK;
3948 do {
3949 NEXT;
3950 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003951 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003952 if (name == NULL) {
3953 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3954 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3955 ctxt->sax->error(ctxt->userData,
3956 "Name expected in NOTATION declaration\n");
3957 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003958 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003959 return(ret);
3960 }
3961 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00003962 if (cur == NULL) return(ret);
3963 if (last == NULL) ret = last = cur;
3964 else {
3965 last->next = cur;
3966 last = cur;
3967 }
3968 SKIP_BLANKS;
3969 } while (RAW == '|');
3970 if (RAW != ')') {
3971 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3972 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3973 ctxt->sax->error(ctxt->userData,
3974 "')' required to finish NOTATION declaration\n");
3975 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003976 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003977 if ((last != NULL) && (last != ret))
3978 xmlFreeEnumeration(last);
3979 return(ret);
3980 }
3981 NEXT;
3982 return(ret);
3983}
3984
3985/**
3986 * xmlParseEnumerationType:
3987 * @ctxt: an XML parser context
3988 *
3989 * parse an Enumeration attribute type.
3990 *
3991 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3992 *
3993 * [ VC: Enumeration ]
3994 * Values of this type must match one of the Nmtoken tokens in
3995 * the declaration
3996 *
3997 * Returns: the enumeration attribute tree built while parsing
3998 */
3999
4000xmlEnumerationPtr
4001xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4002 xmlChar *name;
4003 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4004
4005 if (RAW != '(') {
4006 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
4007 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4008 ctxt->sax->error(ctxt->userData,
4009 "'(' required to start ATTLIST enumeration\n");
4010 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004011 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004012 return(NULL);
4013 }
4014 SHRINK;
4015 do {
4016 NEXT;
4017 SKIP_BLANKS;
4018 name = xmlParseNmtoken(ctxt);
4019 if (name == NULL) {
4020 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
4021 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4022 ctxt->sax->error(ctxt->userData,
4023 "NmToken expected in ATTLIST enumeration\n");
4024 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004025 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004026 return(ret);
4027 }
4028 cur = xmlCreateEnumeration(name);
4029 xmlFree(name);
4030 if (cur == NULL) return(ret);
4031 if (last == NULL) ret = last = cur;
4032 else {
4033 last->next = cur;
4034 last = cur;
4035 }
4036 SKIP_BLANKS;
4037 } while (RAW == '|');
4038 if (RAW != ')') {
4039 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
4040 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4041 ctxt->sax->error(ctxt->userData,
4042 "')' required to finish ATTLIST enumeration\n");
4043 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004044 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004045 return(ret);
4046 }
4047 NEXT;
4048 return(ret);
4049}
4050
4051/**
4052 * xmlParseEnumeratedType:
4053 * @ctxt: an XML parser context
4054 * @tree: the enumeration tree built while parsing
4055 *
4056 * parse an Enumerated attribute type.
4057 *
4058 * [57] EnumeratedType ::= NotationType | Enumeration
4059 *
4060 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4061 *
4062 *
4063 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4064 */
4065
4066int
4067xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4068 if ((RAW == 'N') && (NXT(1) == 'O') &&
4069 (NXT(2) == 'T') && (NXT(3) == 'A') &&
4070 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4071 (NXT(6) == 'O') && (NXT(7) == 'N')) {
4072 SKIP(8);
4073 if (!IS_BLANK(CUR)) {
4074 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4075 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4076 ctxt->sax->error(ctxt->userData,
4077 "Space required after 'NOTATION'\n");
4078 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004079 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004080 return(0);
4081 }
4082 SKIP_BLANKS;
4083 *tree = xmlParseNotationType(ctxt);
4084 if (*tree == NULL) return(0);
4085 return(XML_ATTRIBUTE_NOTATION);
4086 }
4087 *tree = xmlParseEnumerationType(ctxt);
4088 if (*tree == NULL) return(0);
4089 return(XML_ATTRIBUTE_ENUMERATION);
4090}
4091
4092/**
4093 * xmlParseAttributeType:
4094 * @ctxt: an XML parser context
4095 * @tree: the enumeration tree built while parsing
4096 *
4097 * parse the Attribute list def for an element
4098 *
4099 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4100 *
4101 * [55] StringType ::= 'CDATA'
4102 *
4103 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4104 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4105 *
4106 * Validity constraints for attribute values syntax are checked in
4107 * xmlValidateAttributeValue()
4108 *
4109 * [ VC: ID ]
4110 * Values of type ID must match the Name production. A name must not
4111 * appear more than once in an XML document as a value of this type;
4112 * i.e., ID values must uniquely identify the elements which bear them.
4113 *
4114 * [ VC: One ID per Element Type ]
4115 * No element type may have more than one ID attribute specified.
4116 *
4117 * [ VC: ID Attribute Default ]
4118 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4119 *
4120 * [ VC: IDREF ]
4121 * Values of type IDREF must match the Name production, and values
4122 * of type IDREFS must match Names; each IDREF Name must match the value
4123 * of an ID attribute on some element in the XML document; i.e. IDREF
4124 * values must match the value of some ID attribute.
4125 *
4126 * [ VC: Entity Name ]
4127 * Values of type ENTITY must match the Name production, values
4128 * of type ENTITIES must match Names; each Entity Name must match the
4129 * name of an unparsed entity declared in the DTD.
4130 *
4131 * [ VC: Name Token ]
4132 * Values of type NMTOKEN must match the Nmtoken production; values
4133 * of type NMTOKENS must match Nmtokens.
4134 *
4135 * Returns the attribute type
4136 */
4137int
4138xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4139 SHRINK;
4140 if ((RAW == 'C') && (NXT(1) == 'D') &&
4141 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4142 (NXT(4) == 'A')) {
4143 SKIP(5);
4144 return(XML_ATTRIBUTE_CDATA);
4145 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4146 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4147 (NXT(4) == 'F') && (NXT(5) == 'S')) {
4148 SKIP(6);
4149 return(XML_ATTRIBUTE_IDREFS);
4150 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4151 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4152 (NXT(4) == 'F')) {
4153 SKIP(5);
4154 return(XML_ATTRIBUTE_IDREF);
4155 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4156 SKIP(2);
4157 return(XML_ATTRIBUTE_ID);
4158 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4159 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4160 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4161 SKIP(6);
4162 return(XML_ATTRIBUTE_ENTITY);
4163 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4164 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4165 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4166 (NXT(6) == 'E') && (NXT(7) == 'S')) {
4167 SKIP(8);
4168 return(XML_ATTRIBUTE_ENTITIES);
4169 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4170 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4171 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4172 (NXT(6) == 'N') && (NXT(7) == 'S')) {
4173 SKIP(8);
4174 return(XML_ATTRIBUTE_NMTOKENS);
4175 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4176 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4177 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4178 (NXT(6) == 'N')) {
4179 SKIP(7);
4180 return(XML_ATTRIBUTE_NMTOKEN);
4181 }
4182 return(xmlParseEnumeratedType(ctxt, tree));
4183}
4184
4185/**
4186 * xmlParseAttributeListDecl:
4187 * @ctxt: an XML parser context
4188 *
4189 * : parse the Attribute list def for an element
4190 *
4191 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4192 *
4193 * [53] AttDef ::= S Name S AttType S DefaultDecl
4194 *
4195 */
4196void
4197xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004198 const xmlChar *elemName;
4199 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004200 xmlEnumerationPtr tree;
4201
4202 if ((RAW == '<') && (NXT(1) == '!') &&
4203 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4204 (NXT(4) == 'T') && (NXT(5) == 'L') &&
4205 (NXT(6) == 'I') && (NXT(7) == 'S') &&
4206 (NXT(8) == 'T')) {
4207 xmlParserInputPtr input = ctxt->input;
4208
4209 SKIP(9);
4210 if (!IS_BLANK(CUR)) {
4211 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4212 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4213 ctxt->sax->error(ctxt->userData,
4214 "Space required after '<!ATTLIST'\n");
4215 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004216 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004217 }
4218 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004219 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004220 if (elemName == NULL) {
4221 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4222 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4223 ctxt->sax->error(ctxt->userData,
4224 "ATTLIST: no name for Element\n");
4225 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004226 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004227 return;
4228 }
4229 SKIP_BLANKS;
4230 GROW;
4231 while (RAW != '>') {
4232 const xmlChar *check = CUR_PTR;
4233 int type;
4234 int def;
4235 xmlChar *defaultValue = NULL;
4236
4237 GROW;
4238 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004239 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004240 if (attrName == NULL) {
4241 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4242 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4243 ctxt->sax->error(ctxt->userData,
4244 "ATTLIST: no name for Attribute\n");
4245 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004246 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004247 break;
4248 }
4249 GROW;
4250 if (!IS_BLANK(CUR)) {
4251 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4252 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4253 ctxt->sax->error(ctxt->userData,
4254 "Space required after the attribute name\n");
4255 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004256 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004257 if (defaultValue != NULL)
4258 xmlFree(defaultValue);
4259 break;
4260 }
4261 SKIP_BLANKS;
4262
4263 type = xmlParseAttributeType(ctxt, &tree);
4264 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004265 if (defaultValue != NULL)
4266 xmlFree(defaultValue);
4267 break;
4268 }
4269
4270 GROW;
4271 if (!IS_BLANK(CUR)) {
4272 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4273 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4274 ctxt->sax->error(ctxt->userData,
4275 "Space required after the attribute type\n");
4276 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004277 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004278 if (defaultValue != NULL)
4279 xmlFree(defaultValue);
4280 if (tree != NULL)
4281 xmlFreeEnumeration(tree);
4282 break;
4283 }
4284 SKIP_BLANKS;
4285
4286 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4287 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004288 if (defaultValue != NULL)
4289 xmlFree(defaultValue);
4290 if (tree != NULL)
4291 xmlFreeEnumeration(tree);
4292 break;
4293 }
4294
4295 GROW;
4296 if (RAW != '>') {
4297 if (!IS_BLANK(CUR)) {
4298 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4299 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4300 ctxt->sax->error(ctxt->userData,
4301 "Space required after the attribute default value\n");
4302 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004303 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004304 if (defaultValue != NULL)
4305 xmlFree(defaultValue);
4306 if (tree != NULL)
4307 xmlFreeEnumeration(tree);
4308 break;
4309 }
4310 SKIP_BLANKS;
4311 }
4312 if (check == CUR_PTR) {
4313 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4314 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4315 ctxt->sax->error(ctxt->userData,
4316 "xmlParseAttributeListDecl: detected internal error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004317 if (defaultValue != NULL)
4318 xmlFree(defaultValue);
4319 if (tree != NULL)
4320 xmlFreeEnumeration(tree);
4321 break;
4322 }
4323 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4324 (ctxt->sax->attributeDecl != NULL))
4325 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4326 type, def, defaultValue, tree);
Owen Taylor3473f882001-02-23 17:55:21 +00004327 if (defaultValue != NULL)
4328 xmlFree(defaultValue);
4329 GROW;
4330 }
4331 if (RAW == '>') {
4332 if (input != ctxt->input) {
4333 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4334 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4335 ctxt->sax->error(ctxt->userData,
4336"Attribute list declaration doesn't start and stop in the same entity\n");
4337 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004338 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004339 }
4340 NEXT;
4341 }
Owen Taylor3473f882001-02-23 17:55:21 +00004342 }
4343}
4344
4345/**
4346 * xmlParseElementMixedContentDecl:
4347 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004348 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004349 *
4350 * parse the declaration for a Mixed Element content
4351 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4352 *
4353 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4354 * '(' S? '#PCDATA' S? ')'
4355 *
4356 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4357 *
4358 * [ VC: No Duplicate Types ]
4359 * The same name must not appear more than once in a single
4360 * mixed-content declaration.
4361 *
4362 * returns: the list of the xmlElementContentPtr describing the element choices
4363 */
4364xmlElementContentPtr
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004365xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004366 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004367 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004368
4369 GROW;
4370 if ((RAW == '#') && (NXT(1) == 'P') &&
4371 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4372 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4373 (NXT(6) == 'A')) {
4374 SKIP(7);
4375 SKIP_BLANKS;
4376 SHRINK;
4377 if (RAW == ')') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004378 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4379 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4380 if (ctxt->vctxt.error != NULL)
4381 ctxt->vctxt.error(ctxt->vctxt.userData,
4382"Element content declaration doesn't start and stop in the same entity\n");
4383 ctxt->valid = 0;
4384 }
Owen Taylor3473f882001-02-23 17:55:21 +00004385 NEXT;
4386 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4387 if (RAW == '*') {
4388 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4389 NEXT;
4390 }
4391 return(ret);
4392 }
4393 if ((RAW == '(') || (RAW == '|')) {
4394 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4395 if (ret == NULL) return(NULL);
4396 }
4397 while (RAW == '|') {
4398 NEXT;
4399 if (elem == NULL) {
4400 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4401 if (ret == NULL) return(NULL);
4402 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004403 if (cur != NULL)
4404 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004405 cur = ret;
4406 } else {
4407 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4408 if (n == NULL) return(NULL);
4409 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004410 if (n->c1 != NULL)
4411 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004412 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004413 if (n != NULL)
4414 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004415 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004416 }
4417 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004418 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004419 if (elem == NULL) {
4420 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4421 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4422 ctxt->sax->error(ctxt->userData,
4423 "xmlParseElementMixedContentDecl : Name expected\n");
4424 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004425 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004426 xmlFreeElementContent(cur);
4427 return(NULL);
4428 }
4429 SKIP_BLANKS;
4430 GROW;
4431 }
4432 if ((RAW == ')') && (NXT(1) == '*')) {
4433 if (elem != NULL) {
4434 cur->c2 = xmlNewElementContent(elem,
4435 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004436 if (cur->c2 != NULL)
4437 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004438 }
4439 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004440 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4441 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4442 if (ctxt->vctxt.error != NULL)
4443 ctxt->vctxt.error(ctxt->vctxt.userData,
4444"Element content declaration doesn't start and stop in the same entity\n");
4445 ctxt->valid = 0;
4446 }
Owen Taylor3473f882001-02-23 17:55:21 +00004447 SKIP(2);
4448 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00004449 xmlFreeElementContent(ret);
4450 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4451 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4452 ctxt->sax->error(ctxt->userData,
4453 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4454 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004455 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004456 return(NULL);
4457 }
4458
4459 } else {
4460 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4461 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4462 ctxt->sax->error(ctxt->userData,
4463 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4464 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004465 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004466 }
4467 return(ret);
4468}
4469
4470/**
4471 * xmlParseElementChildrenContentDecl:
4472 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004473 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004474 *
4475 * parse the declaration for a Mixed Element content
4476 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4477 *
4478 *
4479 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4480 *
4481 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4482 *
4483 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4484 *
4485 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4486 *
4487 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4488 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004489 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004490 * opening or closing parentheses in a choice, seq, or Mixed
4491 * construct is contained in the replacement text for a parameter
4492 * entity, both must be contained in the same replacement text. For
4493 * interoperability, if a parameter-entity reference appears in a
4494 * choice, seq, or Mixed construct, its replacement text should not
4495 * be empty, and neither the first nor last non-blank character of
4496 * the replacement text should be a connector (| or ,).
4497 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004498 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004499 * hierarchy.
4500 */
4501xmlElementContentPtr
Owen Taylor3473f882001-02-23 17:55:21 +00004502xmlParseElementChildrenContentDecl
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004503(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004504 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004505 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00004506 xmlChar type = 0;
4507
4508 SKIP_BLANKS;
4509 GROW;
4510 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004511 xmlParserInputPtr input = ctxt->input;
4512
Owen Taylor3473f882001-02-23 17:55:21 +00004513 /* Recurse on first child */
4514 NEXT;
4515 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004516 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004517 SKIP_BLANKS;
4518 GROW;
4519 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004520 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004521 if (elem == NULL) {
4522 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4523 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4524 ctxt->sax->error(ctxt->userData,
4525 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4526 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004527 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004528 return(NULL);
4529 }
4530 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004531 if (cur == NULL) {
4532 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4533 ctxt->sax->error(ctxt->userData,
4534 "xmlParseElementChildrenContentDecl : out of memory\n");
4535 ctxt->errNo = XML_ERR_NO_MEMORY;
4536 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004537 return(NULL);
4538 }
Owen Taylor3473f882001-02-23 17:55:21 +00004539 GROW;
4540 if (RAW == '?') {
4541 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4542 NEXT;
4543 } else if (RAW == '*') {
4544 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4545 NEXT;
4546 } else if (RAW == '+') {
4547 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4548 NEXT;
4549 } else {
4550 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4551 }
Owen Taylor3473f882001-02-23 17:55:21 +00004552 GROW;
4553 }
4554 SKIP_BLANKS;
4555 SHRINK;
4556 while (RAW != ')') {
4557 /*
4558 * Each loop we parse one separator and one element.
4559 */
4560 if (RAW == ',') {
4561 if (type == 0) type = CUR;
4562
4563 /*
4564 * Detect "Name | Name , Name" error
4565 */
4566 else if (type != CUR) {
4567 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4568 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4569 ctxt->sax->error(ctxt->userData,
4570 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4571 type);
4572 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004573 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004574 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004575 xmlFreeElementContent(last);
4576 if (ret != NULL)
4577 xmlFreeElementContent(ret);
4578 return(NULL);
4579 }
4580 NEXT;
4581
4582 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4583 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004584 if ((last != NULL) && (last != ret))
4585 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004586 xmlFreeElementContent(ret);
4587 return(NULL);
4588 }
4589 if (last == NULL) {
4590 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004591 if (ret != NULL)
4592 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004593 ret = cur = op;
4594 } else {
4595 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004596 if (op != NULL)
4597 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004598 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004599 if (last != NULL)
4600 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004601 cur =op;
4602 last = NULL;
4603 }
4604 } else if (RAW == '|') {
4605 if (type == 0) type = CUR;
4606
4607 /*
4608 * Detect "Name , Name | Name" error
4609 */
4610 else if (type != CUR) {
4611 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4612 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4613 ctxt->sax->error(ctxt->userData,
4614 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4615 type);
4616 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004617 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004618 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004619 xmlFreeElementContent(last);
4620 if (ret != NULL)
4621 xmlFreeElementContent(ret);
4622 return(NULL);
4623 }
4624 NEXT;
4625
4626 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4627 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004628 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004629 xmlFreeElementContent(last);
4630 if (ret != NULL)
4631 xmlFreeElementContent(ret);
4632 return(NULL);
4633 }
4634 if (last == NULL) {
4635 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004636 if (ret != NULL)
4637 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004638 ret = cur = op;
4639 } else {
4640 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004641 if (op != NULL)
4642 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004643 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004644 if (last != NULL)
4645 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004646 cur =op;
4647 last = NULL;
4648 }
4649 } else {
4650 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4651 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4652 ctxt->sax->error(ctxt->userData,
4653 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4654 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004655 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004656 if (ret != NULL)
4657 xmlFreeElementContent(ret);
4658 return(NULL);
4659 }
4660 GROW;
4661 SKIP_BLANKS;
4662 GROW;
4663 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004664 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004665 /* Recurse on second child */
4666 NEXT;
4667 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004668 last = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004669 SKIP_BLANKS;
4670 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004671 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004672 if (elem == NULL) {
4673 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4674 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4675 ctxt->sax->error(ctxt->userData,
4676 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4677 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004678 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004679 if (ret != NULL)
4680 xmlFreeElementContent(ret);
4681 return(NULL);
4682 }
4683 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00004684 if (RAW == '?') {
4685 last->ocur = XML_ELEMENT_CONTENT_OPT;
4686 NEXT;
4687 } else if (RAW == '*') {
4688 last->ocur = XML_ELEMENT_CONTENT_MULT;
4689 NEXT;
4690 } else if (RAW == '+') {
4691 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4692 NEXT;
4693 } else {
4694 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4695 }
4696 }
4697 SKIP_BLANKS;
4698 GROW;
4699 }
4700 if ((cur != NULL) && (last != NULL)) {
4701 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004702 if (last != NULL)
4703 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004704 }
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004705 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4706 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4707 if (ctxt->vctxt.error != NULL)
4708 ctxt->vctxt.error(ctxt->vctxt.userData,
4709"Element content declaration doesn't start and stop in the same entity\n");
4710 ctxt->valid = 0;
4711 }
Owen Taylor3473f882001-02-23 17:55:21 +00004712 NEXT;
4713 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004714 if (ret != NULL)
4715 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004716 NEXT;
4717 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004718 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004719 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004720 cur = ret;
4721 /*
4722 * Some normalization:
4723 * (a | b* | c?)* == (a | b | c)*
4724 */
4725 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4726 if ((cur->c1 != NULL) &&
4727 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4728 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4729 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4730 if ((cur->c2 != NULL) &&
4731 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4732 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4733 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4734 cur = cur->c2;
4735 }
4736 }
Owen Taylor3473f882001-02-23 17:55:21 +00004737 NEXT;
4738 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004739 if (ret != NULL) {
4740 int found = 0;
4741
Daniel Veillarde470df72001-04-18 21:41:07 +00004742 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004743 /*
4744 * Some normalization:
4745 * (a | b*)+ == (a | b)*
4746 * (a | b?)+ == (a | b)*
4747 */
4748 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4749 if ((cur->c1 != NULL) &&
4750 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4751 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4752 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4753 found = 1;
4754 }
4755 if ((cur->c2 != NULL) &&
4756 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4757 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4758 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4759 found = 1;
4760 }
4761 cur = cur->c2;
4762 }
4763 if (found)
4764 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4765 }
Owen Taylor3473f882001-02-23 17:55:21 +00004766 NEXT;
4767 }
4768 return(ret);
4769}
4770
4771/**
4772 * xmlParseElementContentDecl:
4773 * @ctxt: an XML parser context
4774 * @name: the name of the element being defined.
4775 * @result: the Element Content pointer will be stored here if any
4776 *
4777 * parse the declaration for an Element content either Mixed or Children,
4778 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4779 *
4780 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4781 *
4782 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4783 */
4784
4785int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004786xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00004787 xmlElementContentPtr *result) {
4788
4789 xmlElementContentPtr tree = NULL;
4790 xmlParserInputPtr input = ctxt->input;
4791 int res;
4792
4793 *result = NULL;
4794
4795 if (RAW != '(') {
4796 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4797 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4798 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004799 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004800 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004801 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004802 return(-1);
4803 }
4804 NEXT;
4805 GROW;
4806 SKIP_BLANKS;
4807 if ((RAW == '#') && (NXT(1) == 'P') &&
4808 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4809 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4810 (NXT(6) == 'A')) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004811 tree = xmlParseElementMixedContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004812 res = XML_ELEMENT_TYPE_MIXED;
4813 } else {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004814 tree = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004815 res = XML_ELEMENT_TYPE_ELEMENT;
4816 }
Owen Taylor3473f882001-02-23 17:55:21 +00004817 SKIP_BLANKS;
4818 *result = tree;
4819 return(res);
4820}
4821
4822/**
4823 * xmlParseElementDecl:
4824 * @ctxt: an XML parser context
4825 *
4826 * parse an Element declaration.
4827 *
4828 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4829 *
4830 * [ VC: Unique Element Type Declaration ]
4831 * No element type may be declared more than once
4832 *
4833 * Returns the type of the element, or -1 in case of error
4834 */
4835int
4836xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004837 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004838 int ret = -1;
4839 xmlElementContentPtr content = NULL;
4840
4841 GROW;
4842 if ((RAW == '<') && (NXT(1) == '!') &&
4843 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4844 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4845 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4846 (NXT(8) == 'T')) {
4847 xmlParserInputPtr input = ctxt->input;
4848
4849 SKIP(9);
4850 if (!IS_BLANK(CUR)) {
4851 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4852 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4853 ctxt->sax->error(ctxt->userData,
4854 "Space required after 'ELEMENT'\n");
4855 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004856 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004857 }
4858 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004859 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004860 if (name == NULL) {
4861 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4862 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4863 ctxt->sax->error(ctxt->userData,
4864 "xmlParseElementDecl: no name for Element\n");
4865 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004866 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004867 return(-1);
4868 }
4869 while ((RAW == 0) && (ctxt->inputNr > 1))
4870 xmlPopInput(ctxt);
4871 if (!IS_BLANK(CUR)) {
4872 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4873 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4874 ctxt->sax->error(ctxt->userData,
4875 "Space required after the element name\n");
4876 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004877 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004878 }
4879 SKIP_BLANKS;
4880 if ((RAW == 'E') && (NXT(1) == 'M') &&
4881 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4882 (NXT(4) == 'Y')) {
4883 SKIP(5);
4884 /*
4885 * Element must always be empty.
4886 */
4887 ret = XML_ELEMENT_TYPE_EMPTY;
4888 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4889 (NXT(2) == 'Y')) {
4890 SKIP(3);
4891 /*
4892 * Element is a generic container.
4893 */
4894 ret = XML_ELEMENT_TYPE_ANY;
4895 } else if (RAW == '(') {
4896 ret = xmlParseElementContentDecl(ctxt, name, &content);
4897 } else {
4898 /*
4899 * [ WFC: PEs in Internal Subset ] error handling.
4900 */
4901 if ((RAW == '%') && (ctxt->external == 0) &&
4902 (ctxt->inputNr == 1)) {
4903 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4904 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4905 ctxt->sax->error(ctxt->userData,
4906 "PEReference: forbidden within markup decl in internal subset\n");
4907 } else {
4908 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4909 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4910 ctxt->sax->error(ctxt->userData,
4911 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4912 }
4913 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004914 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004915 return(-1);
4916 }
4917
4918 SKIP_BLANKS;
4919 /*
4920 * Pop-up of finished entities.
4921 */
4922 while ((RAW == 0) && (ctxt->inputNr > 1))
4923 xmlPopInput(ctxt);
4924 SKIP_BLANKS;
4925
4926 if (RAW != '>') {
4927 ctxt->errNo = XML_ERR_GT_REQUIRED;
4928 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4929 ctxt->sax->error(ctxt->userData,
4930 "xmlParseElementDecl: expected '>' at the end\n");
4931 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004932 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004933 } else {
4934 if (input != ctxt->input) {
4935 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4936 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4937 ctxt->sax->error(ctxt->userData,
4938"Element declaration doesn't start and stop in the same entity\n");
4939 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004940 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004941 }
4942
4943 NEXT;
4944 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4945 (ctxt->sax->elementDecl != NULL))
4946 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4947 content);
4948 }
4949 if (content != NULL) {
4950 xmlFreeElementContent(content);
4951 }
Owen Taylor3473f882001-02-23 17:55:21 +00004952 }
4953 return(ret);
4954}
4955
4956/**
Owen Taylor3473f882001-02-23 17:55:21 +00004957 * xmlParseConditionalSections
4958 * @ctxt: an XML parser context
4959 *
4960 * [61] conditionalSect ::= includeSect | ignoreSect
4961 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4962 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4963 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4964 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4965 */
4966
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004967static void
Owen Taylor3473f882001-02-23 17:55:21 +00004968xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4969 SKIP(3);
4970 SKIP_BLANKS;
4971 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4972 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4973 (NXT(6) == 'E')) {
4974 SKIP(7);
4975 SKIP_BLANKS;
4976 if (RAW != '[') {
4977 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4978 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4979 ctxt->sax->error(ctxt->userData,
4980 "XML conditional section '[' expected\n");
4981 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004982 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004983 } else {
4984 NEXT;
4985 }
4986 if (xmlParserDebugEntities) {
4987 if ((ctxt->input != NULL) && (ctxt->input->filename))
4988 xmlGenericError(xmlGenericErrorContext,
4989 "%s(%d): ", ctxt->input->filename,
4990 ctxt->input->line);
4991 xmlGenericError(xmlGenericErrorContext,
4992 "Entering INCLUDE Conditional Section\n");
4993 }
4994
4995 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4996 (NXT(2) != '>'))) {
4997 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00004998 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00004999
5000 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5001 xmlParseConditionalSections(ctxt);
5002 } else if (IS_BLANK(CUR)) {
5003 NEXT;
5004 } else if (RAW == '%') {
5005 xmlParsePEReference(ctxt);
5006 } else
5007 xmlParseMarkupDecl(ctxt);
5008
5009 /*
5010 * Pop-up of finished entities.
5011 */
5012 while ((RAW == 0) && (ctxt->inputNr > 1))
5013 xmlPopInput(ctxt);
5014
Daniel Veillardfdc91562002-07-01 21:52:03 +00005015 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005016 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5017 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5018 ctxt->sax->error(ctxt->userData,
5019 "Content error in the external subset\n");
5020 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005021 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005022 break;
5023 }
5024 }
5025 if (xmlParserDebugEntities) {
5026 if ((ctxt->input != NULL) && (ctxt->input->filename))
5027 xmlGenericError(xmlGenericErrorContext,
5028 "%s(%d): ", ctxt->input->filename,
5029 ctxt->input->line);
5030 xmlGenericError(xmlGenericErrorContext,
5031 "Leaving INCLUDE Conditional Section\n");
5032 }
5033
5034 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
5035 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
5036 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005037 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005038 int depth = 0;
5039
5040 SKIP(6);
5041 SKIP_BLANKS;
5042 if (RAW != '[') {
5043 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5044 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5045 ctxt->sax->error(ctxt->userData,
5046 "XML conditional section '[' expected\n");
5047 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005048 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005049 } else {
5050 NEXT;
5051 }
5052 if (xmlParserDebugEntities) {
5053 if ((ctxt->input != NULL) && (ctxt->input->filename))
5054 xmlGenericError(xmlGenericErrorContext,
5055 "%s(%d): ", ctxt->input->filename,
5056 ctxt->input->line);
5057 xmlGenericError(xmlGenericErrorContext,
5058 "Entering IGNORE Conditional Section\n");
5059 }
5060
5061 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005062 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005063 * But disable SAX event generating DTD building in the meantime
5064 */
5065 state = ctxt->disableSAX;
5066 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005067 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005068 ctxt->instate = XML_PARSER_IGNORE;
5069
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005070 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005071 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5072 depth++;
5073 SKIP(3);
5074 continue;
5075 }
5076 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5077 if (--depth >= 0) SKIP(3);
5078 continue;
5079 }
5080 NEXT;
5081 continue;
5082 }
5083
5084 ctxt->disableSAX = state;
5085 ctxt->instate = instate;
5086
5087 if (xmlParserDebugEntities) {
5088 if ((ctxt->input != NULL) && (ctxt->input->filename))
5089 xmlGenericError(xmlGenericErrorContext,
5090 "%s(%d): ", ctxt->input->filename,
5091 ctxt->input->line);
5092 xmlGenericError(xmlGenericErrorContext,
5093 "Leaving IGNORE Conditional Section\n");
5094 }
5095
5096 } else {
5097 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5098 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5099 ctxt->sax->error(ctxt->userData,
5100 "XML conditional section INCLUDE or IGNORE keyword expected\n");
5101 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005102 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005103 }
5104
5105 if (RAW == 0)
5106 SHRINK;
5107
5108 if (RAW == 0) {
5109 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
5110 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5111 ctxt->sax->error(ctxt->userData,
5112 "XML conditional section not closed\n");
5113 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005114 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005115 } else {
5116 SKIP(3);
5117 }
5118}
5119
5120/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005121 * xmlParseMarkupDecl:
5122 * @ctxt: an XML parser context
5123 *
5124 * parse Markup declarations
5125 *
5126 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5127 * NotationDecl | PI | Comment
5128 *
5129 * [ VC: Proper Declaration/PE Nesting ]
5130 * Parameter-entity replacement text must be properly nested with
5131 * markup declarations. That is to say, if either the first character
5132 * or the last character of a markup declaration (markupdecl above) is
5133 * contained in the replacement text for a parameter-entity reference,
5134 * both must be contained in the same replacement text.
5135 *
5136 * [ WFC: PEs in Internal Subset ]
5137 * In the internal DTD subset, parameter-entity references can occur
5138 * only where markup declarations can occur, not within markup declarations.
5139 * (This does not apply to references that occur in external parameter
5140 * entities or to the external subset.)
5141 */
5142void
5143xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5144 GROW;
5145 xmlParseElementDecl(ctxt);
5146 xmlParseAttributeListDecl(ctxt);
5147 xmlParseEntityDecl(ctxt);
5148 xmlParseNotationDecl(ctxt);
5149 xmlParsePI(ctxt);
5150 xmlParseComment(ctxt);
5151 /*
5152 * This is only for internal subset. On external entities,
5153 * the replacement is done before parsing stage
5154 */
5155 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5156 xmlParsePEReference(ctxt);
5157
5158 /*
5159 * Conditional sections are allowed from entities included
5160 * by PE References in the internal subset.
5161 */
5162 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5163 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5164 xmlParseConditionalSections(ctxt);
5165 }
5166 }
5167
5168 ctxt->instate = XML_PARSER_DTD;
5169}
5170
5171/**
5172 * xmlParseTextDecl:
5173 * @ctxt: an XML parser context
5174 *
5175 * parse an XML declaration header for external entities
5176 *
5177 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5178 *
5179 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5180 */
5181
5182void
5183xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5184 xmlChar *version;
5185
5186 /*
5187 * We know that '<?xml' is here.
5188 */
5189 if ((RAW == '<') && (NXT(1) == '?') &&
5190 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5191 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5192 SKIP(5);
5193 } else {
5194 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
5195 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5196 ctxt->sax->error(ctxt->userData,
5197 "Text declaration '<?xml' required\n");
5198 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005199 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005200
5201 return;
5202 }
5203
5204 if (!IS_BLANK(CUR)) {
5205 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5206 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5207 ctxt->sax->error(ctxt->userData,
5208 "Space needed after '<?xml'\n");
5209 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005210 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005211 }
5212 SKIP_BLANKS;
5213
5214 /*
5215 * We may have the VersionInfo here.
5216 */
5217 version = xmlParseVersionInfo(ctxt);
5218 if (version == NULL)
5219 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005220 else {
5221 if (!IS_BLANK(CUR)) {
5222 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5223 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5224 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5225 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005226 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard401c2112002-01-07 16:54:10 +00005227 }
5228 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005229 ctxt->input->version = version;
5230
5231 /*
5232 * We must have the encoding declaration
5233 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005234 xmlParseEncodingDecl(ctxt);
5235 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5236 /*
5237 * The XML REC instructs us to stop parsing right here
5238 */
5239 return;
5240 }
5241
5242 SKIP_BLANKS;
5243 if ((RAW == '?') && (NXT(1) == '>')) {
5244 SKIP(2);
5245 } else if (RAW == '>') {
5246 /* Deprecated old WD ... */
5247 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5248 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5249 ctxt->sax->error(ctxt->userData,
5250 "XML declaration must end-up with '?>'\n");
5251 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005252 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005253 NEXT;
5254 } else {
5255 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5256 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5257 ctxt->sax->error(ctxt->userData,
5258 "parsing XML declaration: '?>' expected\n");
5259 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005260 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005261 MOVETO_ENDTAG(CUR_PTR);
5262 NEXT;
5263 }
5264}
5265
5266/**
Owen Taylor3473f882001-02-23 17:55:21 +00005267 * xmlParseExternalSubset:
5268 * @ctxt: an XML parser context
5269 * @ExternalID: the external identifier
5270 * @SystemID: the system identifier (or URL)
5271 *
5272 * parse Markup declarations from an external subset
5273 *
5274 * [30] extSubset ::= textDecl? extSubsetDecl
5275 *
5276 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5277 */
5278void
5279xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5280 const xmlChar *SystemID) {
5281 GROW;
5282 if ((RAW == '<') && (NXT(1) == '?') &&
5283 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5284 (NXT(4) == 'l')) {
5285 xmlParseTextDecl(ctxt);
5286 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5287 /*
5288 * The XML REC instructs us to stop parsing right here
5289 */
5290 ctxt->instate = XML_PARSER_EOF;
5291 return;
5292 }
5293 }
5294 if (ctxt->myDoc == NULL) {
5295 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5296 }
5297 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5298 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5299
5300 ctxt->instate = XML_PARSER_DTD;
5301 ctxt->external = 1;
5302 while (((RAW == '<') && (NXT(1) == '?')) ||
5303 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005304 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005305 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005306 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005307
5308 GROW;
5309 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5310 xmlParseConditionalSections(ctxt);
5311 } else if (IS_BLANK(CUR)) {
5312 NEXT;
5313 } else if (RAW == '%') {
5314 xmlParsePEReference(ctxt);
5315 } else
5316 xmlParseMarkupDecl(ctxt);
5317
5318 /*
5319 * Pop-up of finished entities.
5320 */
5321 while ((RAW == 0) && (ctxt->inputNr > 1))
5322 xmlPopInput(ctxt);
5323
Daniel Veillardfdc91562002-07-01 21:52:03 +00005324 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005325 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5326 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5327 ctxt->sax->error(ctxt->userData,
5328 "Content error in the external subset\n");
5329 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005330 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005331 break;
5332 }
5333 }
5334
5335 if (RAW != 0) {
5336 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5337 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5338 ctxt->sax->error(ctxt->userData,
5339 "Extra content at the end of the document\n");
5340 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005341 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005342 }
5343
5344}
5345
5346/**
5347 * xmlParseReference:
5348 * @ctxt: an XML parser context
5349 *
5350 * parse and handle entity references in content, depending on the SAX
5351 * interface, this may end-up in a call to character() if this is a
5352 * CharRef, a predefined entity, if there is no reference() callback.
5353 * or if the parser was asked to switch to that mode.
5354 *
5355 * [67] Reference ::= EntityRef | CharRef
5356 */
5357void
5358xmlParseReference(xmlParserCtxtPtr ctxt) {
5359 xmlEntityPtr ent;
5360 xmlChar *val;
5361 if (RAW != '&') return;
5362
5363 if (NXT(1) == '#') {
5364 int i = 0;
5365 xmlChar out[10];
5366 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005367 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005368
5369 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5370 /*
5371 * So we are using non-UTF-8 buffers
5372 * Check that the char fit on 8bits, if not
5373 * generate a CharRef.
5374 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005375 if (value <= 0xFF) {
5376 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005377 out[1] = 0;
5378 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5379 (!ctxt->disableSAX))
5380 ctxt->sax->characters(ctxt->userData, out, 1);
5381 } else {
5382 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005383 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005384 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005385 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005386 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5387 (!ctxt->disableSAX))
5388 ctxt->sax->reference(ctxt->userData, out);
5389 }
5390 } else {
5391 /*
5392 * Just encode the value in UTF-8
5393 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005394 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005395 out[i] = 0;
5396 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5397 (!ctxt->disableSAX))
5398 ctxt->sax->characters(ctxt->userData, out, i);
5399 }
5400 } else {
5401 ent = xmlParseEntityRef(ctxt);
5402 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005403 if (!ctxt->wellFormed)
5404 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005405 if ((ent->name != NULL) &&
5406 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5407 xmlNodePtr list = NULL;
5408 int ret;
5409
5410
5411 /*
5412 * The first reference to the entity trigger a parsing phase
5413 * where the ent->children is filled with the result from
5414 * the parsing.
5415 */
5416 if (ent->children == NULL) {
5417 xmlChar *value;
5418 value = ent->content;
5419
5420 /*
5421 * Check that this entity is well formed
5422 */
5423 if ((value != NULL) &&
5424 (value[1] == 0) && (value[0] == '<') &&
5425 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5426 /*
5427 * DONE: get definite answer on this !!!
5428 * Lots of entity decls are used to declare a single
5429 * char
5430 * <!ENTITY lt "<">
5431 * Which seems to be valid since
5432 * 2.4: The ampersand character (&) and the left angle
5433 * bracket (<) may appear in their literal form only
5434 * when used ... They are also legal within the literal
5435 * entity value of an internal entity declaration;i
5436 * see "4.3.2 Well-Formed Parsed Entities".
5437 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5438 * Looking at the OASIS test suite and James Clark
5439 * tests, this is broken. However the XML REC uses
5440 * it. Is the XML REC not well-formed ????
5441 * This is a hack to avoid this problem
5442 *
5443 * ANSWER: since lt gt amp .. are already defined,
5444 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005445 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005446 * is lousy but acceptable.
5447 */
5448 list = xmlNewDocText(ctxt->myDoc, value);
5449 if (list != NULL) {
5450 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5451 (ent->children == NULL)) {
5452 ent->children = list;
5453 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005454 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005455 list->parent = (xmlNodePtr) ent;
5456 } else {
5457 xmlFreeNodeList(list);
5458 }
5459 } else if (list != NULL) {
5460 xmlFreeNodeList(list);
5461 }
5462 } else {
5463 /*
5464 * 4.3.2: An internal general parsed entity is well-formed
5465 * if its replacement text matches the production labeled
5466 * content.
5467 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005468
5469 void *user_data;
5470 /*
5471 * This is a bit hackish but this seems the best
5472 * way to make sure both SAX and DOM entity support
5473 * behaves okay.
5474 */
5475 if (ctxt->userData == ctxt)
5476 user_data = NULL;
5477 else
5478 user_data = ctxt->userData;
5479
Owen Taylor3473f882001-02-23 17:55:21 +00005480 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5481 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005482 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5483 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005484 ctxt->depth--;
5485 } else if (ent->etype ==
5486 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5487 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005488 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005489 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005490 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005491 ctxt->depth--;
5492 } else {
5493 ret = -1;
5494 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5495 ctxt->sax->error(ctxt->userData,
5496 "Internal: invalid entity type\n");
5497 }
5498 if (ret == XML_ERR_ENTITY_LOOP) {
5499 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5500 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5501 ctxt->sax->error(ctxt->userData,
5502 "Detected entity reference loop\n");
5503 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005504 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005505 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005506 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005507 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5508 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005509 (ent->children == NULL)) {
5510 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005511 if (ctxt->replaceEntities) {
5512 /*
5513 * Prune it directly in the generated document
5514 * except for single text nodes.
5515 */
5516 if ((list->type == XML_TEXT_NODE) &&
5517 (list->next == NULL)) {
5518 list->parent = (xmlNodePtr) ent;
5519 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005520 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005521 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005522 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005523 while (list != NULL) {
5524 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005525 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005526 if (list->next == NULL)
5527 ent->last = list;
5528 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005529 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005530 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005531 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5532 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005533 }
5534 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005535 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005536 while (list != NULL) {
5537 list->parent = (xmlNodePtr) ent;
5538 if (list->next == NULL)
5539 ent->last = list;
5540 list = list->next;
5541 }
Owen Taylor3473f882001-02-23 17:55:21 +00005542 }
5543 } else {
5544 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005545 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005546 }
5547 } else if (ret > 0) {
5548 ctxt->errNo = ret;
5549 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5550 ctxt->sax->error(ctxt->userData,
5551 "Entity value required\n");
5552 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005553 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005554 } else if (list != NULL) {
5555 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005556 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005557 }
5558 }
5559 }
5560 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5561 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5562 /*
5563 * Create a node.
5564 */
5565 ctxt->sax->reference(ctxt->userData, ent->name);
5566 return;
5567 } else if (ctxt->replaceEntities) {
5568 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5569 /*
5570 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005571 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005572 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005573 */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005574 if ((list == NULL) && (ent->owner == 0)) {
5575 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005576 cur = ent->children;
5577 while (cur != NULL) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005578 nw = xmlCopyNode(cur, 1);
5579 if (nw != NULL) {
5580 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005581 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005582 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005583 }
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005584 xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005585 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005586 if (cur == ent->last)
5587 break;
5588 cur = cur->next;
5589 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005590 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005591 xmlAddEntityReference(ent, firstChild, nw);
5592 } else if (list == NULL) {
5593 xmlNodePtr nw = NULL, cur, next, last,
5594 firstChild = NULL;
5595 /*
5596 * Copy the entity child list and make it the new
5597 * entity child list. The goal is to make sure any
5598 * ID or REF referenced will be the one from the
5599 * document content and not the entity copy.
5600 */
5601 cur = ent->children;
5602 ent->children = NULL;
5603 last = ent->last;
5604 ent->last = NULL;
5605 while (cur != NULL) {
5606 next = cur->next;
5607 cur->next = NULL;
5608 cur->parent = NULL;
5609 nw = xmlCopyNode(cur, 1);
5610 if (nw != NULL) {
5611 nw->_private = cur->_private;
5612 if (firstChild == NULL){
5613 firstChild = cur;
5614 }
5615 xmlAddChild((xmlNodePtr) ent, nw);
5616 xmlAddChild(ctxt->node, cur);
5617 }
5618 if (cur == last)
5619 break;
5620 cur = next;
5621 }
5622 ent->owner = 1;
5623 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5624 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005625 } else {
5626 /*
5627 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005628 * node with a possible previous text one which
5629 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005630 */
5631 if (ent->children->type == XML_TEXT_NODE)
5632 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5633 if ((ent->last != ent->children) &&
5634 (ent->last->type == XML_TEXT_NODE))
5635 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5636 xmlAddChildList(ctxt->node, ent->children);
5637 }
5638
Owen Taylor3473f882001-02-23 17:55:21 +00005639 /*
5640 * This is to avoid a nasty side effect, see
5641 * characters() in SAX.c
5642 */
5643 ctxt->nodemem = 0;
5644 ctxt->nodelen = 0;
5645 return;
5646 } else {
5647 /*
5648 * Probably running in SAX mode
5649 */
5650 xmlParserInputPtr input;
5651
5652 input = xmlNewEntityInputStream(ctxt, ent);
5653 xmlPushInput(ctxt, input);
5654 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5655 (RAW == '<') && (NXT(1) == '?') &&
5656 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5657 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5658 xmlParseTextDecl(ctxt);
5659 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5660 /*
5661 * The XML REC instructs us to stop parsing right here
5662 */
5663 ctxt->instate = XML_PARSER_EOF;
5664 return;
5665 }
5666 if (input->standalone == 1) {
5667 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5668 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5669 ctxt->sax->error(ctxt->userData,
5670 "external parsed entities cannot be standalone\n");
5671 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005672 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005673 }
5674 }
5675 return;
5676 }
5677 }
5678 } else {
5679 val = ent->content;
5680 if (val == NULL) return;
5681 /*
5682 * inline the entity.
5683 */
5684 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5685 (!ctxt->disableSAX))
5686 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5687 }
5688 }
5689}
5690
5691/**
5692 * xmlParseEntityRef:
5693 * @ctxt: an XML parser context
5694 *
5695 * parse ENTITY references declarations
5696 *
5697 * [68] EntityRef ::= '&' Name ';'
5698 *
5699 * [ WFC: Entity Declared ]
5700 * In a document without any DTD, a document with only an internal DTD
5701 * subset which contains no parameter entity references, or a document
5702 * with "standalone='yes'", the Name given in the entity reference
5703 * must match that in an entity declaration, except that well-formed
5704 * documents need not declare any of the following entities: amp, lt,
5705 * gt, apos, quot. The declaration of a parameter entity must precede
5706 * any reference to it. Similarly, the declaration of a general entity
5707 * must precede any reference to it which appears in a default value in an
5708 * attribute-list declaration. Note that if entities are declared in the
5709 * external subset or in external parameter entities, a non-validating
5710 * processor is not obligated to read and process their declarations;
5711 * for such documents, the rule that an entity must be declared is a
5712 * well-formedness constraint only if standalone='yes'.
5713 *
5714 * [ WFC: Parsed Entity ]
5715 * An entity reference must not contain the name of an unparsed entity
5716 *
5717 * Returns the xmlEntityPtr if found, or NULL otherwise.
5718 */
5719xmlEntityPtr
5720xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005721 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005722 xmlEntityPtr ent = NULL;
5723
5724 GROW;
5725
5726 if (RAW == '&') {
5727 NEXT;
5728 name = xmlParseName(ctxt);
5729 if (name == NULL) {
5730 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5731 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5732 ctxt->sax->error(ctxt->userData,
5733 "xmlParseEntityRef: no name\n");
5734 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005735 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005736 } else {
5737 if (RAW == ';') {
5738 NEXT;
5739 /*
5740 * Ask first SAX for entity resolution, otherwise try the
5741 * predefined set.
5742 */
5743 if (ctxt->sax != NULL) {
5744 if (ctxt->sax->getEntity != NULL)
5745 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005746 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00005747 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005748 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
5749 (ctxt->userData==ctxt)) {
Daniel Veillard5997aca2002-03-18 18:36:20 +00005750 ent = getEntity(ctxt, name);
5751 }
Owen Taylor3473f882001-02-23 17:55:21 +00005752 }
5753 /*
5754 * [ WFC: Entity Declared ]
5755 * In a document without any DTD, a document with only an
5756 * internal DTD subset which contains no parameter entity
5757 * references, or a document with "standalone='yes'", the
5758 * Name given in the entity reference must match that in an
5759 * entity declaration, except that well-formed documents
5760 * need not declare any of the following entities: amp, lt,
5761 * gt, apos, quot.
5762 * The declaration of a parameter entity must precede any
5763 * reference to it.
5764 * Similarly, the declaration of a general entity must
5765 * precede any reference to it which appears in a default
5766 * value in an attribute-list declaration. Note that if
5767 * entities are declared in the external subset or in
5768 * external parameter entities, a non-validating processor
5769 * is not obligated to read and process their declarations;
5770 * for such documents, the rule that an entity must be
5771 * declared is a well-formedness constraint only if
5772 * standalone='yes'.
5773 */
5774 if (ent == NULL) {
5775 if ((ctxt->standalone == 1) ||
5776 ((ctxt->hasExternalSubset == 0) &&
5777 (ctxt->hasPErefs == 0))) {
5778 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5779 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5780 ctxt->sax->error(ctxt->userData,
5781 "Entity '%s' not defined\n", name);
5782 ctxt->wellFormed = 0;
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005783 ctxt->valid = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005784 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005785 } else {
5786 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005787 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005788 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005789 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005790 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005791 }
5792 }
5793
5794 /*
5795 * [ WFC: Parsed Entity ]
5796 * An entity reference must not contain the name of an
5797 * unparsed entity
5798 */
5799 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5800 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5801 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5802 ctxt->sax->error(ctxt->userData,
5803 "Entity reference to unparsed entity %s\n", name);
5804 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005805 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005806 }
5807
5808 /*
5809 * [ WFC: No External Entity References ]
5810 * Attribute values cannot contain direct or indirect
5811 * entity references to external entities.
5812 */
5813 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5814 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5815 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5816 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5817 ctxt->sax->error(ctxt->userData,
5818 "Attribute references external entity '%s'\n", name);
5819 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005820 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005821 }
5822 /*
5823 * [ WFC: No < in Attribute Values ]
5824 * The replacement text of any entity referred to directly or
5825 * indirectly in an attribute value (other than "&lt;") must
5826 * not contain a <.
5827 */
5828 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5829 (ent != NULL) &&
5830 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5831 (ent->content != NULL) &&
5832 (xmlStrchr(ent->content, '<'))) {
5833 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5834 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5835 ctxt->sax->error(ctxt->userData,
5836 "'<' in entity '%s' is not allowed in attributes values\n", name);
5837 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005838 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005839 }
5840
5841 /*
5842 * Internal check, no parameter entities here ...
5843 */
5844 else {
5845 switch (ent->etype) {
5846 case XML_INTERNAL_PARAMETER_ENTITY:
5847 case XML_EXTERNAL_PARAMETER_ENTITY:
5848 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5849 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5850 ctxt->sax->error(ctxt->userData,
5851 "Attempt to reference the parameter entity '%s'\n", name);
5852 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005853 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005854 break;
5855 default:
5856 break;
5857 }
5858 }
5859
5860 /*
5861 * [ WFC: No Recursion ]
5862 * A parsed entity must not contain a recursive reference
5863 * to itself, either directly or indirectly.
5864 * Done somewhere else
5865 */
5866
5867 } else {
5868 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5869 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5870 ctxt->sax->error(ctxt->userData,
5871 "xmlParseEntityRef: expecting ';'\n");
5872 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005873 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005874 }
Owen Taylor3473f882001-02-23 17:55:21 +00005875 }
5876 }
5877 return(ent);
5878}
5879
5880/**
5881 * xmlParseStringEntityRef:
5882 * @ctxt: an XML parser context
5883 * @str: a pointer to an index in the string
5884 *
5885 * parse ENTITY references declarations, but this version parses it from
5886 * a string value.
5887 *
5888 * [68] EntityRef ::= '&' Name ';'
5889 *
5890 * [ WFC: Entity Declared ]
5891 * In a document without any DTD, a document with only an internal DTD
5892 * subset which contains no parameter entity references, or a document
5893 * with "standalone='yes'", the Name given in the entity reference
5894 * must match that in an entity declaration, except that well-formed
5895 * documents need not declare any of the following entities: amp, lt,
5896 * gt, apos, quot. The declaration of a parameter entity must precede
5897 * any reference to it. Similarly, the declaration of a general entity
5898 * must precede any reference to it which appears in a default value in an
5899 * attribute-list declaration. Note that if entities are declared in the
5900 * external subset or in external parameter entities, a non-validating
5901 * processor is not obligated to read and process their declarations;
5902 * for such documents, the rule that an entity must be declared is a
5903 * well-formedness constraint only if standalone='yes'.
5904 *
5905 * [ WFC: Parsed Entity ]
5906 * An entity reference must not contain the name of an unparsed entity
5907 *
5908 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5909 * is updated to the current location in the string.
5910 */
5911xmlEntityPtr
5912xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5913 xmlChar *name;
5914 const xmlChar *ptr;
5915 xmlChar cur;
5916 xmlEntityPtr ent = NULL;
5917
5918 if ((str == NULL) || (*str == NULL))
5919 return(NULL);
5920 ptr = *str;
5921 cur = *ptr;
5922 if (cur == '&') {
5923 ptr++;
5924 cur = *ptr;
5925 name = xmlParseStringName(ctxt, &ptr);
5926 if (name == NULL) {
5927 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5928 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5929 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005930 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005931 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005932 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005933 } else {
5934 if (*ptr == ';') {
5935 ptr++;
5936 /*
5937 * Ask first SAX for entity resolution, otherwise try the
5938 * predefined set.
5939 */
5940 if (ctxt->sax != NULL) {
5941 if (ctxt->sax->getEntity != NULL)
5942 ent = ctxt->sax->getEntity(ctxt->userData, name);
5943 if (ent == NULL)
5944 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005945 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5946 ent = getEntity(ctxt, name);
5947 }
Owen Taylor3473f882001-02-23 17:55:21 +00005948 }
5949 /*
5950 * [ WFC: Entity Declared ]
5951 * In a document without any DTD, a document with only an
5952 * internal DTD subset which contains no parameter entity
5953 * references, or a document with "standalone='yes'", the
5954 * Name given in the entity reference must match that in an
5955 * entity declaration, except that well-formed documents
5956 * need not declare any of the following entities: amp, lt,
5957 * gt, apos, quot.
5958 * The declaration of a parameter entity must precede any
5959 * reference to it.
5960 * Similarly, the declaration of a general entity must
5961 * precede any reference to it which appears in a default
5962 * value in an attribute-list declaration. Note that if
5963 * entities are declared in the external subset or in
5964 * external parameter entities, a non-validating processor
5965 * is not obligated to read and process their declarations;
5966 * for such documents, the rule that an entity must be
5967 * declared is a well-formedness constraint only if
5968 * standalone='yes'.
5969 */
5970 if (ent == NULL) {
5971 if ((ctxt->standalone == 1) ||
5972 ((ctxt->hasExternalSubset == 0) &&
5973 (ctxt->hasPErefs == 0))) {
5974 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5975 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5976 ctxt->sax->error(ctxt->userData,
5977 "Entity '%s' not defined\n", name);
5978 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005979 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005980 } else {
5981 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5982 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5983 ctxt->sax->warning(ctxt->userData,
5984 "Entity '%s' not defined\n", name);
5985 }
5986 }
5987
5988 /*
5989 * [ WFC: Parsed Entity ]
5990 * An entity reference must not contain the name of an
5991 * unparsed entity
5992 */
5993 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5994 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5995 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5996 ctxt->sax->error(ctxt->userData,
5997 "Entity reference to unparsed entity %s\n", name);
5998 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005999 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006000 }
6001
6002 /*
6003 * [ WFC: No External Entity References ]
6004 * Attribute values cannot contain direct or indirect
6005 * entity references to external entities.
6006 */
6007 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6008 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
6009 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
6010 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6011 ctxt->sax->error(ctxt->userData,
6012 "Attribute references external entity '%s'\n", name);
6013 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006014 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006015 }
6016 /*
6017 * [ WFC: No < in Attribute Values ]
6018 * The replacement text of any entity referred to directly or
6019 * indirectly in an attribute value (other than "&lt;") must
6020 * not contain a <.
6021 */
6022 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6023 (ent != NULL) &&
6024 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6025 (ent->content != NULL) &&
6026 (xmlStrchr(ent->content, '<'))) {
6027 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6028 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6029 ctxt->sax->error(ctxt->userData,
6030 "'<' in entity '%s' is not allowed in attributes values\n", name);
6031 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006032 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006033 }
6034
6035 /*
6036 * Internal check, no parameter entities here ...
6037 */
6038 else {
6039 switch (ent->etype) {
6040 case XML_INTERNAL_PARAMETER_ENTITY:
6041 case XML_EXTERNAL_PARAMETER_ENTITY:
6042 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6043 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6044 ctxt->sax->error(ctxt->userData,
6045 "Attempt to reference the parameter entity '%s'\n", name);
6046 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006047 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006048 break;
6049 default:
6050 break;
6051 }
6052 }
6053
6054 /*
6055 * [ WFC: No Recursion ]
6056 * A parsed entity must not contain a recursive reference
6057 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006058 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006059 */
6060
6061 } else {
6062 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6063 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6064 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00006065 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006066 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006067 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006068 }
6069 xmlFree(name);
6070 }
6071 }
6072 *str = ptr;
6073 return(ent);
6074}
6075
6076/**
6077 * xmlParsePEReference:
6078 * @ctxt: an XML parser context
6079 *
6080 * parse PEReference declarations
 * The entity content is handled directly by pushing its content as
6082 * a new input stream.
6083 *
6084 * [69] PEReference ::= '%' Name ';'
6085 *
6086 * [ WFC: No Recursion ]
6087 * A parsed entity must not contain a recursive
6088 * reference to itself, either directly or indirectly.
6089 *
6090 * [ WFC: Entity Declared ]
6091 * In a document without any DTD, a document with only an internal DTD
6092 * subset which contains no parameter entity references, or a document
6093 * with "standalone='yes'", ... ... The declaration of a parameter
6094 * entity must precede any reference to it...
6095 *
6096 * [ VC: Entity Declared ]
6097 * In a document with an external subset or external parameter entities
6098 * with "standalone='no'", ... ... The declaration of a parameter entity
6099 * must precede any reference to it...
6100 *
6101 * [ WFC: In DTD ]
6102 * Parameter-entity references may only appear in the DTD.
6103 * NOTE: misleading but this is handled.
6104 */
6105void
6106xmlParsePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006107 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006108 xmlEntityPtr entity = NULL;
6109 xmlParserInputPtr input;
6110
6111 if (RAW == '%') {
6112 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006113 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006114 if (name == NULL) {
6115 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6116 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6117 ctxt->sax->error(ctxt->userData,
6118 "xmlParsePEReference: no name\n");
6119 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006120 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006121 } else {
6122 if (RAW == ';') {
6123 NEXT;
6124 if ((ctxt->sax != NULL) &&
6125 (ctxt->sax->getParameterEntity != NULL))
6126 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6127 name);
6128 if (entity == NULL) {
6129 /*
6130 * [ WFC: Entity Declared ]
6131 * In a document without any DTD, a document with only an
6132 * internal DTD subset which contains no parameter entity
6133 * references, or a document with "standalone='yes'", ...
6134 * ... The declaration of a parameter entity must precede
6135 * any reference to it...
6136 */
6137 if ((ctxt->standalone == 1) ||
6138 ((ctxt->hasExternalSubset == 0) &&
6139 (ctxt->hasPErefs == 0))) {
6140 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6141 if ((!ctxt->disableSAX) &&
6142 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6143 ctxt->sax->error(ctxt->userData,
6144 "PEReference: %%%s; not found\n", name);
6145 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006146 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006147 } else {
6148 /*
6149 * [ VC: Entity Declared ]
6150 * In a document with an external subset or external
6151 * parameter entities with "standalone='no'", ...
6152 * ... The declaration of a parameter entity must precede
6153 * any reference to it...
6154 */
6155 if ((!ctxt->disableSAX) &&
6156 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6157 ctxt->sax->warning(ctxt->userData,
6158 "PEReference: %%%s; not found\n", name);
6159 ctxt->valid = 0;
6160 }
6161 } else {
6162 /*
6163 * Internal checking in case the entity quest barfed
6164 */
6165 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6166 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6167 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6168 ctxt->sax->warning(ctxt->userData,
6169 "Internal: %%%s; is not a parameter entity\n", name);
Daniel Veillardf5582f12002-06-11 10:08:16 +00006170 } else if (ctxt->input->free != deallocblankswrapper) {
6171 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
6172 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00006173 } else {
6174 /*
6175 * TODO !!!
6176 * handle the extra spaces added before and after
6177 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6178 */
6179 input = xmlNewEntityInputStream(ctxt, entity);
6180 xmlPushInput(ctxt, input);
6181 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6182 (RAW == '<') && (NXT(1) == '?') &&
6183 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6184 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6185 xmlParseTextDecl(ctxt);
6186 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6187 /*
6188 * The XML REC instructs us to stop parsing
6189 * right here
6190 */
6191 ctxt->instate = XML_PARSER_EOF;
Owen Taylor3473f882001-02-23 17:55:21 +00006192 return;
6193 }
6194 }
Owen Taylor3473f882001-02-23 17:55:21 +00006195 }
6196 }
6197 ctxt->hasPErefs = 1;
6198 } else {
6199 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6200 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6201 ctxt->sax->error(ctxt->userData,
6202 "xmlParsePEReference: expecting ';'\n");
6203 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006204 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006205 }
Owen Taylor3473f882001-02-23 17:55:21 +00006206 }
6207 }
6208}
6209
6210/**
6211 * xmlParseStringPEReference:
6212 * @ctxt: an XML parser context
6213 * @str: a pointer to an index in the string
6214 *
6215 * parse PEReference declarations
6216 *
6217 * [69] PEReference ::= '%' Name ';'
6218 *
6219 * [ WFC: No Recursion ]
6220 * A parsed entity must not contain a recursive
6221 * reference to itself, either directly or indirectly.
6222 *
6223 * [ WFC: Entity Declared ]
6224 * In a document without any DTD, a document with only an internal DTD
6225 * subset which contains no parameter entity references, or a document
6226 * with "standalone='yes'", ... ... The declaration of a parameter
6227 * entity must precede any reference to it...
6228 *
6229 * [ VC: Entity Declared ]
6230 * In a document with an external subset or external parameter entities
6231 * with "standalone='no'", ... ... The declaration of a parameter entity
6232 * must precede any reference to it...
6233 *
6234 * [ WFC: In DTD ]
6235 * Parameter-entity references may only appear in the DTD.
6236 * NOTE: misleading but this is handled.
6237 *
6238 * Returns the string of the entity content.
6239 * str is updated to the current value of the index
6240 */
6241xmlEntityPtr
6242xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6243 const xmlChar *ptr;
6244 xmlChar cur;
6245 xmlChar *name;
6246 xmlEntityPtr entity = NULL;
6247
6248 if ((str == NULL) || (*str == NULL)) return(NULL);
6249 ptr = *str;
6250 cur = *ptr;
6251 if (cur == '%') {
6252 ptr++;
6253 cur = *ptr;
6254 name = xmlParseStringName(ctxt, &ptr);
6255 if (name == NULL) {
6256 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6257 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6258 ctxt->sax->error(ctxt->userData,
6259 "xmlParseStringPEReference: no name\n");
6260 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006261 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006262 } else {
6263 cur = *ptr;
6264 if (cur == ';') {
6265 ptr++;
6266 cur = *ptr;
6267 if ((ctxt->sax != NULL) &&
6268 (ctxt->sax->getParameterEntity != NULL))
6269 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6270 name);
6271 if (entity == NULL) {
6272 /*
6273 * [ WFC: Entity Declared ]
6274 * In a document without any DTD, a document with only an
6275 * internal DTD subset which contains no parameter entity
6276 * references, or a document with "standalone='yes'", ...
6277 * ... The declaration of a parameter entity must precede
6278 * any reference to it...
6279 */
6280 if ((ctxt->standalone == 1) ||
6281 ((ctxt->hasExternalSubset == 0) &&
6282 (ctxt->hasPErefs == 0))) {
6283 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6284 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6285 ctxt->sax->error(ctxt->userData,
6286 "PEReference: %%%s; not found\n", name);
6287 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006288 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006289 } else {
6290 /*
6291 * [ VC: Entity Declared ]
6292 * In a document with an external subset or external
6293 * parameter entities with "standalone='no'", ...
6294 * ... The declaration of a parameter entity must
6295 * precede any reference to it...
6296 */
6297 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6298 ctxt->sax->warning(ctxt->userData,
6299 "PEReference: %%%s; not found\n", name);
6300 ctxt->valid = 0;
6301 }
6302 } else {
6303 /*
6304 * Internal checking in case the entity quest barfed
6305 */
6306 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6307 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6308 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6309 ctxt->sax->warning(ctxt->userData,
6310 "Internal: %%%s; is not a parameter entity\n", name);
6311 }
6312 }
6313 ctxt->hasPErefs = 1;
6314 } else {
6315 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6316 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6317 ctxt->sax->error(ctxt->userData,
6318 "xmlParseStringPEReference: expecting ';'\n");
6319 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006320 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006321 }
6322 xmlFree(name);
6323 }
6324 }
6325 *str = ptr;
6326 return(entity);
6327}
6328
6329/**
6330 * xmlParseDocTypeDecl:
6331 * @ctxt: an XML parser context
6332 *
6333 * parse a DOCTYPE declaration
6334 *
6335 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6336 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6337 *
6338 * [ VC: Root Element Type ]
6339 * The Name in the document type declaration must match the element
6340 * type of the root element.
6341 */
6342
6343void
6344xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006345 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006346 xmlChar *ExternalID = NULL;
6347 xmlChar *URI = NULL;
6348
6349 /*
6350 * We know that '<!DOCTYPE' has been detected.
6351 */
6352 SKIP(9);
6353
6354 SKIP_BLANKS;
6355
6356 /*
6357 * Parse the DOCTYPE name.
6358 */
6359 name = xmlParseName(ctxt);
6360 if (name == NULL) {
6361 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6362 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6363 ctxt->sax->error(ctxt->userData,
6364 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6365 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006366 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006367 }
6368 ctxt->intSubName = name;
6369
6370 SKIP_BLANKS;
6371
6372 /*
6373 * Check for SystemID and ExternalID
6374 */
6375 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6376
6377 if ((URI != NULL) || (ExternalID != NULL)) {
6378 ctxt->hasExternalSubset = 1;
6379 }
6380 ctxt->extSubURI = URI;
6381 ctxt->extSubSystem = ExternalID;
6382
6383 SKIP_BLANKS;
6384
6385 /*
6386 * Create and update the internal subset.
6387 */
6388 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6389 (!ctxt->disableSAX))
6390 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6391
6392 /*
6393 * Is there any internal subset declarations ?
6394 * they are handled separately in xmlParseInternalSubset()
6395 */
6396 if (RAW == '[')
6397 return;
6398
6399 /*
6400 * We should be at the end of the DOCTYPE declaration.
6401 */
6402 if (RAW != '>') {
6403 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6404 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006405 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006406 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006407 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006408 }
6409 NEXT;
6410}
6411
6412/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006413 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006414 * @ctxt: an XML parser context
6415 *
6416 * parse the internal subset declaration
6417 *
6418 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6419 */
6420
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006421static void
Owen Taylor3473f882001-02-23 17:55:21 +00006422xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6423 /*
6424 * Is there any DTD definition ?
6425 */
6426 if (RAW == '[') {
6427 ctxt->instate = XML_PARSER_DTD;
6428 NEXT;
6429 /*
6430 * Parse the succession of Markup declarations and
6431 * PEReferences.
6432 * Subsequence (markupdecl | PEReference | S)*
6433 */
6434 while (RAW != ']') {
6435 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006436 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006437
6438 SKIP_BLANKS;
6439 xmlParseMarkupDecl(ctxt);
6440 xmlParsePEReference(ctxt);
6441
6442 /*
6443 * Pop-up of finished entities.
6444 */
6445 while ((RAW == 0) && (ctxt->inputNr > 1))
6446 xmlPopInput(ctxt);
6447
6448 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6449 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6450 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6451 ctxt->sax->error(ctxt->userData,
6452 "xmlParseInternalSubset: error detected in Markup declaration\n");
6453 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006454 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006455 break;
6456 }
6457 }
6458 if (RAW == ']') {
6459 NEXT;
6460 SKIP_BLANKS;
6461 }
6462 }
6463
6464 /*
6465 * We should be at the end of the DOCTYPE declaration.
6466 */
6467 if (RAW != '>') {
6468 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6469 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006470 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006471 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006472 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006473 }
6474 NEXT;
6475}
6476
6477/**
6478 * xmlParseAttribute:
6479 * @ctxt: an XML parser context
6480 * @value: a xmlChar ** used to store the value of the attribute
6481 *
6482 * parse an attribute
6483 *
6484 * [41] Attribute ::= Name Eq AttValue
6485 *
6486 * [ WFC: No External Entity References ]
6487 * Attribute values cannot contain direct or indirect entity references
6488 * to external entities.
6489 *
6490 * [ WFC: No < in Attribute Values ]
6491 * The replacement text of any entity referred to directly or indirectly in
6492 * an attribute value (other than "&lt;") must not contain a <.
6493 *
6494 * [ VC: Attribute Value Type ]
6495 * The attribute must have been declared; the value must be of the type
6496 * declared for it.
6497 *
6498 * [25] Eq ::= S? '=' S?
6499 *
6500 * With namespace:
6501 *
6502 * [NS 11] Attribute ::= QName Eq AttValue
6503 *
6504 * Also the case QName == xmlns:??? is handled independently as a namespace
6505 * definition.
6506 *
6507 * Returns the attribute name, and the value in *value.
6508 */
6509
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006510const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006511xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006512 const xmlChar *name;
6513 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006514
6515 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006516 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006517 name = xmlParseName(ctxt);
6518 if (name == NULL) {
6519 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6520 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6521 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6522 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006523 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006524 return(NULL);
6525 }
6526
6527 /*
6528 * read the value
6529 */
6530 SKIP_BLANKS;
6531 if (RAW == '=') {
6532 NEXT;
6533 SKIP_BLANKS;
6534 val = xmlParseAttValue(ctxt);
6535 ctxt->instate = XML_PARSER_CONTENT;
6536 } else {
6537 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6538 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6539 ctxt->sax->error(ctxt->userData,
6540 "Specification mandate value for attribute %s\n", name);
6541 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006542 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006543 return(NULL);
6544 }
6545
6546 /*
6547 * Check that xml:lang conforms to the specification
6548 * No more registered as an error, just generate a warning now
6549 * since this was deprecated in XML second edition
6550 */
6551 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6552 if (!xmlCheckLanguageID(val)) {
6553 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6554 ctxt->sax->warning(ctxt->userData,
6555 "Malformed value for xml:lang : %s\n", val);
6556 }
6557 }
6558
6559 /*
6560 * Check that xml:space conforms to the specification
6561 */
6562 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6563 if (xmlStrEqual(val, BAD_CAST "default"))
6564 *(ctxt->space) = 0;
6565 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6566 *(ctxt->space) = 1;
6567 else {
6568 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6569 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6570 ctxt->sax->error(ctxt->userData,
Daniel Veillard642104e2003-03-26 16:32:05 +00006571"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Owen Taylor3473f882001-02-23 17:55:21 +00006572 val);
6573 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006574 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006575 }
6576 }
6577
6578 *value = val;
6579 return(name);
6580}
6581
6582/**
6583 * xmlParseStartTag:
6584 * @ctxt: an XML parser context
6585 *
6586 * parse a start of tag either for rule element or
6587 * EmptyElement. In both case we don't parse the tag closing chars.
6588 *
6589 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6590 *
6591 * [ WFC: Unique Att Spec ]
6592 * No attribute name may appear more than once in the same start-tag or
6593 * empty-element tag.
6594 *
6595 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6596 *
6597 * [ WFC: Unique Att Spec ]
6598 * No attribute name may appear more than once in the same start-tag or
6599 * empty-element tag.
6600 *
6601 * With namespace:
6602 *
6603 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6604 *
6605 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6606 *
6607 * Returns the element name parsed
6608 */
6609
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006610const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006611xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006612 const xmlChar *name;
6613 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00006614 xmlChar *attvalue;
6615 const xmlChar **atts = NULL;
6616 int nbatts = 0;
6617 int maxatts = 0;
6618 int i;
6619
6620 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006621 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006622
6623 name = xmlParseName(ctxt);
6624 if (name == NULL) {
6625 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6626 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6627 ctxt->sax->error(ctxt->userData,
6628 "xmlParseStartTag: invalid element name\n");
6629 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006630 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006631 return(NULL);
6632 }
6633
6634 /*
6635 * Now parse the attributes, it ends up with the ending
6636 *
6637 * (S Attribute)* S?
6638 */
6639 SKIP_BLANKS;
6640 GROW;
6641
Daniel Veillard21a0f912001-02-25 19:54:14 +00006642 while ((RAW != '>') &&
6643 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard34ba3872003-07-15 13:34:05 +00006644 (IS_CHAR((unsigned int) RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006645 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006646 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006647
6648 attname = xmlParseAttribute(ctxt, &attvalue);
6649 if ((attname != NULL) && (attvalue != NULL)) {
6650 /*
6651 * [ WFC: Unique Att Spec ]
6652 * No attribute name may appear more than once in the same
6653 * start-tag or empty-element tag.
6654 */
6655 for (i = 0; i < nbatts;i += 2) {
6656 if (xmlStrEqual(atts[i], attname)) {
6657 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6658 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6659 ctxt->sax->error(ctxt->userData,
6660 "Attribute %s redefined\n",
6661 attname);
6662 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006663 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006664 xmlFree(attvalue);
6665 goto failed;
6666 }
6667 }
6668
6669 /*
6670 * Add the pair to atts
6671 */
6672 if (atts == NULL) {
6673 maxatts = 10;
6674 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6675 if (atts == NULL) {
6676 xmlGenericError(xmlGenericErrorContext,
6677 "malloc of %ld byte failed\n",
6678 maxatts * (long)sizeof(xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006679 if (attvalue != NULL)
6680 xmlFree(attvalue);
6681 ctxt->errNo = XML_ERR_NO_MEMORY;
6682 ctxt->instate = XML_PARSER_EOF;
6683 ctxt->disableSAX = 1;
6684 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006685 }
6686 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006687 const xmlChar **n;
6688
Owen Taylor3473f882001-02-23 17:55:21 +00006689 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006690 n = (const xmlChar **) xmlRealloc((void *) atts,
Owen Taylor3473f882001-02-23 17:55:21 +00006691 maxatts * sizeof(xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006692 if (n == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00006693 xmlGenericError(xmlGenericErrorContext,
6694 "realloc of %ld byte failed\n",
6695 maxatts * (long)sizeof(xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006696 if (attvalue != NULL)
6697 xmlFree(attvalue);
6698 ctxt->errNo = XML_ERR_NO_MEMORY;
6699 ctxt->instate = XML_PARSER_EOF;
6700 ctxt->disableSAX = 1;
6701 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006702 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006703 atts = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006704 }
6705 atts[nbatts++] = attname;
6706 atts[nbatts++] = attvalue;
6707 atts[nbatts] = NULL;
6708 atts[nbatts + 1] = NULL;
6709 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006710 if (attvalue != NULL)
6711 xmlFree(attvalue);
6712 }
6713
6714failed:
6715
Daniel Veillard3772de32002-12-17 10:31:45 +00006716 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006717 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6718 break;
6719 if (!IS_BLANK(RAW)) {
6720 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6721 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6722 ctxt->sax->error(ctxt->userData,
6723 "attributes construct error\n");
6724 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006725 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006726 }
6727 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006728 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6729 (attname == NULL) && (attvalue == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006730 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6731 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6732 ctxt->sax->error(ctxt->userData,
6733 "xmlParseStartTag: problem parsing attributes\n");
6734 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006735 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006736 break;
6737 }
6738 GROW;
6739 }
6740
6741 /*
6742 * SAX: Start of Element !
6743 */
6744 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6745 (!ctxt->disableSAX))
6746 ctxt->sax->startElement(ctxt->userData, name, atts);
6747
6748 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006749 /* Free only the content strings */
6750 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006751 if (atts[i] != NULL)
6752 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00006753 xmlFree((void *) atts);
6754 }
6755 return(name);
6756}
6757
6758/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006759 * xmlParseEndTagInternal:
Owen Taylor3473f882001-02-23 17:55:21 +00006760 * @ctxt: an XML parser context
6761 *
6762 * parse an end of tag
6763 *
6764 * [42] ETag ::= '</' Name S? '>'
6765 *
6766 * With namespace
6767 *
6768 * [NS 9] ETag ::= '</' QName S? '>'
6769 */
6770
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006771static void
6772xmlParseEndTagInternal(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006773 const xmlChar *name;
6774 const xmlChar *oldname;
Owen Taylor3473f882001-02-23 17:55:21 +00006775
6776 GROW;
6777 if ((RAW != '<') || (NXT(1) != '/')) {
6778 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6779 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6780 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6781 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006782 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006783 return;
6784 }
6785 SKIP(2);
6786
Daniel Veillard46de64e2002-05-29 08:21:33 +00006787 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006788
6789 /*
6790 * We should definitely be at the ending "S? '>'" part
6791 */
6792 GROW;
6793 SKIP_BLANKS;
Daniel Veillard34ba3872003-07-15 13:34:05 +00006794 if ((!IS_CHAR((unsigned int) RAW)) || (RAW != '>')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006795 ctxt->errNo = XML_ERR_GT_REQUIRED;
6796 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6797 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6798 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006799 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006800 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006801 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006802
6803 /*
6804 * [ WFC: Element Type Match ]
6805 * The Name in an element's end-tag must match the element type in the
6806 * start-tag.
6807 *
6808 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006809 if (name != (xmlChar*)1) {
Owen Taylor3473f882001-02-23 17:55:21 +00006810 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6811 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00006812 if (name != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00006813 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006814 "Opening and ending tag mismatch: %s line %d and %s\n",
6815 ctxt->name, line, name);
Daniel Veillard46de64e2002-05-29 08:21:33 +00006816 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006817 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006818 "Ending tag error for: %s line %d\n", ctxt->name, line);
Owen Taylor3473f882001-02-23 17:55:21 +00006819 }
6820
6821 }
6822 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006823 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006824 }
6825
6826 /*
6827 * SAX: End of Tag
6828 */
6829 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6830 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006831 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006832
Owen Taylor3473f882001-02-23 17:55:21 +00006833 oldname = namePop(ctxt);
6834 spacePop(ctxt);
6835 if (oldname != NULL) {
6836#ifdef DEBUG_STACK
6837 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6838#endif
Owen Taylor3473f882001-02-23 17:55:21 +00006839 }
6840 return;
6841}
6842
6843/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006844 * xmlParseEndTag:
6845 * @ctxt: an XML parser context
6846 *
6847 * parse an end of tag
6848 *
6849 * [42] ETag ::= '</' Name S? '>'
6850 *
6851 * With namespace
6852 *
6853 * [NS 9] ETag ::= '</' QName S? '>'
6854 */
6855
6856void
6857xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6858 xmlParseEndTagInternal(ctxt, 0);
6859}
6860
6861/**
Owen Taylor3473f882001-02-23 17:55:21 +00006862 * xmlParseCDSect:
6863 * @ctxt: an XML parser context
6864 *
6865 * Parse escaped pure raw content.
6866 *
6867 * [18] CDSect ::= CDStart CData CDEnd
6868 *
6869 * [19] CDStart ::= '<![CDATA['
6870 *
6871 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6872 *
6873 * [21] CDEnd ::= ']]>'
6874 */
6875void
6876xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6877 xmlChar *buf = NULL;
6878 int len = 0;
6879 int size = XML_PARSER_BUFFER_SIZE;
6880 int r, rl;
6881 int s, sl;
6882 int cur, l;
6883 int count = 0;
6884
6885 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6886 (NXT(2) == '[') && (NXT(3) == 'C') &&
6887 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6888 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6889 (NXT(8) == '[')) {
6890 SKIP(9);
6891 } else
6892 return;
6893
6894 ctxt->instate = XML_PARSER_CDATA_SECTION;
6895 r = CUR_CHAR(rl);
6896 if (!IS_CHAR(r)) {
6897 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6898 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6899 ctxt->sax->error(ctxt->userData,
6900 "CData section not finished\n");
6901 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006902 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006903 ctxt->instate = XML_PARSER_CONTENT;
6904 return;
6905 }
6906 NEXTL(rl);
6907 s = CUR_CHAR(sl);
6908 if (!IS_CHAR(s)) {
6909 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6910 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6911 ctxt->sax->error(ctxt->userData,
6912 "CData section not finished\n");
6913 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006914 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006915 ctxt->instate = XML_PARSER_CONTENT;
6916 return;
6917 }
6918 NEXTL(sl);
6919 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00006920 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00006921 if (buf == NULL) {
6922 xmlGenericError(xmlGenericErrorContext,
6923 "malloc of %d byte failed\n", size);
6924 return;
6925 }
6926 while (IS_CHAR(cur) &&
6927 ((r != ']') || (s != ']') || (cur != '>'))) {
6928 if (len + 5 >= size) {
6929 size *= 2;
6930 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6931 if (buf == NULL) {
6932 xmlGenericError(xmlGenericErrorContext,
6933 "realloc of %d byte failed\n", size);
6934 return;
6935 }
6936 }
6937 COPY_BUF(rl,buf,len,r);
6938 r = s;
6939 rl = sl;
6940 s = cur;
6941 sl = l;
6942 count++;
6943 if (count > 50) {
6944 GROW;
6945 count = 0;
6946 }
6947 NEXTL(l);
6948 cur = CUR_CHAR(l);
6949 }
6950 buf[len] = 0;
6951 ctxt->instate = XML_PARSER_CONTENT;
6952 if (cur != '>') {
6953 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6954 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6955 ctxt->sax->error(ctxt->userData,
6956 "CData section not finished\n%.50s\n", buf);
6957 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006958 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006959 xmlFree(buf);
6960 return;
6961 }
6962 NEXTL(l);
6963
6964 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006965 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00006966 */
6967 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6968 if (ctxt->sax->cdataBlock != NULL)
6969 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006970 else if (ctxt->sax->characters != NULL)
6971 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006972 }
6973 xmlFree(buf);
6974}
6975
6976/**
6977 * xmlParseContent:
6978 * @ctxt: an XML parser context
6979 *
6980 * Parse a content:
6981 *
6982 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6983 */
6984
6985void
6986xmlParseContent(xmlParserCtxtPtr ctxt) {
6987 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00006988 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00006989 ((RAW != '<') || (NXT(1) != '/'))) {
6990 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006991 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006992 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006993
6994 /*
Owen Taylor3473f882001-02-23 17:55:21 +00006995 * First case : a Processing Instruction.
6996 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00006997 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006998 xmlParsePI(ctxt);
6999 }
7000
7001 /*
7002 * Second case : a CDSection
7003 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007004 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00007005 (NXT(2) == '[') && (NXT(3) == 'C') &&
7006 (NXT(4) == 'D') && (NXT(5) == 'A') &&
7007 (NXT(6) == 'T') && (NXT(7) == 'A') &&
7008 (NXT(8) == '[')) {
7009 xmlParseCDSect(ctxt);
7010 }
7011
7012 /*
7013 * Third case : a comment
7014 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007015 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00007016 (NXT(2) == '-') && (NXT(3) == '-')) {
7017 xmlParseComment(ctxt);
7018 ctxt->instate = XML_PARSER_CONTENT;
7019 }
7020
7021 /*
7022 * Fourth case : a sub-element.
7023 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007024 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00007025 xmlParseElement(ctxt);
7026 }
7027
7028 /*
7029 * Fifth case : a reference. If if has not been resolved,
7030 * parsing returns it's Name, create the node
7031 */
7032
Daniel Veillard21a0f912001-02-25 19:54:14 +00007033 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00007034 xmlParseReference(ctxt);
7035 }
7036
7037 /*
7038 * Last case, text. Note that References are handled directly.
7039 */
7040 else {
7041 xmlParseCharData(ctxt, 0);
7042 }
7043
7044 GROW;
7045 /*
7046 * Pop-up of finished entities.
7047 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00007048 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00007049 xmlPopInput(ctxt);
7050 SHRINK;
7051
Daniel Veillardfdc91562002-07-01 21:52:03 +00007052 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007053 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
7054 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7055 ctxt->sax->error(ctxt->userData,
7056 "detected an error in element content\n");
7057 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007058 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007059 ctxt->instate = XML_PARSER_EOF;
7060 break;
7061 }
7062 }
7063}
7064
7065/**
7066 * xmlParseElement:
7067 * @ctxt: an XML parser context
7068 *
7069 * parse an XML element, this is highly recursive
7070 *
7071 * [39] element ::= EmptyElemTag | STag content ETag
7072 *
7073 * [ WFC: Element Type Match ]
7074 * The Name in an element's end-tag must match the element type in the
7075 * start-tag.
7076 *
7077 * [ VC: Element Valid ]
7078 * An element is valid if there is a declaration matching elementdecl
7079 * where the Name matches the element type and one of the following holds:
7080 * - The declaration matches EMPTY and the element has no content.
7081 * - The declaration matches children and the sequence of child elements
7082 * belongs to the language generated by the regular expression in the
7083 * content model, with optional white space (characters matching the
7084 * nonterminal S) between each pair of child elements.
7085 * - The declaration matches Mixed and the content consists of character
7086 * data and child elements whose types match names in the content model.
7087 * - The declaration matches ANY, and the types of any child elements have
7088 * been declared.
7089 */
7090
7091void
7092xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007093 const xmlChar *name;
7094 const xmlChar *oldname;
Owen Taylor3473f882001-02-23 17:55:21 +00007095 xmlParserNodeInfo node_info;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007096 int line;
Owen Taylor3473f882001-02-23 17:55:21 +00007097 xmlNodePtr ret;
7098
7099 /* Capture start position */
7100 if (ctxt->record_info) {
7101 node_info.begin_pos = ctxt->input->consumed +
7102 (CUR_PTR - ctxt->input->base);
7103 node_info.begin_line = ctxt->input->line;
7104 }
7105
7106 if (ctxt->spaceNr == 0)
7107 spacePush(ctxt, -1);
7108 else
7109 spacePush(ctxt, *ctxt->space);
7110
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007111 line = ctxt->input->line;
Owen Taylor3473f882001-02-23 17:55:21 +00007112 name = xmlParseStartTag(ctxt);
7113 if (name == NULL) {
7114 spacePop(ctxt);
7115 return;
7116 }
7117 namePush(ctxt, name);
7118 ret = ctxt->node;
7119
7120 /*
7121 * [ VC: Root Element Type ]
7122 * The Name in the document type declaration must match the element
7123 * type of the root element.
7124 */
7125 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7126 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7127 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7128
7129 /*
7130 * Check for an Empty Element.
7131 */
7132 if ((RAW == '/') && (NXT(1) == '>')) {
7133 SKIP(2);
7134 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7135 (!ctxt->disableSAX))
7136 ctxt->sax->endElement(ctxt->userData, name);
7137 oldname = namePop(ctxt);
7138 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007139#ifdef DEBUG_STACK
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007140 if (oldname != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00007141 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
Owen Taylor3473f882001-02-23 17:55:21 +00007142 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007143#endif
Owen Taylor3473f882001-02-23 17:55:21 +00007144 if ( ret != NULL && ctxt->record_info ) {
7145 node_info.end_pos = ctxt->input->consumed +
7146 (CUR_PTR - ctxt->input->base);
7147 node_info.end_line = ctxt->input->line;
7148 node_info.node = ret;
7149 xmlParserAddNodeInfo(ctxt, &node_info);
7150 }
7151 return;
7152 }
7153 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00007154 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007155 } else {
7156 ctxt->errNo = XML_ERR_GT_REQUIRED;
7157 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7158 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007159 "Couldn't find end of Start Tag %s line %d\n",
7160 name, line);
Owen Taylor3473f882001-02-23 17:55:21 +00007161 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007162 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007163
7164 /*
7165 * end of parsing of this node.
7166 */
7167 nodePop(ctxt);
7168 oldname = namePop(ctxt);
7169 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007170#ifdef DEBUG_STACK
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007171 if (oldname != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00007172 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
Owen Taylor3473f882001-02-23 17:55:21 +00007173 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007174#endif
Owen Taylor3473f882001-02-23 17:55:21 +00007175
7176 /*
7177 * Capture end position and add node
7178 */
7179 if ( ret != NULL && ctxt->record_info ) {
7180 node_info.end_pos = ctxt->input->consumed +
7181 (CUR_PTR - ctxt->input->base);
7182 node_info.end_line = ctxt->input->line;
7183 node_info.node = ret;
7184 xmlParserAddNodeInfo(ctxt, &node_info);
7185 }
7186 return;
7187 }
7188
7189 /*
7190 * Parse the content of the element:
7191 */
7192 xmlParseContent(ctxt);
Daniel Veillard34ba3872003-07-15 13:34:05 +00007193 if (!IS_CHAR((unsigned int) RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00007194 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00007195 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7196 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007197 "Premature end of data in tag %s line %d\n", name, line);
Owen Taylor3473f882001-02-23 17:55:21 +00007198 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007199 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007200
7201 /*
7202 * end of parsing of this node.
7203 */
7204 nodePop(ctxt);
7205 oldname = namePop(ctxt);
7206 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007207#ifdef DEBUG_STACK
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007208 if (oldname != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00007209 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
Owen Taylor3473f882001-02-23 17:55:21 +00007210 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007211#endif
Owen Taylor3473f882001-02-23 17:55:21 +00007212 return;
7213 }
7214
7215 /*
7216 * parse the end of tag: '</' should be here.
7217 */
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007218 xmlParseEndTagInternal(ctxt, line);
Owen Taylor3473f882001-02-23 17:55:21 +00007219
7220 /*
7221 * Capture end position and add node
7222 */
7223 if ( ret != NULL && ctxt->record_info ) {
7224 node_info.end_pos = ctxt->input->consumed +
7225 (CUR_PTR - ctxt->input->base);
7226 node_info.end_line = ctxt->input->line;
7227 node_info.node = ret;
7228 xmlParserAddNodeInfo(ctxt, &node_info);
7229 }
7230}
7231
7232/**
7233 * xmlParseVersionNum:
7234 * @ctxt: an XML parser context
7235 *
7236 * parse the XML version value.
7237 *
7238 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7239 *
7240 * Returns the string giving the XML version number, or NULL
7241 */
7242xmlChar *
7243xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7244 xmlChar *buf = NULL;
7245 int len = 0;
7246 int size = 10;
7247 xmlChar cur;
7248
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007249 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007250 if (buf == NULL) {
7251 xmlGenericError(xmlGenericErrorContext,
7252 "malloc of %d byte failed\n", size);
7253 return(NULL);
7254 }
7255 cur = CUR;
7256 while (((cur >= 'a') && (cur <= 'z')) ||
7257 ((cur >= 'A') && (cur <= 'Z')) ||
7258 ((cur >= '0') && (cur <= '9')) ||
7259 (cur == '_') || (cur == '.') ||
7260 (cur == ':') || (cur == '-')) {
7261 if (len + 1 >= size) {
7262 size *= 2;
7263 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7264 if (buf == NULL) {
7265 xmlGenericError(xmlGenericErrorContext,
7266 "realloc of %d byte failed\n", size);
7267 return(NULL);
7268 }
7269 }
7270 buf[len++] = cur;
7271 NEXT;
7272 cur=CUR;
7273 }
7274 buf[len] = 0;
7275 return(buf);
7276}
7277
7278/**
7279 * xmlParseVersionInfo:
7280 * @ctxt: an XML parser context
7281 *
7282 * parse the XML version.
7283 *
7284 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7285 *
7286 * [25] Eq ::= S? '=' S?
7287 *
7288 * Returns the version string, e.g. "1.0"
7289 */
7290
7291xmlChar *
7292xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7293 xmlChar *version = NULL;
7294 const xmlChar *q;
7295
7296 if ((RAW == 'v') && (NXT(1) == 'e') &&
7297 (NXT(2) == 'r') && (NXT(3) == 's') &&
7298 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7299 (NXT(6) == 'n')) {
7300 SKIP(7);
7301 SKIP_BLANKS;
7302 if (RAW != '=') {
7303 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7304 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7305 ctxt->sax->error(ctxt->userData,
7306 "xmlParseVersionInfo : expected '='\n");
7307 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007308 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007309 return(NULL);
7310 }
7311 NEXT;
7312 SKIP_BLANKS;
7313 if (RAW == '"') {
7314 NEXT;
7315 q = CUR_PTR;
7316 version = xmlParseVersionNum(ctxt);
7317 if (RAW != '"') {
7318 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7319 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7320 ctxt->sax->error(ctxt->userData,
7321 "String not closed\n%.50s\n", q);
7322 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007323 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007324 } else
7325 NEXT;
7326 } else if (RAW == '\''){
7327 NEXT;
7328 q = CUR_PTR;
7329 version = xmlParseVersionNum(ctxt);
7330 if (RAW != '\'') {
7331 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7332 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7333 ctxt->sax->error(ctxt->userData,
7334 "String not closed\n%.50s\n", q);
7335 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007336 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007337 } else
7338 NEXT;
7339 } else {
7340 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7341 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7342 ctxt->sax->error(ctxt->userData,
7343 "xmlParseVersionInfo : expected ' or \"\n");
7344 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007345 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007346 }
7347 }
7348 return(version);
7349}
7350
7351/**
7352 * xmlParseEncName:
7353 * @ctxt: an XML parser context
7354 *
7355 * parse the XML encoding name
7356 *
7357 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7358 *
7359 * Returns the encoding name value or NULL
7360 */
7361xmlChar *
7362xmlParseEncName(xmlParserCtxtPtr ctxt) {
7363 xmlChar *buf = NULL;
7364 int len = 0;
7365 int size = 10;
7366 xmlChar cur;
7367
7368 cur = CUR;
7369 if (((cur >= 'a') && (cur <= 'z')) ||
7370 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007371 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007372 if (buf == NULL) {
7373 xmlGenericError(xmlGenericErrorContext,
7374 "malloc of %d byte failed\n", size);
7375 return(NULL);
7376 }
7377
7378 buf[len++] = cur;
7379 NEXT;
7380 cur = CUR;
7381 while (((cur >= 'a') && (cur <= 'z')) ||
7382 ((cur >= 'A') && (cur <= 'Z')) ||
7383 ((cur >= '0') && (cur <= '9')) ||
7384 (cur == '.') || (cur == '_') ||
7385 (cur == '-')) {
7386 if (len + 1 >= size) {
7387 size *= 2;
7388 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7389 if (buf == NULL) {
7390 xmlGenericError(xmlGenericErrorContext,
7391 "realloc of %d byte failed\n", size);
7392 return(NULL);
7393 }
7394 }
7395 buf[len++] = cur;
7396 NEXT;
7397 cur = CUR;
7398 if (cur == 0) {
7399 SHRINK;
7400 GROW;
7401 cur = CUR;
7402 }
7403 }
7404 buf[len] = 0;
7405 } else {
7406 ctxt->errNo = XML_ERR_ENCODING_NAME;
7407 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7408 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7409 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007410 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007411 }
7412 return(buf);
7413}
7414
7415/**
7416 * xmlParseEncodingDecl:
7417 * @ctxt: an XML parser context
7418 *
7419 * parse the XML encoding declaration
7420 *
7421 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7422 *
7423 * this setups the conversion filters.
7424 *
7425 * Returns the encoding value or NULL
7426 */
7427
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00007428const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007429xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7430 xmlChar *encoding = NULL;
7431 const xmlChar *q;
7432
7433 SKIP_BLANKS;
7434 if ((RAW == 'e') && (NXT(1) == 'n') &&
7435 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7436 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7437 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7438 SKIP(8);
7439 SKIP_BLANKS;
7440 if (RAW != '=') {
7441 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7442 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7443 ctxt->sax->error(ctxt->userData,
7444 "xmlParseEncodingDecl : expected '='\n");
7445 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007446 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007447 return(NULL);
7448 }
7449 NEXT;
7450 SKIP_BLANKS;
7451 if (RAW == '"') {
7452 NEXT;
7453 q = CUR_PTR;
7454 encoding = xmlParseEncName(ctxt);
7455 if (RAW != '"') {
7456 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7457 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7458 ctxt->sax->error(ctxt->userData,
7459 "String not closed\n%.50s\n", q);
7460 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007461 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007462 } else
7463 NEXT;
7464 } else if (RAW == '\''){
7465 NEXT;
7466 q = CUR_PTR;
7467 encoding = xmlParseEncName(ctxt);
7468 if (RAW != '\'') {
7469 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7470 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7471 ctxt->sax->error(ctxt->userData,
7472 "String not closed\n%.50s\n", q);
7473 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007474 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007475 } else
7476 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00007477 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007478 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7479 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7480 ctxt->sax->error(ctxt->userData,
7481 "xmlParseEncodingDecl : expected ' or \"\n");
7482 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007483 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007484 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00007485 /*
7486 * UTF-16 encoding stwich has already taken place at this stage,
7487 * more over the little-endian/big-endian selection is already done
7488 */
7489 if ((encoding != NULL) &&
7490 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
7491 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00007492 if (ctxt->encoding != NULL)
7493 xmlFree((xmlChar *) ctxt->encoding);
7494 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00007495 }
7496 /*
7497 * UTF-8 encoding is handled natively
7498 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00007499 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00007500 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
7501 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00007502 if (ctxt->encoding != NULL)
7503 xmlFree((xmlChar *) ctxt->encoding);
7504 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00007505 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00007506 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00007507 xmlCharEncodingHandlerPtr handler;
7508
7509 if (ctxt->input->encoding != NULL)
7510 xmlFree((xmlChar *) ctxt->input->encoding);
7511 ctxt->input->encoding = encoding;
7512
Daniel Veillarda6874ca2003-07-29 16:47:24 +00007513 handler = xmlFindCharEncodingHandler((const char *) encoding);
7514 if (handler != NULL) {
7515 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00007516 } else {
Daniel Veillarda6874ca2003-07-29 16:47:24 +00007517 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7518 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7519 ctxt->sax->error(ctxt->userData,
7520 "Unsupported encoding %s\n", encoding);
7521 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007522 }
7523 }
7524 }
7525 return(encoding);
7526}
7527
7528/**
7529 * xmlParseSDDecl:
7530 * @ctxt: an XML parser context
7531 *
7532 * parse the XML standalone declaration
7533 *
7534 * [32] SDDecl ::= S 'standalone' Eq
7535 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7536 *
7537 * [ VC: Standalone Document Declaration ]
7538 * TODO The standalone document declaration must have the value "no"
7539 * if any external markup declarations contain declarations of:
7540 * - attributes with default values, if elements to which these
7541 * attributes apply appear in the document without specifications
7542 * of values for these attributes, or
7543 * - entities (other than amp, lt, gt, apos, quot), if references
7544 * to those entities appear in the document, or
7545 * - attributes with values subject to normalization, where the
7546 * attribute appears in the document with a value which will change
7547 * as a result of normalization, or
7548 * - element types with element content, if white space occurs directly
7549 * within any instance of those types.
7550 *
7551 * Returns 1 if standalone, 0 otherwise
7552 */
7553
7554int
7555xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7556 int standalone = -1;
7557
7558 SKIP_BLANKS;
7559 if ((RAW == 's') && (NXT(1) == 't') &&
7560 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7561 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7562 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7563 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7564 SKIP(10);
7565 SKIP_BLANKS;
7566 if (RAW != '=') {
7567 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7568 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7569 ctxt->sax->error(ctxt->userData,
7570 "XML standalone declaration : expected '='\n");
7571 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007572 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007573 return(standalone);
7574 }
7575 NEXT;
7576 SKIP_BLANKS;
7577 if (RAW == '\''){
7578 NEXT;
7579 if ((RAW == 'n') && (NXT(1) == 'o')) {
7580 standalone = 0;
7581 SKIP(2);
7582 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7583 (NXT(2) == 's')) {
7584 standalone = 1;
7585 SKIP(3);
7586 } else {
7587 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7588 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7589 ctxt->sax->error(ctxt->userData,
7590 "standalone accepts only 'yes' or 'no'\n");
7591 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007592 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007593 }
7594 if (RAW != '\'') {
7595 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7596 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7597 ctxt->sax->error(ctxt->userData, "String not closed\n");
7598 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007599 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007600 } else
7601 NEXT;
7602 } else if (RAW == '"'){
7603 NEXT;
7604 if ((RAW == 'n') && (NXT(1) == 'o')) {
7605 standalone = 0;
7606 SKIP(2);
7607 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7608 (NXT(2) == 's')) {
7609 standalone = 1;
7610 SKIP(3);
7611 } else {
7612 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7613 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7614 ctxt->sax->error(ctxt->userData,
7615 "standalone accepts only 'yes' or 'no'\n");
7616 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007617 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007618 }
7619 if (RAW != '"') {
7620 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7621 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7622 ctxt->sax->error(ctxt->userData, "String not closed\n");
7623 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007624 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007625 } else
7626 NEXT;
7627 } else {
7628 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7629 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7630 ctxt->sax->error(ctxt->userData,
7631 "Standalone value not found\n");
7632 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007633 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007634 }
7635 }
7636 return(standalone);
7637}
7638
7639/**
7640 * xmlParseXMLDecl:
7641 * @ctxt: an XML parser context
7642 *
7643 * parse an XML declaration header
7644 *
7645 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7646 */
7647
7648void
7649xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7650 xmlChar *version;
7651
7652 /*
7653 * We know that '<?xml' is here.
7654 */
7655 SKIP(5);
7656
7657 if (!IS_BLANK(RAW)) {
7658 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7659 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7660 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7661 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007662 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007663 }
7664 SKIP_BLANKS;
7665
7666 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007667 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007668 */
7669 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007670 if (version == NULL) {
7671 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7672 ctxt->sax->error(ctxt->userData,
7673 "Malformed declaration expecting version\n");
7674 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007675 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard19840942001-11-29 16:11:38 +00007676 } else {
7677 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7678 /*
7679 * TODO: Blueberry should be detected here
7680 */
7681 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7682 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7683 version);
7684 }
7685 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007686 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007687 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007688 }
Owen Taylor3473f882001-02-23 17:55:21 +00007689
7690 /*
7691 * We may have the encoding declaration
7692 */
7693 if (!IS_BLANK(RAW)) {
7694 if ((RAW == '?') && (NXT(1) == '>')) {
7695 SKIP(2);
7696 return;
7697 }
7698 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7699 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7700 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7701 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007702 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007703 }
7704 xmlParseEncodingDecl(ctxt);
7705 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7706 /*
7707 * The XML REC instructs us to stop parsing right here
7708 */
7709 return;
7710 }
7711
7712 /*
7713 * We may have the standalone status.
7714 */
7715 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7716 if ((RAW == '?') && (NXT(1) == '>')) {
7717 SKIP(2);
7718 return;
7719 }
7720 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7721 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7722 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7723 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007724 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007725 }
7726 SKIP_BLANKS;
7727 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7728
7729 SKIP_BLANKS;
7730 if ((RAW == '?') && (NXT(1) == '>')) {
7731 SKIP(2);
7732 } else if (RAW == '>') {
7733 /* Deprecated old WD ... */
7734 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7735 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7736 ctxt->sax->error(ctxt->userData,
7737 "XML declaration must end-up with '?>'\n");
7738 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007739 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007740 NEXT;
7741 } else {
7742 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7743 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7744 ctxt->sax->error(ctxt->userData,
7745 "parsing XML declaration: '?>' expected\n");
7746 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007747 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007748 MOVETO_ENDTAG(CUR_PTR);
7749 NEXT;
7750 }
7751}
7752
7753/**
7754 * xmlParseMisc:
7755 * @ctxt: an XML parser context
7756 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007757 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007758 *
7759 * [27] Misc ::= Comment | PI | S
7760 */
7761
7762void
7763xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00007764 while (((RAW == '<') && (NXT(1) == '?')) ||
7765 ((RAW == '<') && (NXT(1) == '!') &&
7766 (NXT(2) == '-') && (NXT(3) == '-')) ||
7767 IS_BLANK(CUR)) {
7768 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007769 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00007770 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007771 NEXT;
7772 } else
7773 xmlParseComment(ctxt);
7774 }
7775}
7776
7777/**
7778 * xmlParseDocument:
7779 * @ctxt: an XML parser context
7780 *
7781 * parse an XML document (and build a tree if using the standard SAX
7782 * interface).
7783 *
7784 * [1] document ::= prolog element Misc*
7785 *
7786 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7787 *
7788 * Returns 0, -1 in case of error. the parser context is augmented
7789 * as a result of the parsing.
7790 */
7791
7792int
7793xmlParseDocument(xmlParserCtxtPtr ctxt) {
7794 xmlChar start[4];
7795 xmlCharEncoding enc;
7796
7797 xmlInitParser();
7798
7799 GROW;
7800
7801 /*
7802 * SAX: beginning of the document processing.
7803 */
7804 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7805 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7806
Daniel Veillard50f34372001-08-03 12:06:36 +00007807 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007808 /*
7809 * Get the 4 first bytes and decode the charset
7810 * if enc != XML_CHAR_ENCODING_NONE
7811 * plug some encoding conversion routines.
7812 */
7813 start[0] = RAW;
7814 start[1] = NXT(1);
7815 start[2] = NXT(2);
7816 start[3] = NXT(3);
7817 enc = xmlDetectCharEncoding(start, 4);
7818 if (enc != XML_CHAR_ENCODING_NONE) {
7819 xmlSwitchEncoding(ctxt, enc);
7820 }
Owen Taylor3473f882001-02-23 17:55:21 +00007821 }
7822
7823
7824 if (CUR == 0) {
7825 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7826 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7827 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7828 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007829 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007830 }
7831
7832 /*
7833 * Check for the XMLDecl in the Prolog.
7834 */
7835 GROW;
7836 if ((RAW == '<') && (NXT(1) == '?') &&
7837 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7838 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7839
7840 /*
7841 * Note that we will switch encoding on the fly.
7842 */
7843 xmlParseXMLDecl(ctxt);
7844 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7845 /*
7846 * The XML REC instructs us to stop parsing right here
7847 */
7848 return(-1);
7849 }
7850 ctxt->standalone = ctxt->input->standalone;
7851 SKIP_BLANKS;
7852 } else {
7853 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7854 }
7855 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7856 ctxt->sax->startDocument(ctxt->userData);
7857
7858 /*
7859 * The Misc part of the Prolog
7860 */
7861 GROW;
7862 xmlParseMisc(ctxt);
7863
7864 /*
7865 * Then possibly doc type declaration(s) and more Misc
7866 * (doctypedecl Misc*)?
7867 */
7868 GROW;
7869 if ((RAW == '<') && (NXT(1) == '!') &&
7870 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7871 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7872 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7873 (NXT(8) == 'E')) {
7874
7875 ctxt->inSubset = 1;
7876 xmlParseDocTypeDecl(ctxt);
7877 if (RAW == '[') {
7878 ctxt->instate = XML_PARSER_DTD;
7879 xmlParseInternalSubset(ctxt);
7880 }
7881
7882 /*
7883 * Create and update the external subset.
7884 */
7885 ctxt->inSubset = 2;
7886 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7887 (!ctxt->disableSAX))
7888 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7889 ctxt->extSubSystem, ctxt->extSubURI);
7890 ctxt->inSubset = 0;
7891
7892
7893 ctxt->instate = XML_PARSER_PROLOG;
7894 xmlParseMisc(ctxt);
7895 }
7896
7897 /*
7898 * Time to start parsing the tree itself
7899 */
7900 GROW;
7901 if (RAW != '<') {
7902 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7903 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7904 ctxt->sax->error(ctxt->userData,
7905 "Start tag expected, '<' not found\n");
7906 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007907 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007908 ctxt->instate = XML_PARSER_EOF;
7909 } else {
7910 ctxt->instate = XML_PARSER_CONTENT;
7911 xmlParseElement(ctxt);
7912 ctxt->instate = XML_PARSER_EPILOG;
7913
7914
7915 /*
7916 * The Misc part at the end
7917 */
7918 xmlParseMisc(ctxt);
7919
Daniel Veillard561b7f82002-03-20 21:55:57 +00007920 if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00007921 ctxt->errNo = XML_ERR_DOCUMENT_END;
7922 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7923 ctxt->sax->error(ctxt->userData,
7924 "Extra content at the end of the document\n");
7925 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007926 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007927 }
7928 ctxt->instate = XML_PARSER_EOF;
7929 }
7930
7931 /*
7932 * SAX: end of the document processing.
7933 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007934 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007935 ctxt->sax->endDocument(ctxt->userData);
7936
Daniel Veillard5997aca2002-03-18 18:36:20 +00007937 /*
7938 * Remove locally kept entity definitions if the tree was not built
7939 */
7940 if ((ctxt->myDoc != NULL) &&
7941 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
7942 xmlFreeDoc(ctxt->myDoc);
7943 ctxt->myDoc = NULL;
7944 }
7945
Daniel Veillardc7612992002-02-17 22:47:37 +00007946 if (! ctxt->wellFormed) {
7947 ctxt->valid = 0;
7948 return(-1);
7949 }
Owen Taylor3473f882001-02-23 17:55:21 +00007950 return(0);
7951}
7952
7953/**
7954 * xmlParseExtParsedEnt:
7955 * @ctxt: an XML parser context
7956 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007957 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00007958 * An external general parsed entity is well-formed if it matches the
7959 * production labeled extParsedEnt.
7960 *
7961 * [78] extParsedEnt ::= TextDecl? content
7962 *
7963 * Returns 0, -1 in case of error. the parser context is augmented
7964 * as a result of the parsing.
7965 */
7966
7967int
7968xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7969 xmlChar start[4];
7970 xmlCharEncoding enc;
7971
7972 xmlDefaultSAXHandlerInit();
7973
7974 GROW;
7975
7976 /*
7977 * SAX: beginning of the document processing.
7978 */
7979 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7980 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7981
7982 /*
7983 * Get the 4 first bytes and decode the charset
7984 * if enc != XML_CHAR_ENCODING_NONE
7985 * plug some encoding conversion routines.
7986 */
7987 start[0] = RAW;
7988 start[1] = NXT(1);
7989 start[2] = NXT(2);
7990 start[3] = NXT(3);
7991 enc = xmlDetectCharEncoding(start, 4);
7992 if (enc != XML_CHAR_ENCODING_NONE) {
7993 xmlSwitchEncoding(ctxt, enc);
7994 }
7995
7996
7997 if (CUR == 0) {
7998 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7999 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8000 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8001 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008002 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008003 }
8004
8005 /*
8006 * Check for the XMLDecl in the Prolog.
8007 */
8008 GROW;
8009 if ((RAW == '<') && (NXT(1) == '?') &&
8010 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8011 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8012
8013 /*
8014 * Note that we will switch encoding on the fly.
8015 */
8016 xmlParseXMLDecl(ctxt);
8017 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8018 /*
8019 * The XML REC instructs us to stop parsing right here
8020 */
8021 return(-1);
8022 }
8023 SKIP_BLANKS;
8024 } else {
8025 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8026 }
8027 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8028 ctxt->sax->startDocument(ctxt->userData);
8029
8030 /*
8031 * Doing validity checking on chunk doesn't make sense
8032 */
8033 ctxt->instate = XML_PARSER_CONTENT;
8034 ctxt->validate = 0;
8035 ctxt->loadsubset = 0;
8036 ctxt->depth = 0;
8037
8038 xmlParseContent(ctxt);
8039
8040 if ((RAW == '<') && (NXT(1) == '/')) {
8041 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8042 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8043 ctxt->sax->error(ctxt->userData,
8044 "chunk is not well balanced\n");
8045 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008046 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008047 } else if (RAW != 0) {
8048 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8049 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8050 ctxt->sax->error(ctxt->userData,
8051 "extra content at the end of well balanced chunk\n");
8052 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008053 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008054 }
8055
8056 /*
8057 * SAX: end of the document processing.
8058 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008059 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008060 ctxt->sax->endDocument(ctxt->userData);
8061
8062 if (! ctxt->wellFormed) return(-1);
8063 return(0);
8064}
8065
8066/************************************************************************
8067 * *
8068 * Progressive parsing interfaces *
8069 * *
8070 ************************************************************************/
8071
8072/**
8073 * xmlParseLookupSequence:
8074 * @ctxt: an XML parser context
8075 * @first: the first char to lookup
8076 * @next: the next char to lookup or zero
8077 * @third: the next char to lookup or zero
8078 *
8079 * Try to find if a sequence (first, next, third) or just (first next) or
8080 * (first) is available in the input stream.
8081 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8082 * to avoid rescanning sequences of bytes, it DOES change the state of the
8083 * parser, do not use liberally.
8084 *
8085 * Returns the index to the current parsing point if the full sequence
8086 * is available, -1 otherwise.
8087 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008088static int
Owen Taylor3473f882001-02-23 17:55:21 +00008089xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8090 xmlChar next, xmlChar third) {
8091 int base, len;
8092 xmlParserInputPtr in;
8093 const xmlChar *buf;
8094
8095 in = ctxt->input;
8096 if (in == NULL) return(-1);
8097 base = in->cur - in->base;
8098 if (base < 0) return(-1);
8099 if (ctxt->checkIndex > base)
8100 base = ctxt->checkIndex;
8101 if (in->buf == NULL) {
8102 buf = in->base;
8103 len = in->length;
8104 } else {
8105 buf = in->buf->buffer->content;
8106 len = in->buf->buffer->use;
8107 }
8108 /* take into account the sequence length */
8109 if (third) len -= 2;
8110 else if (next) len --;
8111 for (;base < len;base++) {
8112 if (buf[base] == first) {
8113 if (third != 0) {
8114 if ((buf[base + 1] != next) ||
8115 (buf[base + 2] != third)) continue;
8116 } else if (next != 0) {
8117 if (buf[base + 1] != next) continue;
8118 }
8119 ctxt->checkIndex = 0;
8120#ifdef DEBUG_PUSH
8121 if (next == 0)
8122 xmlGenericError(xmlGenericErrorContext,
8123 "PP: lookup '%c' found at %d\n",
8124 first, base);
8125 else if (third == 0)
8126 xmlGenericError(xmlGenericErrorContext,
8127 "PP: lookup '%c%c' found at %d\n",
8128 first, next, base);
8129 else
8130 xmlGenericError(xmlGenericErrorContext,
8131 "PP: lookup '%c%c%c' found at %d\n",
8132 first, next, third, base);
8133#endif
8134 return(base - (in->cur - in->base));
8135 }
8136 }
8137 ctxt->checkIndex = base;
8138#ifdef DEBUG_PUSH
8139 if (next == 0)
8140 xmlGenericError(xmlGenericErrorContext,
8141 "PP: lookup '%c' failed\n", first);
8142 else if (third == 0)
8143 xmlGenericError(xmlGenericErrorContext,
8144 "PP: lookup '%c%c' failed\n", first, next);
8145 else
8146 xmlGenericError(xmlGenericErrorContext,
8147 "PP: lookup '%c%c%c' failed\n", first, next, third);
8148#endif
8149 return(-1);
8150}
8151
8152/**
Daniel Veillarda880b122003-04-21 21:36:41 +00008153 * xmlParseGetLasts:
8154 * @ctxt: an XML parser context
8155 * @lastlt: pointer to store the last '<' from the input
8156 * @lastgt: pointer to store the last '>' from the input
8157 *
8158 * Lookup the last < and > in the current chunk
8159 */
8160static void
8161xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
8162 const xmlChar **lastgt) {
8163 const xmlChar *tmp;
8164
8165 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
8166 xmlGenericError(xmlGenericErrorContext,
8167 "Internal error: xmlParseGetLasts\n");
8168 return;
8169 }
8170 if ((ctxt->progressive == 1) && (ctxt->inputNr == 1)) {
8171 tmp = ctxt->input->end;
8172 tmp--;
8173 while ((tmp >= ctxt->input->base) && (*tmp != '<') &&
8174 (*tmp != '>')) tmp--;
8175 if (tmp < ctxt->input->base) {
8176 *lastlt = NULL;
8177 *lastgt = NULL;
8178 } else if (*tmp == '<') {
8179 *lastlt = tmp;
8180 tmp--;
8181 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
8182 if (tmp < ctxt->input->base)
8183 *lastgt = NULL;
8184 else
8185 *lastgt = tmp;
8186 } else {
8187 *lastgt = tmp;
8188 tmp--;
8189 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
8190 if (tmp < ctxt->input->base)
8191 *lastlt = NULL;
8192 else
8193 *lastlt = tmp;
8194 }
8195
8196 } else {
8197 *lastlt = NULL;
8198 *lastgt = NULL;
8199 }
8200}
8201/**
Owen Taylor3473f882001-02-23 17:55:21 +00008202 * xmlParseTryOrFinish:
8203 * @ctxt: an XML parser context
8204 * @terminate: last chunk indicator
8205 *
8206 * Try to progress on parsing
8207 *
8208 * Returns zero if no parsing was possible
8209 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008210static int
Owen Taylor3473f882001-02-23 17:55:21 +00008211xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8212 int ret = 0;
8213 int avail;
8214 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00008215 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00008216
8217#ifdef DEBUG_PUSH
8218 switch (ctxt->instate) {
8219 case XML_PARSER_EOF:
8220 xmlGenericError(xmlGenericErrorContext,
8221 "PP: try EOF\n"); break;
8222 case XML_PARSER_START:
8223 xmlGenericError(xmlGenericErrorContext,
8224 "PP: try START\n"); break;
8225 case XML_PARSER_MISC:
8226 xmlGenericError(xmlGenericErrorContext,
8227 "PP: try MISC\n");break;
8228 case XML_PARSER_COMMENT:
8229 xmlGenericError(xmlGenericErrorContext,
8230 "PP: try COMMENT\n");break;
8231 case XML_PARSER_PROLOG:
8232 xmlGenericError(xmlGenericErrorContext,
8233 "PP: try PROLOG\n");break;
8234 case XML_PARSER_START_TAG:
8235 xmlGenericError(xmlGenericErrorContext,
8236 "PP: try START_TAG\n");break;
8237 case XML_PARSER_CONTENT:
8238 xmlGenericError(xmlGenericErrorContext,
8239 "PP: try CONTENT\n");break;
8240 case XML_PARSER_CDATA_SECTION:
8241 xmlGenericError(xmlGenericErrorContext,
8242 "PP: try CDATA_SECTION\n");break;
8243 case XML_PARSER_END_TAG:
8244 xmlGenericError(xmlGenericErrorContext,
8245 "PP: try END_TAG\n");break;
8246 case XML_PARSER_ENTITY_DECL:
8247 xmlGenericError(xmlGenericErrorContext,
8248 "PP: try ENTITY_DECL\n");break;
8249 case XML_PARSER_ENTITY_VALUE:
8250 xmlGenericError(xmlGenericErrorContext,
8251 "PP: try ENTITY_VALUE\n");break;
8252 case XML_PARSER_ATTRIBUTE_VALUE:
8253 xmlGenericError(xmlGenericErrorContext,
8254 "PP: try ATTRIBUTE_VALUE\n");break;
8255 case XML_PARSER_DTD:
8256 xmlGenericError(xmlGenericErrorContext,
8257 "PP: try DTD\n");break;
8258 case XML_PARSER_EPILOG:
8259 xmlGenericError(xmlGenericErrorContext,
8260 "PP: try EPILOG\n");break;
8261 case XML_PARSER_PI:
8262 xmlGenericError(xmlGenericErrorContext,
8263 "PP: try PI\n");break;
8264 case XML_PARSER_IGNORE:
8265 xmlGenericError(xmlGenericErrorContext,
8266 "PP: try IGNORE\n");break;
8267 }
8268#endif
8269
Daniel Veillarda880b122003-04-21 21:36:41 +00008270 if (ctxt->input->cur - ctxt->input->base > 4096) {
8271 xmlSHRINK(ctxt);
8272 ctxt->checkIndex = 0;
8273 }
8274 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00008275
Daniel Veillarda880b122003-04-21 21:36:41 +00008276 while (1) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008277 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
8278 return(0);
8279
8280
Owen Taylor3473f882001-02-23 17:55:21 +00008281 /*
8282 * Pop-up of finished entities.
8283 */
8284 while ((RAW == 0) && (ctxt->inputNr > 1))
8285 xmlPopInput(ctxt);
8286
8287 if (ctxt->input ==NULL) break;
8288 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00008289 avail = ctxt->input->length -
8290 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00008291 else {
8292 /*
8293 * If we are operating on converted input, try to flush
8294 * remaining chars to avoid them stalling in the non-converted
8295 * buffer.
8296 */
8297 if ((ctxt->input->buf->raw != NULL) &&
8298 (ctxt->input->buf->raw->use > 0)) {
8299 int base = ctxt->input->base -
8300 ctxt->input->buf->buffer->content;
8301 int current = ctxt->input->cur - ctxt->input->base;
8302
8303 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8304 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8305 ctxt->input->cur = ctxt->input->base + current;
8306 ctxt->input->end =
8307 &ctxt->input->buf->buffer->content[
8308 ctxt->input->buf->buffer->use];
8309 }
8310 avail = ctxt->input->buf->buffer->use -
8311 (ctxt->input->cur - ctxt->input->base);
8312 }
Owen Taylor3473f882001-02-23 17:55:21 +00008313 if (avail < 1)
8314 goto done;
8315 switch (ctxt->instate) {
8316 case XML_PARSER_EOF:
8317 /*
8318 * Document parsing is done !
8319 */
8320 goto done;
8321 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008322 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8323 xmlChar start[4];
8324 xmlCharEncoding enc;
8325
8326 /*
8327 * Very first chars read from the document flow.
8328 */
8329 if (avail < 4)
8330 goto done;
8331
8332 /*
8333 * Get the 4 first bytes and decode the charset
8334 * if enc != XML_CHAR_ENCODING_NONE
8335 * plug some encoding conversion routines.
8336 */
8337 start[0] = RAW;
8338 start[1] = NXT(1);
8339 start[2] = NXT(2);
8340 start[3] = NXT(3);
8341 enc = xmlDetectCharEncoding(start, 4);
8342 if (enc != XML_CHAR_ENCODING_NONE) {
8343 xmlSwitchEncoding(ctxt, enc);
8344 }
8345 break;
8346 }
Owen Taylor3473f882001-02-23 17:55:21 +00008347
8348 cur = ctxt->input->cur[0];
8349 next = ctxt->input->cur[1];
8350 if (cur == 0) {
8351 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8352 ctxt->sax->setDocumentLocator(ctxt->userData,
8353 &xmlDefaultSAXLocator);
8354 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8355 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8356 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8357 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008358 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008359 ctxt->instate = XML_PARSER_EOF;
8360#ifdef DEBUG_PUSH
8361 xmlGenericError(xmlGenericErrorContext,
8362 "PP: entering EOF\n");
8363#endif
8364 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8365 ctxt->sax->endDocument(ctxt->userData);
8366 goto done;
8367 }
8368 if ((cur == '<') && (next == '?')) {
8369 /* PI or XML decl */
8370 if (avail < 5) return(ret);
8371 if ((!terminate) &&
8372 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8373 return(ret);
8374 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8375 ctxt->sax->setDocumentLocator(ctxt->userData,
8376 &xmlDefaultSAXLocator);
8377 if ((ctxt->input->cur[2] == 'x') &&
8378 (ctxt->input->cur[3] == 'm') &&
8379 (ctxt->input->cur[4] == 'l') &&
8380 (IS_BLANK(ctxt->input->cur[5]))) {
8381 ret += 5;
8382#ifdef DEBUG_PUSH
8383 xmlGenericError(xmlGenericErrorContext,
8384 "PP: Parsing XML Decl\n");
8385#endif
8386 xmlParseXMLDecl(ctxt);
8387 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8388 /*
8389 * The XML REC instructs us to stop parsing right
8390 * here
8391 */
8392 ctxt->instate = XML_PARSER_EOF;
8393 return(0);
8394 }
8395 ctxt->standalone = ctxt->input->standalone;
8396 if ((ctxt->encoding == NULL) &&
8397 (ctxt->input->encoding != NULL))
8398 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8399 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8400 (!ctxt->disableSAX))
8401 ctxt->sax->startDocument(ctxt->userData);
8402 ctxt->instate = XML_PARSER_MISC;
8403#ifdef DEBUG_PUSH
8404 xmlGenericError(xmlGenericErrorContext,
8405 "PP: entering MISC\n");
8406#endif
8407 } else {
8408 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8409 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8410 (!ctxt->disableSAX))
8411 ctxt->sax->startDocument(ctxt->userData);
8412 ctxt->instate = XML_PARSER_MISC;
8413#ifdef DEBUG_PUSH
8414 xmlGenericError(xmlGenericErrorContext,
8415 "PP: entering MISC\n");
8416#endif
8417 }
8418 } else {
8419 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8420 ctxt->sax->setDocumentLocator(ctxt->userData,
8421 &xmlDefaultSAXLocator);
8422 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8423 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8424 (!ctxt->disableSAX))
8425 ctxt->sax->startDocument(ctxt->userData);
8426 ctxt->instate = XML_PARSER_MISC;
8427#ifdef DEBUG_PUSH
8428 xmlGenericError(xmlGenericErrorContext,
8429 "PP: entering MISC\n");
8430#endif
8431 }
8432 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00008433 case XML_PARSER_START_TAG: {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008434 const xmlChar *name, *oldname;
Daniel Veillarda880b122003-04-21 21:36:41 +00008435
8436 if ((avail < 2) && (ctxt->inputNr == 1))
8437 goto done;
8438 cur = ctxt->input->cur[0];
8439 if (cur != '<') {
8440 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8441 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8442 ctxt->sax->error(ctxt->userData,
8443 "Start tag expect, '<' not found\n");
8444 ctxt->wellFormed = 0;
8445 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8446 ctxt->instate = XML_PARSER_EOF;
8447#ifdef DEBUG_PUSH
8448 xmlGenericError(xmlGenericErrorContext,
8449 "PP: entering EOF\n");
8450#endif
8451 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8452 ctxt->sax->endDocument(ctxt->userData);
8453 goto done;
8454 }
8455 if (!terminate) {
8456 if (ctxt->progressive) {
8457 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
8458 goto done;
8459 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
8460 goto done;
8461 }
8462 }
8463 if (ctxt->spaceNr == 0)
8464 spacePush(ctxt, -1);
8465 else
8466 spacePush(ctxt, *ctxt->space);
8467 name = xmlParseStartTag(ctxt);
8468 if (name == NULL) {
8469 spacePop(ctxt);
8470 ctxt->instate = XML_PARSER_EOF;
8471#ifdef DEBUG_PUSH
8472 xmlGenericError(xmlGenericErrorContext,
8473 "PP: entering EOF\n");
8474#endif
8475 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8476 ctxt->sax->endDocument(ctxt->userData);
8477 goto done;
8478 }
8479 namePush(ctxt, name);
8480
8481 /*
8482 * [ VC: Root Element Type ]
8483 * The Name in the document type declaration must match
8484 * the element type of the root element.
8485 */
8486 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8487 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8488 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8489
8490 /*
8491 * Check for an Empty Element.
8492 */
8493 if ((RAW == '/') && (NXT(1) == '>')) {
8494 SKIP(2);
8495 if ((ctxt->sax != NULL) &&
8496 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8497 ctxt->sax->endElement(ctxt->userData, name);
8498 oldname = namePop(ctxt);
8499 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00008500#ifdef DEBUG_STACK
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008501 if (oldname != NULL) {
8502 xmlGenericError(xmlGenericErrorContext,
8503 "Close: popped %s\n", oldname);
Daniel Veillarda880b122003-04-21 21:36:41 +00008504 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008505#endif
Daniel Veillarda880b122003-04-21 21:36:41 +00008506 if (ctxt->name == NULL) {
8507 ctxt->instate = XML_PARSER_EPILOG;
8508#ifdef DEBUG_PUSH
8509 xmlGenericError(xmlGenericErrorContext,
8510 "PP: entering EPILOG\n");
8511#endif
8512 } else {
8513 ctxt->instate = XML_PARSER_CONTENT;
8514#ifdef DEBUG_PUSH
8515 xmlGenericError(xmlGenericErrorContext,
8516 "PP: entering CONTENT\n");
8517#endif
8518 }
8519 break;
8520 }
8521 if (RAW == '>') {
8522 NEXT;
8523 } else {
8524 ctxt->errNo = XML_ERR_GT_REQUIRED;
8525 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8526 ctxt->sax->error(ctxt->userData,
8527 "Couldn't find end of Start Tag %s\n",
8528 name);
8529 ctxt->wellFormed = 0;
8530 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8531
8532 /*
8533 * end of parsing of this node.
8534 */
8535 nodePop(ctxt);
8536 oldname = namePop(ctxt);
8537 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00008538#ifdef DEBUG_STACK
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008539 if (oldname != NULL) {
8540 xmlGenericError(xmlGenericErrorContext,
8541 "Close: popped %s\n", oldname);
Daniel Veillarda880b122003-04-21 21:36:41 +00008542 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008543#endif
Daniel Veillarda880b122003-04-21 21:36:41 +00008544 }
8545 ctxt->instate = XML_PARSER_CONTENT;
8546#ifdef DEBUG_PUSH
8547 xmlGenericError(xmlGenericErrorContext,
8548 "PP: entering CONTENT\n");
8549#endif
8550 break;
8551 }
8552 case XML_PARSER_CONTENT: {
8553 const xmlChar *test;
8554 unsigned int cons;
8555 if ((avail < 2) && (ctxt->inputNr == 1))
8556 goto done;
8557 cur = ctxt->input->cur[0];
8558 next = ctxt->input->cur[1];
8559
8560 test = CUR_PTR;
8561 cons = ctxt->input->consumed;
8562 if ((cur == '<') && (next == '/')) {
8563 ctxt->instate = XML_PARSER_END_TAG;
8564#ifdef DEBUG_PUSH
8565 xmlGenericError(xmlGenericErrorContext,
8566 "PP: entering END_TAG\n");
8567#endif
8568 break;
8569 } else if ((cur == '<') && (next == '?')) {
8570 if ((!terminate) &&
8571 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8572 goto done;
8573#ifdef DEBUG_PUSH
8574 xmlGenericError(xmlGenericErrorContext,
8575 "PP: Parsing PI\n");
8576#endif
8577 xmlParsePI(ctxt);
8578 } else if ((cur == '<') && (next != '!')) {
8579 ctxt->instate = XML_PARSER_START_TAG;
8580#ifdef DEBUG_PUSH
8581 xmlGenericError(xmlGenericErrorContext,
8582 "PP: entering START_TAG\n");
8583#endif
8584 break;
8585 } else if ((cur == '<') && (next == '!') &&
8586 (ctxt->input->cur[2] == '-') &&
8587 (ctxt->input->cur[3] == '-')) {
8588 if ((!terminate) &&
8589 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8590 goto done;
8591#ifdef DEBUG_PUSH
8592 xmlGenericError(xmlGenericErrorContext,
8593 "PP: Parsing Comment\n");
8594#endif
8595 xmlParseComment(ctxt);
8596 ctxt->instate = XML_PARSER_CONTENT;
8597 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8598 (ctxt->input->cur[2] == '[') &&
8599 (ctxt->input->cur[3] == 'C') &&
8600 (ctxt->input->cur[4] == 'D') &&
8601 (ctxt->input->cur[5] == 'A') &&
8602 (ctxt->input->cur[6] == 'T') &&
8603 (ctxt->input->cur[7] == 'A') &&
8604 (ctxt->input->cur[8] == '[')) {
8605 SKIP(9);
8606 ctxt->instate = XML_PARSER_CDATA_SECTION;
8607#ifdef DEBUG_PUSH
8608 xmlGenericError(xmlGenericErrorContext,
8609 "PP: entering CDATA_SECTION\n");
8610#endif
8611 break;
8612 } else if ((cur == '<') && (next == '!') &&
8613 (avail < 9)) {
8614 goto done;
8615 } else if (cur == '&') {
8616 if ((!terminate) &&
8617 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8618 goto done;
8619#ifdef DEBUG_PUSH
8620 xmlGenericError(xmlGenericErrorContext,
8621 "PP: Parsing Reference\n");
8622#endif
8623 xmlParseReference(ctxt);
8624 } else {
8625 /* TODO Avoid the extra copy, handle directly !!! */
8626 /*
8627 * Goal of the following test is:
8628 * - minimize calls to the SAX 'character' callback
8629 * when they are mergeable
8630 * - handle a problem for isBlank when we only parse
8631 * a sequence of blank chars and the next one is
8632 * not available to check against '<' presence.
8633 * - tries to homogenize the differences in SAX
8634 * callbacks between the push and pull versions
8635 * of the parser.
8636 */
8637 if ((ctxt->inputNr == 1) &&
8638 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8639 if (!terminate) {
8640 if (ctxt->progressive) {
8641 if ((lastlt == NULL) ||
8642 (ctxt->input->cur > lastlt))
8643 goto done;
8644 } else if (xmlParseLookupSequence(ctxt,
8645 '<', 0, 0) < 0) {
8646 goto done;
8647 }
8648 }
8649 }
8650 ctxt->checkIndex = 0;
8651#ifdef DEBUG_PUSH
8652 xmlGenericError(xmlGenericErrorContext,
8653 "PP: Parsing char data\n");
8654#endif
8655 xmlParseCharData(ctxt, 0);
8656 }
8657 /*
8658 * Pop-up of finished entities.
8659 */
8660 while ((RAW == 0) && (ctxt->inputNr > 1))
8661 xmlPopInput(ctxt);
8662 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
8663 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8664 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8665 ctxt->sax->error(ctxt->userData,
8666 "detected an error in element content\n");
8667 ctxt->wellFormed = 0;
8668 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8669 ctxt->instate = XML_PARSER_EOF;
8670 break;
8671 }
8672 break;
8673 }
8674 case XML_PARSER_END_TAG:
8675 if (avail < 2)
8676 goto done;
8677 if (!terminate) {
8678 if (ctxt->progressive) {
8679 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
8680 goto done;
8681 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
8682 goto done;
8683 }
8684 }
8685 xmlParseEndTag(ctxt);
8686 if (ctxt->name == NULL) {
8687 ctxt->instate = XML_PARSER_EPILOG;
8688#ifdef DEBUG_PUSH
8689 xmlGenericError(xmlGenericErrorContext,
8690 "PP: entering EPILOG\n");
8691#endif
8692 } else {
8693 ctxt->instate = XML_PARSER_CONTENT;
8694#ifdef DEBUG_PUSH
8695 xmlGenericError(xmlGenericErrorContext,
8696 "PP: entering CONTENT\n");
8697#endif
8698 }
8699 break;
8700 case XML_PARSER_CDATA_SECTION: {
8701 /*
8702 * The Push mode need to have the SAX callback for
8703 * cdataBlock merge back contiguous callbacks.
8704 */
8705 int base;
8706
8707 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8708 if (base < 0) {
8709 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8710 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8711 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00008712 ctxt->sax->cdataBlock(ctxt->userData,
8713 ctxt->input->cur,
8714 XML_PARSER_BIG_BUFFER_SIZE);
8715 else if (ctxt->sax->characters != NULL)
8716 ctxt->sax->characters(ctxt->userData,
8717 ctxt->input->cur,
Daniel Veillarda880b122003-04-21 21:36:41 +00008718 XML_PARSER_BIG_BUFFER_SIZE);
8719 }
8720 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8721 ctxt->checkIndex = 0;
8722 }
8723 goto done;
8724 } else {
8725 if ((ctxt->sax != NULL) && (base > 0) &&
8726 (!ctxt->disableSAX)) {
8727 if (ctxt->sax->cdataBlock != NULL)
8728 ctxt->sax->cdataBlock(ctxt->userData,
8729 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00008730 else if (ctxt->sax->characters != NULL)
8731 ctxt->sax->characters(ctxt->userData,
8732 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00008733 }
8734 SKIP(base + 3);
8735 ctxt->checkIndex = 0;
8736 ctxt->instate = XML_PARSER_CONTENT;
8737#ifdef DEBUG_PUSH
8738 xmlGenericError(xmlGenericErrorContext,
8739 "PP: entering CONTENT\n");
8740#endif
8741 }
8742 break;
8743 }
Owen Taylor3473f882001-02-23 17:55:21 +00008744 case XML_PARSER_MISC:
8745 SKIP_BLANKS;
8746 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00008747 avail = ctxt->input->length -
8748 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00008749 else
Daniel Veillarda880b122003-04-21 21:36:41 +00008750 avail = ctxt->input->buf->buffer->use -
8751 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00008752 if (avail < 2)
8753 goto done;
8754 cur = ctxt->input->cur[0];
8755 next = ctxt->input->cur[1];
8756 if ((cur == '<') && (next == '?')) {
8757 if ((!terminate) &&
8758 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8759 goto done;
8760#ifdef DEBUG_PUSH
8761 xmlGenericError(xmlGenericErrorContext,
8762 "PP: Parsing PI\n");
8763#endif
8764 xmlParsePI(ctxt);
8765 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00008766 (ctxt->input->cur[2] == '-') &&
8767 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008768 if ((!terminate) &&
8769 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8770 goto done;
8771#ifdef DEBUG_PUSH
8772 xmlGenericError(xmlGenericErrorContext,
8773 "PP: Parsing Comment\n");
8774#endif
8775 xmlParseComment(ctxt);
8776 ctxt->instate = XML_PARSER_MISC;
8777 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00008778 (ctxt->input->cur[2] == 'D') &&
8779 (ctxt->input->cur[3] == 'O') &&
8780 (ctxt->input->cur[4] == 'C') &&
8781 (ctxt->input->cur[5] == 'T') &&
8782 (ctxt->input->cur[6] == 'Y') &&
8783 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008784 (ctxt->input->cur[8] == 'E')) {
8785 if ((!terminate) &&
8786 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8787 goto done;
8788#ifdef DEBUG_PUSH
8789 xmlGenericError(xmlGenericErrorContext,
8790 "PP: Parsing internal subset\n");
8791#endif
8792 ctxt->inSubset = 1;
8793 xmlParseDocTypeDecl(ctxt);
8794 if (RAW == '[') {
8795 ctxt->instate = XML_PARSER_DTD;
8796#ifdef DEBUG_PUSH
8797 xmlGenericError(xmlGenericErrorContext,
8798 "PP: entering DTD\n");
8799#endif
8800 } else {
8801 /*
8802 * Create and update the external subset.
8803 */
8804 ctxt->inSubset = 2;
8805 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8806 (ctxt->sax->externalSubset != NULL))
8807 ctxt->sax->externalSubset(ctxt->userData,
8808 ctxt->intSubName, ctxt->extSubSystem,
8809 ctxt->extSubURI);
8810 ctxt->inSubset = 0;
8811 ctxt->instate = XML_PARSER_PROLOG;
8812#ifdef DEBUG_PUSH
8813 xmlGenericError(xmlGenericErrorContext,
8814 "PP: entering PROLOG\n");
8815#endif
8816 }
8817 } else if ((cur == '<') && (next == '!') &&
8818 (avail < 9)) {
8819 goto done;
8820 } else {
8821 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00008822 ctxt->progressive = 1;
8823 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00008824#ifdef DEBUG_PUSH
8825 xmlGenericError(xmlGenericErrorContext,
8826 "PP: entering START_TAG\n");
8827#endif
8828 }
8829 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008830 case XML_PARSER_PROLOG:
8831 SKIP_BLANKS;
8832 if (ctxt->input->buf == NULL)
8833 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8834 else
8835 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8836 if (avail < 2)
8837 goto done;
8838 cur = ctxt->input->cur[0];
8839 next = ctxt->input->cur[1];
8840 if ((cur == '<') && (next == '?')) {
8841 if ((!terminate) &&
8842 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8843 goto done;
8844#ifdef DEBUG_PUSH
8845 xmlGenericError(xmlGenericErrorContext,
8846 "PP: Parsing PI\n");
8847#endif
8848 xmlParsePI(ctxt);
8849 } else if ((cur == '<') && (next == '!') &&
8850 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8851 if ((!terminate) &&
8852 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8853 goto done;
8854#ifdef DEBUG_PUSH
8855 xmlGenericError(xmlGenericErrorContext,
8856 "PP: Parsing Comment\n");
8857#endif
8858 xmlParseComment(ctxt);
8859 ctxt->instate = XML_PARSER_PROLOG;
8860 } else if ((cur == '<') && (next == '!') &&
8861 (avail < 4)) {
8862 goto done;
8863 } else {
8864 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00008865 ctxt->progressive = 1;
8866 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00008867#ifdef DEBUG_PUSH
8868 xmlGenericError(xmlGenericErrorContext,
8869 "PP: entering START_TAG\n");
8870#endif
8871 }
8872 break;
8873 case XML_PARSER_EPILOG:
8874 SKIP_BLANKS;
8875 if (ctxt->input->buf == NULL)
8876 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8877 else
8878 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8879 if (avail < 2)
8880 goto done;
8881 cur = ctxt->input->cur[0];
8882 next = ctxt->input->cur[1];
8883 if ((cur == '<') && (next == '?')) {
8884 if ((!terminate) &&
8885 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8886 goto done;
8887#ifdef DEBUG_PUSH
8888 xmlGenericError(xmlGenericErrorContext,
8889 "PP: Parsing PI\n");
8890#endif
8891 xmlParsePI(ctxt);
8892 ctxt->instate = XML_PARSER_EPILOG;
8893 } else if ((cur == '<') && (next == '!') &&
8894 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8895 if ((!terminate) &&
8896 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8897 goto done;
8898#ifdef DEBUG_PUSH
8899 xmlGenericError(xmlGenericErrorContext,
8900 "PP: Parsing Comment\n");
8901#endif
8902 xmlParseComment(ctxt);
8903 ctxt->instate = XML_PARSER_EPILOG;
8904 } else if ((cur == '<') && (next == '!') &&
8905 (avail < 4)) {
8906 goto done;
8907 } else {
8908 ctxt->errNo = XML_ERR_DOCUMENT_END;
8909 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8910 ctxt->sax->error(ctxt->userData,
8911 "Extra content at the end of the document\n");
8912 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008913 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008914 ctxt->instate = XML_PARSER_EOF;
8915#ifdef DEBUG_PUSH
8916 xmlGenericError(xmlGenericErrorContext,
8917 "PP: entering EOF\n");
8918#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008919 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008920 ctxt->sax->endDocument(ctxt->userData);
8921 goto done;
8922 }
8923 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008924 case XML_PARSER_DTD: {
8925 /*
8926 * Sorry but progressive parsing of the internal subset
8927 * is not expected to be supported. We first check that
8928 * the full content of the internal subset is available and
8929 * the parsing is launched only at that point.
8930 * Internal subset ends up with "']' S? '>'" in an unescaped
8931 * section and not in a ']]>' sequence which are conditional
8932 * sections (whoever argued to keep that crap in XML deserve
8933 * a place in hell !).
8934 */
8935 int base, i;
8936 xmlChar *buf;
8937 xmlChar quote = 0;
8938
8939 base = ctxt->input->cur - ctxt->input->base;
8940 if (base < 0) return(0);
8941 if (ctxt->checkIndex > base)
8942 base = ctxt->checkIndex;
8943 buf = ctxt->input->buf->buffer->content;
8944 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8945 base++) {
8946 if (quote != 0) {
8947 if (buf[base] == quote)
8948 quote = 0;
8949 continue;
8950 }
8951 if (buf[base] == '"') {
8952 quote = '"';
8953 continue;
8954 }
8955 if (buf[base] == '\'') {
8956 quote = '\'';
8957 continue;
8958 }
8959 if (buf[base] == ']') {
8960 if ((unsigned int) base +1 >=
8961 ctxt->input->buf->buffer->use)
8962 break;
8963 if (buf[base + 1] == ']') {
8964 /* conditional crap, skip both ']' ! */
8965 base++;
8966 continue;
8967 }
8968 for (i = 0;
8969 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8970 i++) {
8971 if (buf[base + i] == '>')
8972 goto found_end_int_subset;
8973 }
8974 break;
8975 }
8976 }
8977 /*
8978 * We didn't found the end of the Internal subset
8979 */
8980 if (quote == 0)
8981 ctxt->checkIndex = base;
8982#ifdef DEBUG_PUSH
8983 if (next == 0)
8984 xmlGenericError(xmlGenericErrorContext,
8985 "PP: lookup of int subset end filed\n");
8986#endif
8987 goto done;
8988
8989found_end_int_subset:
8990 xmlParseInternalSubset(ctxt);
8991 ctxt->inSubset = 2;
8992 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8993 (ctxt->sax->externalSubset != NULL))
8994 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8995 ctxt->extSubSystem, ctxt->extSubURI);
8996 ctxt->inSubset = 0;
8997 ctxt->instate = XML_PARSER_PROLOG;
8998 ctxt->checkIndex = 0;
8999#ifdef DEBUG_PUSH
9000 xmlGenericError(xmlGenericErrorContext,
9001 "PP: entering PROLOG\n");
9002#endif
9003 break;
9004 }
9005 case XML_PARSER_COMMENT:
9006 xmlGenericError(xmlGenericErrorContext,
9007 "PP: internal error, state == COMMENT\n");
9008 ctxt->instate = XML_PARSER_CONTENT;
9009#ifdef DEBUG_PUSH
9010 xmlGenericError(xmlGenericErrorContext,
9011 "PP: entering CONTENT\n");
9012#endif
9013 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009014 case XML_PARSER_IGNORE:
9015 xmlGenericError(xmlGenericErrorContext,
9016 "PP: internal error, state == IGNORE");
9017 ctxt->instate = XML_PARSER_DTD;
9018#ifdef DEBUG_PUSH
9019 xmlGenericError(xmlGenericErrorContext,
9020 "PP: entering DTD\n");
9021#endif
9022 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009023 case XML_PARSER_PI:
9024 xmlGenericError(xmlGenericErrorContext,
9025 "PP: internal error, state == PI\n");
9026 ctxt->instate = XML_PARSER_CONTENT;
9027#ifdef DEBUG_PUSH
9028 xmlGenericError(xmlGenericErrorContext,
9029 "PP: entering CONTENT\n");
9030#endif
9031 break;
9032 case XML_PARSER_ENTITY_DECL:
9033 xmlGenericError(xmlGenericErrorContext,
9034 "PP: internal error, state == ENTITY_DECL\n");
9035 ctxt->instate = XML_PARSER_DTD;
9036#ifdef DEBUG_PUSH
9037 xmlGenericError(xmlGenericErrorContext,
9038 "PP: entering DTD\n");
9039#endif
9040 break;
9041 case XML_PARSER_ENTITY_VALUE:
9042 xmlGenericError(xmlGenericErrorContext,
9043 "PP: internal error, state == ENTITY_VALUE\n");
9044 ctxt->instate = XML_PARSER_CONTENT;
9045#ifdef DEBUG_PUSH
9046 xmlGenericError(xmlGenericErrorContext,
9047 "PP: entering DTD\n");
9048#endif
9049 break;
9050 case XML_PARSER_ATTRIBUTE_VALUE:
9051 xmlGenericError(xmlGenericErrorContext,
9052 "PP: internal error, state == ATTRIBUTE_VALUE\n");
9053 ctxt->instate = XML_PARSER_START_TAG;
9054#ifdef DEBUG_PUSH
9055 xmlGenericError(xmlGenericErrorContext,
9056 "PP: entering START_TAG\n");
9057#endif
9058 break;
9059 case XML_PARSER_SYSTEM_LITERAL:
9060 xmlGenericError(xmlGenericErrorContext,
9061 "PP: internal error, state == SYSTEM_LITERAL\n");
9062 ctxt->instate = XML_PARSER_START_TAG;
9063#ifdef DEBUG_PUSH
9064 xmlGenericError(xmlGenericErrorContext,
9065 "PP: entering START_TAG\n");
9066#endif
9067 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00009068 case XML_PARSER_PUBLIC_LITERAL:
9069 xmlGenericError(xmlGenericErrorContext,
9070 "PP: internal error, state == PUBLIC_LITERAL\n");
9071 ctxt->instate = XML_PARSER_START_TAG;
9072#ifdef DEBUG_PUSH
9073 xmlGenericError(xmlGenericErrorContext,
9074 "PP: entering START_TAG\n");
9075#endif
9076 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009077 }
9078 }
9079done:
9080#ifdef DEBUG_PUSH
9081 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
9082#endif
9083 return(ret);
9084}
9085
9086/**
Owen Taylor3473f882001-02-23 17:55:21 +00009087 * xmlParseChunk:
9088 * @ctxt: an XML parser context
9089 * @chunk: an char array
9090 * @size: the size in byte of the chunk
9091 * @terminate: last chunk indicator
9092 *
9093 * Parse a Chunk of memory
9094 *
9095 * Returns zero if no error, the xmlParserErrors otherwise.
9096 */
9097int
9098xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9099 int terminate) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009100 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9101 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +00009102 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9103 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9104 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9105 int cur = ctxt->input->cur - ctxt->input->base;
9106
9107 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9108 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9109 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009110 ctxt->input->end =
9111 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009112#ifdef DEBUG_PUSH
9113 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9114#endif
9115
Daniel Veillarda880b122003-04-21 21:36:41 +00009116#if 0
Owen Taylor3473f882001-02-23 17:55:21 +00009117 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
9118 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda880b122003-04-21 21:36:41 +00009119#endif
Owen Taylor3473f882001-02-23 17:55:21 +00009120 } else if (ctxt->instate != XML_PARSER_EOF) {
9121 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
9122 xmlParserInputBufferPtr in = ctxt->input->buf;
9123 if ((in->encoder != NULL) && (in->buffer != NULL) &&
9124 (in->raw != NULL)) {
9125 int nbchars;
9126
9127 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
9128 if (nbchars < 0) {
9129 xmlGenericError(xmlGenericErrorContext,
9130 "xmlParseChunk: encoder error\n");
9131 return(XML_ERR_INVALID_ENCODING);
9132 }
9133 }
9134 }
9135 }
9136 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009137 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9138 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +00009139 if (terminate) {
9140 /*
9141 * Check for termination
9142 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009143 int avail = 0;
9144 if (ctxt->input->buf == NULL)
9145 avail = ctxt->input->length -
9146 (ctxt->input->cur - ctxt->input->base);
9147 else
9148 avail = ctxt->input->buf->buffer->use -
9149 (ctxt->input->cur - ctxt->input->base);
9150
Owen Taylor3473f882001-02-23 17:55:21 +00009151 if ((ctxt->instate != XML_PARSER_EOF) &&
9152 (ctxt->instate != XML_PARSER_EPILOG)) {
9153 ctxt->errNo = XML_ERR_DOCUMENT_END;
9154 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9155 ctxt->sax->error(ctxt->userData,
9156 "Extra content at the end of the document\n");
9157 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009158 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009159 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009160 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
9161 ctxt->errNo = XML_ERR_DOCUMENT_END;
9162 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9163 ctxt->sax->error(ctxt->userData,
9164 "Extra content at the end of the document\n");
9165 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009166 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009167
9168 }
Owen Taylor3473f882001-02-23 17:55:21 +00009169 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009170 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009171 ctxt->sax->endDocument(ctxt->userData);
9172 }
9173 ctxt->instate = XML_PARSER_EOF;
9174 }
9175 return((xmlParserErrors) ctxt->errNo);
9176}
9177
9178/************************************************************************
9179 * *
9180 * I/O front end functions to the parser *
9181 * *
9182 ************************************************************************/
9183
9184/**
9185 * xmlStopParser:
9186 * @ctxt: an XML parser context
9187 *
9188 * Blocks further parser processing
9189 */
9190void
9191xmlStopParser(xmlParserCtxtPtr ctxt) {
9192 ctxt->instate = XML_PARSER_EOF;
9193 if (ctxt->input != NULL)
9194 ctxt->input->cur = BAD_CAST"";
9195}
9196
9197/**
9198 * xmlCreatePushParserCtxt:
9199 * @sax: a SAX handler
9200 * @user_data: The user data returned on SAX callbacks
9201 * @chunk: a pointer to an array of chars
9202 * @size: number of chars in the array
9203 * @filename: an optional file name or URI
9204 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00009205 * Create a parser context for using the XML parser in push mode.
9206 * If @buffer and @size are non-NULL, the data is used to detect
9207 * the encoding. The remaining characters will be parsed so they
9208 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00009209 * To allow content encoding detection, @size should be >= 4
9210 * The value of @filename is used for fetching external entities
9211 * and error/warning reports.
9212 *
9213 * Returns the new parser context or NULL
9214 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00009215
Owen Taylor3473f882001-02-23 17:55:21 +00009216xmlParserCtxtPtr
9217xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9218 const char *chunk, int size, const char *filename) {
9219 xmlParserCtxtPtr ctxt;
9220 xmlParserInputPtr inputStream;
9221 xmlParserInputBufferPtr buf;
9222 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9223
9224 /*
9225 * plug some encoding conversion routines
9226 */
9227 if ((chunk != NULL) && (size >= 4))
9228 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9229
9230 buf = xmlAllocParserInputBuffer(enc);
9231 if (buf == NULL) return(NULL);
9232
9233 ctxt = xmlNewParserCtxt();
9234 if (ctxt == NULL) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009235 xmlGenericError(xmlGenericErrorContext,
9236 "xml parser: out of memory\n");
9237 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009238 return(NULL);
9239 }
9240 if (sax != NULL) {
9241 if (ctxt->sax != &xmlDefaultSAXHandler)
9242 xmlFree(ctxt->sax);
9243 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9244 if (ctxt->sax == NULL) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009245 xmlGenericError(xmlGenericErrorContext,
9246 "xml parser: out of memory\n");
9247 xmlFreeParserInputBuffer(buf);
9248 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009249 return(NULL);
9250 }
9251 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9252 if (user_data != NULL)
9253 ctxt->userData = user_data;
9254 }
9255 if (filename == NULL) {
9256 ctxt->directory = NULL;
9257 } else {
9258 ctxt->directory = xmlParserGetDirectory(filename);
9259 }
9260
9261 inputStream = xmlNewInputStream(ctxt);
9262 if (inputStream == NULL) {
9263 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009264 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009265 return(NULL);
9266 }
9267
9268 if (filename == NULL)
9269 inputStream->filename = NULL;
9270 else
Daniel Veillardf4862f02002-09-10 11:13:43 +00009271 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +00009272 xmlCanonicPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +00009273 inputStream->buf = buf;
9274 inputStream->base = inputStream->buf->buffer->content;
9275 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009276 inputStream->end =
9277 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009278
9279 inputPush(ctxt, inputStream);
9280
9281 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9282 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009283 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9284 int cur = ctxt->input->cur - ctxt->input->base;
9285
Owen Taylor3473f882001-02-23 17:55:21 +00009286 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009287
9288 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9289 ctxt->input->cur = ctxt->input->base + cur;
9290 ctxt->input->end =
9291 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009292#ifdef DEBUG_PUSH
9293 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9294#endif
9295 }
9296
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009297 if (enc != XML_CHAR_ENCODING_NONE) {
9298 xmlSwitchEncoding(ctxt, enc);
9299 }
9300
Owen Taylor3473f882001-02-23 17:55:21 +00009301 return(ctxt);
9302}
9303
9304/**
9305 * xmlCreateIOParserCtxt:
9306 * @sax: a SAX handler
9307 * @user_data: The user data returned on SAX callbacks
9308 * @ioread: an I/O read function
9309 * @ioclose: an I/O close function
9310 * @ioctx: an I/O handler
9311 * @enc: the charset encoding if known
9312 *
9313 * Create a parser context for using the XML parser with an existing
9314 * I/O stream
9315 *
9316 * Returns the new parser context or NULL
9317 */
9318xmlParserCtxtPtr
9319xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9320 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9321 void *ioctx, xmlCharEncoding enc) {
9322 xmlParserCtxtPtr ctxt;
9323 xmlParserInputPtr inputStream;
9324 xmlParserInputBufferPtr buf;
9325
9326 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9327 if (buf == NULL) return(NULL);
9328
9329 ctxt = xmlNewParserCtxt();
9330 if (ctxt == NULL) {
9331 xmlFree(buf);
9332 return(NULL);
9333 }
9334 if (sax != NULL) {
9335 if (ctxt->sax != &xmlDefaultSAXHandler)
9336 xmlFree(ctxt->sax);
9337 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9338 if (ctxt->sax == NULL) {
9339 xmlFree(buf);
9340 xmlFree(ctxt);
9341 return(NULL);
9342 }
9343 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9344 if (user_data != NULL)
9345 ctxt->userData = user_data;
9346 }
9347
9348 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9349 if (inputStream == NULL) {
9350 xmlFreeParserCtxt(ctxt);
9351 return(NULL);
9352 }
9353 inputPush(ctxt, inputStream);
9354
9355 return(ctxt);
9356}
9357
9358/************************************************************************
9359 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009360 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00009361 * *
9362 ************************************************************************/
9363
9364/**
9365 * xmlIOParseDTD:
9366 * @sax: the SAX handler block or NULL
9367 * @input: an Input Buffer
9368 * @enc: the charset encoding if known
9369 *
9370 * Load and parse a DTD
9371 *
9372 * Returns the resulting xmlDtdPtr or NULL in case of error.
9373 * @input will be freed at parsing end.
9374 */
9375
9376xmlDtdPtr
9377xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9378 xmlCharEncoding enc) {
9379 xmlDtdPtr ret = NULL;
9380 xmlParserCtxtPtr ctxt;
9381 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009382 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00009383
9384 if (input == NULL)
9385 return(NULL);
9386
9387 ctxt = xmlNewParserCtxt();
9388 if (ctxt == NULL) {
9389 return(NULL);
9390 }
9391
9392 /*
9393 * Set-up the SAX context
9394 */
9395 if (sax != NULL) {
9396 if (ctxt->sax != NULL)
9397 xmlFree(ctxt->sax);
9398 ctxt->sax = sax;
9399 ctxt->userData = NULL;
9400 }
9401
9402 /*
9403 * generate a parser input from the I/O handler
9404 */
9405
9406 pinput = xmlNewIOInputStream(ctxt, input, enc);
9407 if (pinput == NULL) {
9408 if (sax != NULL) ctxt->sax = NULL;
9409 xmlFreeParserCtxt(ctxt);
9410 return(NULL);
9411 }
9412
9413 /*
9414 * plug some encoding conversion routines here.
9415 */
9416 xmlPushInput(ctxt, pinput);
9417
9418 pinput->filename = NULL;
9419 pinput->line = 1;
9420 pinput->col = 1;
9421 pinput->base = ctxt->input->cur;
9422 pinput->cur = ctxt->input->cur;
9423 pinput->free = NULL;
9424
9425 /*
9426 * let's parse that entity knowing it's an external subset.
9427 */
9428 ctxt->inSubset = 2;
9429 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9430 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9431 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009432
9433 if (enc == XML_CHAR_ENCODING_NONE) {
9434 /*
9435 * Get the 4 first bytes and decode the charset
9436 * if enc != XML_CHAR_ENCODING_NONE
9437 * plug some encoding conversion routines.
9438 */
9439 start[0] = RAW;
9440 start[1] = NXT(1);
9441 start[2] = NXT(2);
9442 start[3] = NXT(3);
9443 enc = xmlDetectCharEncoding(start, 4);
9444 if (enc != XML_CHAR_ENCODING_NONE) {
9445 xmlSwitchEncoding(ctxt, enc);
9446 }
9447 }
9448
Owen Taylor3473f882001-02-23 17:55:21 +00009449 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9450
9451 if (ctxt->myDoc != NULL) {
9452 if (ctxt->wellFormed) {
9453 ret = ctxt->myDoc->extSubset;
9454 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +00009455 if (ret != NULL) {
9456 xmlNodePtr tmp;
9457
9458 ret->doc = NULL;
9459 tmp = ret->children;
9460 while (tmp != NULL) {
9461 tmp->doc = NULL;
9462 tmp = tmp->next;
9463 }
9464 }
Owen Taylor3473f882001-02-23 17:55:21 +00009465 } else {
9466 ret = NULL;
9467 }
9468 xmlFreeDoc(ctxt->myDoc);
9469 ctxt->myDoc = NULL;
9470 }
9471 if (sax != NULL) ctxt->sax = NULL;
9472 xmlFreeParserCtxt(ctxt);
9473
9474 return(ret);
9475}
9476
9477/**
9478 * xmlSAXParseDTD:
9479 * @sax: the SAX handler block
9480 * @ExternalID: a NAME* containing the External ID of the DTD
9481 * @SystemID: a NAME* containing the URL to the DTD
9482 *
9483 * Load and parse an external subset.
9484 *
9485 * Returns the resulting xmlDtdPtr or NULL in case of error.
9486 */
9487
9488xmlDtdPtr
9489xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9490 const xmlChar *SystemID) {
9491 xmlDtdPtr ret = NULL;
9492 xmlParserCtxtPtr ctxt;
9493 xmlParserInputPtr input = NULL;
9494 xmlCharEncoding enc;
9495
9496 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9497
9498 ctxt = xmlNewParserCtxt();
9499 if (ctxt == NULL) {
9500 return(NULL);
9501 }
9502
9503 /*
9504 * Set-up the SAX context
9505 */
9506 if (sax != NULL) {
9507 if (ctxt->sax != NULL)
9508 xmlFree(ctxt->sax);
9509 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +00009510 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +00009511 }
9512
9513 /*
9514 * Ask the Entity resolver to load the damn thing
9515 */
9516
9517 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillardc6abc3d2003-04-26 13:27:30 +00009518 input = ctxt->sax->resolveEntity(ctxt, ExternalID, SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +00009519 if (input == NULL) {
9520 if (sax != NULL) ctxt->sax = NULL;
9521 xmlFreeParserCtxt(ctxt);
9522 return(NULL);
9523 }
9524
9525 /*
9526 * plug some encoding conversion routines here.
9527 */
9528 xmlPushInput(ctxt, input);
9529 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9530 xmlSwitchEncoding(ctxt, enc);
9531
9532 if (input->filename == NULL)
Daniel Veillard85095e22003-04-23 13:56:44 +00009533 input->filename = (char *) xmlCanonicPath(SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +00009534 input->line = 1;
9535 input->col = 1;
9536 input->base = ctxt->input->cur;
9537 input->cur = ctxt->input->cur;
9538 input->free = NULL;
9539
9540 /*
9541 * let's parse that entity knowing it's an external subset.
9542 */
9543 ctxt->inSubset = 2;
9544 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9545 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9546 ExternalID, SystemID);
9547 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9548
9549 if (ctxt->myDoc != NULL) {
9550 if (ctxt->wellFormed) {
9551 ret = ctxt->myDoc->extSubset;
9552 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +00009553 if (ret != NULL) {
9554 xmlNodePtr tmp;
9555
9556 ret->doc = NULL;
9557 tmp = ret->children;
9558 while (tmp != NULL) {
9559 tmp->doc = NULL;
9560 tmp = tmp->next;
9561 }
9562 }
Owen Taylor3473f882001-02-23 17:55:21 +00009563 } else {
9564 ret = NULL;
9565 }
9566 xmlFreeDoc(ctxt->myDoc);
9567 ctxt->myDoc = NULL;
9568 }
9569 if (sax != NULL) ctxt->sax = NULL;
9570 xmlFreeParserCtxt(ctxt);
9571
9572 return(ret);
9573}
9574
9575/**
9576 * xmlParseDTD:
9577 * @ExternalID: a NAME* containing the External ID of the DTD
9578 * @SystemID: a NAME* containing the URL to the DTD
9579 *
9580 * Load and parse an external subset.
9581 *
9582 * Returns the resulting xmlDtdPtr or NULL in case of error.
9583 */
9584
9585xmlDtdPtr
9586xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9587 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9588}
9589
9590/************************************************************************
9591 * *
9592 * Front ends when parsing an Entity *
9593 * *
9594 ************************************************************************/
9595
9596/**
Owen Taylor3473f882001-02-23 17:55:21 +00009597 * xmlParseCtxtExternalEntity:
9598 * @ctx: the existing parsing context
9599 * @URL: the URL for the entity to load
9600 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009601 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009602 *
9603 * Parse an external general entity within an existing parsing context
9604 * An external general parsed entity is well-formed if it matches the
9605 * production labeled extParsedEnt.
9606 *
9607 * [78] extParsedEnt ::= TextDecl? content
9608 *
9609 * Returns 0 if the entity is well formed, -1 in case of args problem and
9610 * the parser error code otherwise
9611 */
9612
9613int
9614xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009615 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009616 xmlParserCtxtPtr ctxt;
9617 xmlDocPtr newDoc;
9618 xmlSAXHandlerPtr oldsax = NULL;
9619 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009620 xmlChar start[4];
9621 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009622
9623 if (ctx->depth > 40) {
9624 return(XML_ERR_ENTITY_LOOP);
9625 }
9626
Daniel Veillardcda96922001-08-21 10:56:31 +00009627 if (lst != NULL)
9628 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009629 if ((URL == NULL) && (ID == NULL))
9630 return(-1);
9631 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9632 return(-1);
9633
9634
9635 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9636 if (ctxt == NULL) return(-1);
9637 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +00009638 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +00009639 oldsax = ctxt->sax;
9640 ctxt->sax = ctx->sax;
9641 newDoc = xmlNewDoc(BAD_CAST "1.0");
9642 if (newDoc == NULL) {
9643 xmlFreeParserCtxt(ctxt);
9644 return(-1);
9645 }
9646 if (ctx->myDoc != NULL) {
9647 newDoc->intSubset = ctx->myDoc->intSubset;
9648 newDoc->extSubset = ctx->myDoc->extSubset;
9649 }
9650 if (ctx->myDoc->URL != NULL) {
9651 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9652 }
9653 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9654 if (newDoc->children == NULL) {
9655 ctxt->sax = oldsax;
9656 xmlFreeParserCtxt(ctxt);
9657 newDoc->intSubset = NULL;
9658 newDoc->extSubset = NULL;
9659 xmlFreeDoc(newDoc);
9660 return(-1);
9661 }
9662 nodePush(ctxt, newDoc->children);
9663 if (ctx->myDoc == NULL) {
9664 ctxt->myDoc = newDoc;
9665 } else {
9666 ctxt->myDoc = ctx->myDoc;
9667 newDoc->children->doc = ctx->myDoc;
9668 }
9669
Daniel Veillard87a764e2001-06-20 17:41:10 +00009670 /*
9671 * Get the 4 first bytes and decode the charset
9672 * if enc != XML_CHAR_ENCODING_NONE
9673 * plug some encoding conversion routines.
9674 */
9675 GROW
9676 start[0] = RAW;
9677 start[1] = NXT(1);
9678 start[2] = NXT(2);
9679 start[3] = NXT(3);
9680 enc = xmlDetectCharEncoding(start, 4);
9681 if (enc != XML_CHAR_ENCODING_NONE) {
9682 xmlSwitchEncoding(ctxt, enc);
9683 }
9684
Owen Taylor3473f882001-02-23 17:55:21 +00009685 /*
9686 * Parse a possible text declaration first
9687 */
Owen Taylor3473f882001-02-23 17:55:21 +00009688 if ((RAW == '<') && (NXT(1) == '?') &&
9689 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9690 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9691 xmlParseTextDecl(ctxt);
9692 }
9693
9694 /*
9695 * Doing validity checking on chunk doesn't make sense
9696 */
9697 ctxt->instate = XML_PARSER_CONTENT;
9698 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +00009699 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +00009700 ctxt->loadsubset = ctx->loadsubset;
9701 ctxt->depth = ctx->depth + 1;
9702 ctxt->replaceEntities = ctx->replaceEntities;
9703 if (ctxt->validate) {
9704 ctxt->vctxt.error = ctx->vctxt.error;
9705 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009706 } else {
9707 ctxt->vctxt.error = NULL;
9708 ctxt->vctxt.warning = NULL;
9709 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009710 ctxt->vctxt.nodeTab = NULL;
9711 ctxt->vctxt.nodeNr = 0;
9712 ctxt->vctxt.nodeMax = 0;
9713 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009714
9715 xmlParseContent(ctxt);
9716
Daniel Veillard5f8d1a32003-03-23 21:02:00 +00009717 ctx->validate = ctxt->validate;
9718 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +00009719 if ((RAW == '<') && (NXT(1) == '/')) {
9720 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9721 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9722 ctxt->sax->error(ctxt->userData,
9723 "chunk is not well balanced\n");
9724 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009725 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009726 } else if (RAW != 0) {
9727 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9728 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9729 ctxt->sax->error(ctxt->userData,
9730 "extra content at the end of well balanced chunk\n");
9731 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009732 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009733 }
9734 if (ctxt->node != newDoc->children) {
9735 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9736 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9737 ctxt->sax->error(ctxt->userData,
9738 "chunk is not well balanced\n");
9739 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009740 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009741 }
9742
9743 if (!ctxt->wellFormed) {
9744 if (ctxt->errNo == 0)
9745 ret = 1;
9746 else
9747 ret = ctxt->errNo;
9748 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009749 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009750 xmlNodePtr cur;
9751
9752 /*
9753 * Return the newly created nodeset after unlinking it from
9754 * they pseudo parent.
9755 */
9756 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009757 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009758 while (cur != NULL) {
9759 cur->parent = NULL;
9760 cur = cur->next;
9761 }
9762 newDoc->children->children = NULL;
9763 }
9764 ret = 0;
9765 }
9766 ctxt->sax = oldsax;
9767 xmlFreeParserCtxt(ctxt);
9768 newDoc->intSubset = NULL;
9769 newDoc->extSubset = NULL;
9770 xmlFreeDoc(newDoc);
9771
9772 return(ret);
9773}
9774
9775/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009776 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009777 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009778 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009779 * @sax: the SAX handler bloc (possibly NULL)
9780 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9781 * @depth: Used for loop detection, use 0
9782 * @URL: the URL for the entity to load
9783 * @ID: the System ID for the entity to load
9784 * @list: the return value for the set of parsed nodes
9785 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009786 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009787 *
9788 * Returns 0 if the entity is well formed, -1 in case of args problem and
9789 * the parser error code otherwise
9790 */
9791
Daniel Veillard257d9102001-05-08 10:41:44 +00009792static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009793xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9794 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009795 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009796 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009797 xmlParserCtxtPtr ctxt;
9798 xmlDocPtr newDoc;
9799 xmlSAXHandlerPtr oldsax = NULL;
9800 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009801 xmlChar start[4];
9802 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009803
9804 if (depth > 40) {
9805 return(XML_ERR_ENTITY_LOOP);
9806 }
9807
9808
9809
9810 if (list != NULL)
9811 *list = NULL;
9812 if ((URL == NULL) && (ID == NULL))
9813 return(-1);
9814 if (doc == NULL) /* @@ relax but check for dereferences */
9815 return(-1);
9816
9817
9818 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9819 if (ctxt == NULL) return(-1);
9820 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009821 if (oldctxt != NULL) {
9822 ctxt->_private = oldctxt->_private;
9823 ctxt->loadsubset = oldctxt->loadsubset;
9824 ctxt->validate = oldctxt->validate;
9825 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009826 ctxt->record_info = oldctxt->record_info;
9827 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
9828 ctxt->node_seq.length = oldctxt->node_seq.length;
9829 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009830 } else {
9831 /*
9832 * Doing validity checking on chunk without context
9833 * doesn't make sense
9834 */
9835 ctxt->_private = NULL;
9836 ctxt->validate = 0;
9837 ctxt->external = 2;
9838 ctxt->loadsubset = 0;
9839 }
Owen Taylor3473f882001-02-23 17:55:21 +00009840 if (sax != NULL) {
9841 oldsax = ctxt->sax;
9842 ctxt->sax = sax;
9843 if (user_data != NULL)
9844 ctxt->userData = user_data;
9845 }
9846 newDoc = xmlNewDoc(BAD_CAST "1.0");
9847 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009848 ctxt->node_seq.maximum = 0;
9849 ctxt->node_seq.length = 0;
9850 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009851 xmlFreeParserCtxt(ctxt);
9852 return(-1);
9853 }
9854 if (doc != NULL) {
9855 newDoc->intSubset = doc->intSubset;
9856 newDoc->extSubset = doc->extSubset;
9857 }
9858 if (doc->URL != NULL) {
9859 newDoc->URL = xmlStrdup(doc->URL);
9860 }
9861 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9862 if (newDoc->children == NULL) {
9863 if (sax != NULL)
9864 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009865 ctxt->node_seq.maximum = 0;
9866 ctxt->node_seq.length = 0;
9867 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009868 xmlFreeParserCtxt(ctxt);
9869 newDoc->intSubset = NULL;
9870 newDoc->extSubset = NULL;
9871 xmlFreeDoc(newDoc);
9872 return(-1);
9873 }
9874 nodePush(ctxt, newDoc->children);
9875 if (doc == NULL) {
9876 ctxt->myDoc = newDoc;
9877 } else {
9878 ctxt->myDoc = doc;
9879 newDoc->children->doc = doc;
9880 }
9881
Daniel Veillard87a764e2001-06-20 17:41:10 +00009882 /*
9883 * Get the 4 first bytes and decode the charset
9884 * if enc != XML_CHAR_ENCODING_NONE
9885 * plug some encoding conversion routines.
9886 */
9887 GROW;
9888 start[0] = RAW;
9889 start[1] = NXT(1);
9890 start[2] = NXT(2);
9891 start[3] = NXT(3);
9892 enc = xmlDetectCharEncoding(start, 4);
9893 if (enc != XML_CHAR_ENCODING_NONE) {
9894 xmlSwitchEncoding(ctxt, enc);
9895 }
9896
Owen Taylor3473f882001-02-23 17:55:21 +00009897 /*
9898 * Parse a possible text declaration first
9899 */
Owen Taylor3473f882001-02-23 17:55:21 +00009900 if ((RAW == '<') && (NXT(1) == '?') &&
9901 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9902 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9903 xmlParseTextDecl(ctxt);
9904 }
9905
Owen Taylor3473f882001-02-23 17:55:21 +00009906 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009907 ctxt->depth = depth;
9908
9909 xmlParseContent(ctxt);
9910
Daniel Veillard561b7f82002-03-20 21:55:57 +00009911 if ((RAW == '<') && (NXT(1) == '/')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009912 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9913 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9914 ctxt->sax->error(ctxt->userData,
9915 "chunk is not well balanced\n");
9916 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009917 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00009918 } else if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00009919 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9920 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9921 ctxt->sax->error(ctxt->userData,
9922 "extra content at the end of well balanced chunk\n");
9923 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009924 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009925 }
9926 if (ctxt->node != newDoc->children) {
9927 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9928 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9929 ctxt->sax->error(ctxt->userData,
9930 "chunk is not well balanced\n");
9931 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009932 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009933 }
9934
9935 if (!ctxt->wellFormed) {
9936 if (ctxt->errNo == 0)
9937 ret = 1;
9938 else
9939 ret = ctxt->errNo;
9940 } else {
9941 if (list != NULL) {
9942 xmlNodePtr cur;
9943
9944 /*
9945 * Return the newly created nodeset after unlinking it from
9946 * they pseudo parent.
9947 */
9948 cur = newDoc->children->children;
9949 *list = cur;
9950 while (cur != NULL) {
9951 cur->parent = NULL;
9952 cur = cur->next;
9953 }
9954 newDoc->children->children = NULL;
9955 }
9956 ret = 0;
9957 }
9958 if (sax != NULL)
9959 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +00009960 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
9961 oldctxt->node_seq.length = ctxt->node_seq.length;
9962 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009963 ctxt->node_seq.maximum = 0;
9964 ctxt->node_seq.length = 0;
9965 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009966 xmlFreeParserCtxt(ctxt);
9967 newDoc->intSubset = NULL;
9968 newDoc->extSubset = NULL;
9969 xmlFreeDoc(newDoc);
9970
9971 return(ret);
9972}
9973
9974/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009975 * xmlParseExternalEntity:
9976 * @doc: the document the chunk pertains to
9977 * @sax: the SAX handler bloc (possibly NULL)
9978 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9979 * @depth: Used for loop detection, use 0
9980 * @URL: the URL for the entity to load
9981 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009982 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009983 *
9984 * Parse an external general entity
9985 * An external general parsed entity is well-formed if it matches the
9986 * production labeled extParsedEnt.
9987 *
9988 * [78] extParsedEnt ::= TextDecl? content
9989 *
9990 * Returns 0 if the entity is well formed, -1 in case of args problem and
9991 * the parser error code otherwise
9992 */
9993
9994int
9995xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009996 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009997 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009998 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009999}
10000
10001/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000010002 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000010003 * @doc: the document the chunk pertains to
10004 * @sax: the SAX handler bloc (possibly NULL)
10005 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10006 * @depth: Used for loop detection, use 0
10007 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000010008 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010009 *
10010 * Parse a well-balanced chunk of an XML document
10011 * called by the parser
10012 * The allowed sequence for the Well Balanced Chunk is the one defined by
10013 * the content production in the XML grammar:
10014 *
10015 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10016 *
10017 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10018 * the parser error code otherwise
10019 */
10020
10021int
10022xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000010023 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010024 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
10025 depth, string, lst, 0 );
10026}
10027
10028/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000010029 * xmlParseBalancedChunkMemoryInternal:
10030 * @oldctxt: the existing parsing context
10031 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10032 * @user_data: the user data field for the parser context
10033 * @lst: the return value for the set of parsed nodes
10034 *
10035 *
10036 * Parse a well-balanced chunk of an XML document
10037 * called by the parser
10038 * The allowed sequence for the Well Balanced Chunk is the one defined by
10039 * the content production in the XML grammar:
10040 *
10041 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10042 *
10043 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10044 * the parser error code otherwise
10045 *
10046 * In case recover is set to 1, the nodelist will not be empty even if
10047 * the parsed chunk is not well balanced.
10048 */
10049static int
10050xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
10051 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
10052 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010053 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010054 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010055 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010056 int size;
10057 int ret = 0;
10058
10059 if (oldctxt->depth > 40) {
10060 return(XML_ERR_ENTITY_LOOP);
10061 }
10062
10063
10064 if (lst != NULL)
10065 *lst = NULL;
10066 if (string == NULL)
10067 return(-1);
10068
10069 size = xmlStrlen(string);
10070
10071 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10072 if (ctxt == NULL) return(-1);
10073 if (user_data != NULL)
10074 ctxt->userData = user_data;
10075 else
10076 ctxt->userData = ctxt;
10077
10078 oldsax = ctxt->sax;
10079 ctxt->sax = oldctxt->sax;
Daniel Veillarde1ca5032002-12-09 14:13:43 +000010080 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010081 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010082 newDoc = xmlNewDoc(BAD_CAST "1.0");
10083 if (newDoc == NULL) {
10084 ctxt->sax = oldsax;
10085 xmlFreeParserCtxt(ctxt);
10086 return(-1);
10087 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010088 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010089 } else {
10090 ctxt->myDoc = oldctxt->myDoc;
10091 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010092 }
Daniel Veillard9bc53102002-11-25 13:20:04 +000010093 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +000010094 BAD_CAST "pseudoroot", NULL);
10095 if (ctxt->myDoc->children == NULL) {
10096 ctxt->sax = oldsax;
10097 xmlFreeParserCtxt(ctxt);
10098 if (newDoc != NULL)
10099 xmlFreeDoc(newDoc);
10100 return(-1);
10101 }
10102 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010103 ctxt->instate = XML_PARSER_CONTENT;
10104 ctxt->depth = oldctxt->depth + 1;
10105
Daniel Veillard328f48c2002-11-15 15:24:34 +000010106 ctxt->validate = 0;
10107 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000010108 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
10109 /*
10110 * ID/IDREF registration will be done in xmlValidateElement below
10111 */
10112 ctxt->loadsubset |= XML_SKIP_IDS;
10113 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010114
Daniel Veillard68e9e742002-11-16 15:35:11 +000010115 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010116 if ((RAW == '<') && (NXT(1) == '/')) {
10117 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10118 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10119 ctxt->sax->error(ctxt->userData,
10120 "chunk is not well balanced\n");
10121 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010122 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010123 } else if (RAW != 0) {
10124 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10125 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10126 ctxt->sax->error(ctxt->userData,
10127 "extra content at the end of well balanced chunk\n");
10128 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010129 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010130 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010131 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000010132 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10133 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10134 ctxt->sax->error(ctxt->userData,
10135 "chunk is not well balanced\n");
10136 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010137 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010138 }
10139
10140 if (!ctxt->wellFormed) {
10141 if (ctxt->errNo == 0)
10142 ret = 1;
10143 else
10144 ret = ctxt->errNo;
10145 } else {
10146 ret = 0;
10147 }
10148
10149 if ((lst != NULL) && (ret == 0)) {
10150 xmlNodePtr cur;
10151
10152 /*
10153 * Return the newly created nodeset after unlinking it from
10154 * they pseudo parent.
10155 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000010156 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010157 *lst = cur;
10158 while (cur != NULL) {
Daniel Veillard8d589042003-02-04 15:07:21 +000010159 if (oldctxt->validate && oldctxt->wellFormed &&
10160 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
10161 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
10162 oldctxt->myDoc, cur);
10163 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010164 cur->parent = NULL;
10165 cur = cur->next;
10166 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010167 ctxt->myDoc->children->children = NULL;
10168 }
10169 if (ctxt->myDoc != NULL) {
10170 xmlFreeNode(ctxt->myDoc->children);
10171 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010172 }
10173
10174 ctxt->sax = oldsax;
10175 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010176 if (newDoc != NULL)
10177 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010178
10179 return(ret);
10180}
10181
10182/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000010183 * xmlParseBalancedChunkMemoryRecover:
10184 * @doc: the document the chunk pertains to
10185 * @sax: the SAX handler bloc (possibly NULL)
10186 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10187 * @depth: Used for loop detection, use 0
10188 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10189 * @lst: the return value for the set of parsed nodes
10190 * @recover: return nodes even if the data is broken (use 0)
10191 *
10192 *
10193 * Parse a well-balanced chunk of an XML document
10194 * called by the parser
10195 * The allowed sequence for the Well Balanced Chunk is the one defined by
10196 * the content production in the XML grammar:
10197 *
10198 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10199 *
10200 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10201 * the parser error code otherwise
10202 *
10203 * In case recover is set to 1, the nodelist will not be empty even if
10204 * the parsed chunk is not well balanced.
10205 */
10206int
10207xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
10208 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
10209 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000010210 xmlParserCtxtPtr ctxt;
10211 xmlDocPtr newDoc;
10212 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000010213 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000010214 int size;
10215 int ret = 0;
10216
10217 if (depth > 40) {
10218 return(XML_ERR_ENTITY_LOOP);
10219 }
10220
10221
Daniel Veillardcda96922001-08-21 10:56:31 +000010222 if (lst != NULL)
10223 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010224 if (string == NULL)
10225 return(-1);
10226
10227 size = xmlStrlen(string);
10228
10229 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10230 if (ctxt == NULL) return(-1);
10231 ctxt->userData = ctxt;
10232 if (sax != NULL) {
10233 oldsax = ctxt->sax;
10234 ctxt->sax = sax;
10235 if (user_data != NULL)
10236 ctxt->userData = user_data;
10237 }
10238 newDoc = xmlNewDoc(BAD_CAST "1.0");
10239 if (newDoc == NULL) {
10240 xmlFreeParserCtxt(ctxt);
10241 return(-1);
10242 }
10243 if (doc != NULL) {
10244 newDoc->intSubset = doc->intSubset;
10245 newDoc->extSubset = doc->extSubset;
10246 }
10247 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10248 if (newDoc->children == NULL) {
10249 if (sax != NULL)
10250 ctxt->sax = oldsax;
10251 xmlFreeParserCtxt(ctxt);
10252 newDoc->intSubset = NULL;
10253 newDoc->extSubset = NULL;
10254 xmlFreeDoc(newDoc);
10255 return(-1);
10256 }
10257 nodePush(ctxt, newDoc->children);
10258 if (doc == NULL) {
10259 ctxt->myDoc = newDoc;
10260 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000010261 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000010262 newDoc->children->doc = doc;
10263 }
10264 ctxt->instate = XML_PARSER_CONTENT;
10265 ctxt->depth = depth;
10266
10267 /*
10268 * Doing validity checking on chunk doesn't make sense
10269 */
10270 ctxt->validate = 0;
10271 ctxt->loadsubset = 0;
10272
Daniel Veillardb39bc392002-10-26 19:29:51 +000010273 if ( doc != NULL ){
10274 content = doc->children;
10275 doc->children = NULL;
10276 xmlParseContent(ctxt);
10277 doc->children = content;
10278 }
10279 else {
10280 xmlParseContent(ctxt);
10281 }
Owen Taylor3473f882001-02-23 17:55:21 +000010282 if ((RAW == '<') && (NXT(1) == '/')) {
10283 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10284 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10285 ctxt->sax->error(ctxt->userData,
10286 "chunk is not well balanced\n");
10287 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010288 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010289 } else if (RAW != 0) {
10290 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10291 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10292 ctxt->sax->error(ctxt->userData,
10293 "extra content at the end of well balanced chunk\n");
10294 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010295 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010296 }
10297 if (ctxt->node != newDoc->children) {
10298 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10299 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10300 ctxt->sax->error(ctxt->userData,
10301 "chunk is not well balanced\n");
10302 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010303 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010304 }
10305
10306 if (!ctxt->wellFormed) {
10307 if (ctxt->errNo == 0)
10308 ret = 1;
10309 else
10310 ret = ctxt->errNo;
10311 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010312 ret = 0;
10313 }
10314
10315 if (lst != NULL && (ret == 0 || recover == 1)) {
10316 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010317
10318 /*
10319 * Return the newly created nodeset after unlinking it from
10320 * they pseudo parent.
10321 */
10322 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010323 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010324 while (cur != NULL) {
10325 cur->parent = NULL;
10326 cur = cur->next;
10327 }
10328 newDoc->children->children = NULL;
10329 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000010330
Owen Taylor3473f882001-02-23 17:55:21 +000010331 if (sax != NULL)
10332 ctxt->sax = oldsax;
10333 xmlFreeParserCtxt(ctxt);
10334 newDoc->intSubset = NULL;
10335 newDoc->extSubset = NULL;
10336 xmlFreeDoc(newDoc);
10337
10338 return(ret);
10339}
10340
10341/**
10342 * xmlSAXParseEntity:
10343 * @sax: the SAX handler block
10344 * @filename: the filename
10345 *
10346 * parse an XML external entity out of context and build a tree.
10347 * It use the given SAX function block to handle the parsing callback.
10348 * If sax is NULL, fallback to the default DOM tree building routines.
10349 *
10350 * [78] extParsedEnt ::= TextDecl? content
10351 *
10352 * This correspond to a "Well Balanced" chunk
10353 *
10354 * Returns the resulting document tree
10355 */
10356
10357xmlDocPtr
10358xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
10359 xmlDocPtr ret;
10360 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010361
10362 ctxt = xmlCreateFileParserCtxt(filename);
10363 if (ctxt == NULL) {
10364 return(NULL);
10365 }
10366 if (sax != NULL) {
10367 if (ctxt->sax != NULL)
10368 xmlFree(ctxt->sax);
10369 ctxt->sax = sax;
10370 ctxt->userData = NULL;
10371 }
10372
Owen Taylor3473f882001-02-23 17:55:21 +000010373 xmlParseExtParsedEnt(ctxt);
10374
10375 if (ctxt->wellFormed)
10376 ret = ctxt->myDoc;
10377 else {
10378 ret = NULL;
10379 xmlFreeDoc(ctxt->myDoc);
10380 ctxt->myDoc = NULL;
10381 }
10382 if (sax != NULL)
10383 ctxt->sax = NULL;
10384 xmlFreeParserCtxt(ctxt);
10385
10386 return(ret);
10387}
10388
10389/**
10390 * xmlParseEntity:
10391 * @filename: the filename
10392 *
10393 * parse an XML external entity out of context and build a tree.
10394 *
10395 * [78] extParsedEnt ::= TextDecl? content
10396 *
10397 * This correspond to a "Well Balanced" chunk
10398 *
10399 * Returns the resulting document tree
10400 */
10401
10402xmlDocPtr
10403xmlParseEntity(const char *filename) {
10404 return(xmlSAXParseEntity(NULL, filename));
10405}
10406
10407/**
10408 * xmlCreateEntityParserCtxt:
10409 * @URL: the entity URL
10410 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010411 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000010412 *
10413 * Create a parser context for an external entity
10414 * Automatic support for ZLIB/Compress compressed document is provided
10415 * by default if found at compile-time.
10416 *
10417 * Returns the new parser context or NULL
10418 */
10419xmlParserCtxtPtr
10420xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10421 const xmlChar *base) {
10422 xmlParserCtxtPtr ctxt;
10423 xmlParserInputPtr inputStream;
10424 char *directory = NULL;
10425 xmlChar *uri;
10426
10427 ctxt = xmlNewParserCtxt();
10428 if (ctxt == NULL) {
10429 return(NULL);
10430 }
10431
10432 uri = xmlBuildURI(URL, base);
10433
10434 if (uri == NULL) {
10435 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
10436 if (inputStream == NULL) {
10437 xmlFreeParserCtxt(ctxt);
10438 return(NULL);
10439 }
10440
10441 inputPush(ctxt, inputStream);
10442
10443 if ((ctxt->directory == NULL) && (directory == NULL))
10444 directory = xmlParserGetDirectory((char *)URL);
10445 if ((ctxt->directory == NULL) && (directory != NULL))
10446 ctxt->directory = directory;
10447 } else {
10448 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
10449 if (inputStream == NULL) {
10450 xmlFree(uri);
10451 xmlFreeParserCtxt(ctxt);
10452 return(NULL);
10453 }
10454
10455 inputPush(ctxt, inputStream);
10456
10457 if ((ctxt->directory == NULL) && (directory == NULL))
10458 directory = xmlParserGetDirectory((char *)uri);
10459 if ((ctxt->directory == NULL) && (directory != NULL))
10460 ctxt->directory = directory;
10461 xmlFree(uri);
10462 }
10463
10464 return(ctxt);
10465}
10466
10467/************************************************************************
10468 * *
10469 * Front ends when parsing from a file *
10470 * *
10471 ************************************************************************/
10472
10473/**
10474 * xmlCreateFileParserCtxt:
10475 * @filename: the filename
10476 *
10477 * Create a parser context for a file content.
10478 * Automatic support for ZLIB/Compress compressed document is provided
10479 * by default if found at compile-time.
10480 *
10481 * Returns the new parser context or NULL
10482 */
10483xmlParserCtxtPtr
10484xmlCreateFileParserCtxt(const char *filename)
10485{
10486 xmlParserCtxtPtr ctxt;
10487 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000010488 char *directory = NULL;
10489
Owen Taylor3473f882001-02-23 17:55:21 +000010490 ctxt = xmlNewParserCtxt();
10491 if (ctxt == NULL) {
10492 if (xmlDefaultSAXHandler.error != NULL) {
10493 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
10494 }
10495 return(NULL);
10496 }
10497
Igor Zlatkovicce076162003-02-23 13:39:39 +000010498
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000010499 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010500 if (inputStream == NULL) {
10501 xmlFreeParserCtxt(ctxt);
10502 return(NULL);
10503 }
10504
Owen Taylor3473f882001-02-23 17:55:21 +000010505 inputPush(ctxt, inputStream);
10506 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010507 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000010508 if ((ctxt->directory == NULL) && (directory != NULL))
10509 ctxt->directory = directory;
10510
10511 return(ctxt);
10512}
10513
10514/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010515 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000010516 * @sax: the SAX handler block
10517 * @filename: the filename
10518 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10519 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000010520 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000010521 *
10522 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10523 * compressed document is provided by default if found at compile-time.
10524 * It use the given SAX function block to handle the parsing callback.
10525 * If sax is NULL, fallback to the default DOM tree building routines.
10526 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000010527 * User data (void *) is stored within the parser context in the
10528 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000010529 *
Owen Taylor3473f882001-02-23 17:55:21 +000010530 * Returns the resulting document tree
10531 */
10532
10533xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000010534xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
10535 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000010536 xmlDocPtr ret;
10537 xmlParserCtxtPtr ctxt;
10538 char *directory = NULL;
10539
Daniel Veillard635ef722001-10-29 11:48:19 +000010540 xmlInitParser();
10541
Owen Taylor3473f882001-02-23 17:55:21 +000010542 ctxt = xmlCreateFileParserCtxt(filename);
10543 if (ctxt == NULL) {
10544 return(NULL);
10545 }
10546 if (sax != NULL) {
10547 if (ctxt->sax != NULL)
10548 xmlFree(ctxt->sax);
10549 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010550 }
Daniel Veillarda293c322001-10-02 13:54:14 +000010551 if (data!=NULL) {
10552 ctxt->_private=data;
10553 }
Owen Taylor3473f882001-02-23 17:55:21 +000010554
10555 if ((ctxt->directory == NULL) && (directory == NULL))
10556 directory = xmlParserGetDirectory(filename);
10557 if ((ctxt->directory == NULL) && (directory != NULL))
10558 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
10559
Daniel Veillarddad3f682002-11-17 16:47:27 +000010560 ctxt->recovery = recovery;
10561
Owen Taylor3473f882001-02-23 17:55:21 +000010562 xmlParseDocument(ctxt);
10563
10564 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10565 else {
10566 ret = NULL;
10567 xmlFreeDoc(ctxt->myDoc);
10568 ctxt->myDoc = NULL;
10569 }
10570 if (sax != NULL)
10571 ctxt->sax = NULL;
10572 xmlFreeParserCtxt(ctxt);
10573
10574 return(ret);
10575}
10576
10577/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010578 * xmlSAXParseFile:
10579 * @sax: the SAX handler block
10580 * @filename: the filename
10581 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10582 * documents
10583 *
10584 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10585 * compressed document is provided by default if found at compile-time.
10586 * It use the given SAX function block to handle the parsing callback.
10587 * If sax is NULL, fallback to the default DOM tree building routines.
10588 *
10589 * Returns the resulting document tree
10590 */
10591
10592xmlDocPtr
10593xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
10594 int recovery) {
10595 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
10596}
10597
10598/**
Owen Taylor3473f882001-02-23 17:55:21 +000010599 * xmlRecoverDoc:
10600 * @cur: a pointer to an array of xmlChar
10601 *
10602 * parse an XML in-memory document and build a tree.
10603 * In the case the document is not Well Formed, a tree is built anyway
10604 *
10605 * Returns the resulting document tree
10606 */
10607
10608xmlDocPtr
10609xmlRecoverDoc(xmlChar *cur) {
10610 return(xmlSAXParseDoc(NULL, cur, 1));
10611}
10612
10613/**
10614 * xmlParseFile:
10615 * @filename: the filename
10616 *
10617 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10618 * compressed document is provided by default if found at compile-time.
10619 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000010620 * Returns the resulting document tree if the file was wellformed,
10621 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000010622 */
10623
10624xmlDocPtr
10625xmlParseFile(const char *filename) {
10626 return(xmlSAXParseFile(NULL, filename, 0));
10627}
10628
10629/**
10630 * xmlRecoverFile:
10631 * @filename: the filename
10632 *
10633 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10634 * compressed document is provided by default if found at compile-time.
10635 * In the case the document is not Well Formed, a tree is built anyway
10636 *
10637 * Returns the resulting document tree
10638 */
10639
10640xmlDocPtr
10641xmlRecoverFile(const char *filename) {
10642 return(xmlSAXParseFile(NULL, filename, 1));
10643}
10644
10645
10646/**
10647 * xmlSetupParserForBuffer:
10648 * @ctxt: an XML parser context
10649 * @buffer: a xmlChar * buffer
10650 * @filename: a file name
10651 *
10652 * Setup the parser context to parse a new buffer; Clears any prior
10653 * contents from the parser context. The buffer parameter must not be
10654 * NULL, but the filename parameter can be
10655 */
10656void
10657xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
10658 const char* filename)
10659{
10660 xmlParserInputPtr input;
10661
10662 input = xmlNewInputStream(ctxt);
10663 if (input == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +000010664 xmlGenericError(xmlGenericErrorContext,
10665 "malloc");
Owen Taylor3473f882001-02-23 17:55:21 +000010666 xmlFree(ctxt);
10667 return;
10668 }
10669
10670 xmlClearParserCtxt(ctxt);
10671 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000010672 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000010673 input->base = buffer;
10674 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010675 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000010676 inputPush(ctxt, input);
10677}
10678
10679/**
10680 * xmlSAXUserParseFile:
10681 * @sax: a SAX handler
10682 * @user_data: The user data returned on SAX callbacks
10683 * @filename: a file name
10684 *
10685 * parse an XML file and call the given SAX handler routines.
10686 * Automatic support for ZLIB/Compress compressed document is provided
10687 *
10688 * Returns 0 in case of success or a error number otherwise
10689 */
10690int
10691xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10692 const char *filename) {
10693 int ret = 0;
10694 xmlParserCtxtPtr ctxt;
10695
10696 ctxt = xmlCreateFileParserCtxt(filename);
10697 if (ctxt == NULL) return -1;
10698 if (ctxt->sax != &xmlDefaultSAXHandler)
10699 xmlFree(ctxt->sax);
10700 ctxt->sax = sax;
10701 if (user_data != NULL)
10702 ctxt->userData = user_data;
10703
10704 xmlParseDocument(ctxt);
10705
10706 if (ctxt->wellFormed)
10707 ret = 0;
10708 else {
10709 if (ctxt->errNo != 0)
10710 ret = ctxt->errNo;
10711 else
10712 ret = -1;
10713 }
10714 if (sax != NULL)
10715 ctxt->sax = NULL;
10716 xmlFreeParserCtxt(ctxt);
10717
10718 return ret;
10719}
10720
10721/************************************************************************
10722 * *
10723 * Front ends when parsing from memory *
10724 * *
10725 ************************************************************************/
10726
10727/**
10728 * xmlCreateMemoryParserCtxt:
10729 * @buffer: a pointer to a char array
10730 * @size: the size of the array
10731 *
10732 * Create a parser context for an XML in-memory document.
10733 *
10734 * Returns the new parser context or NULL
10735 */
10736xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010737xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010738 xmlParserCtxtPtr ctxt;
10739 xmlParserInputPtr input;
10740 xmlParserInputBufferPtr buf;
10741
10742 if (buffer == NULL)
10743 return(NULL);
10744 if (size <= 0)
10745 return(NULL);
10746
10747 ctxt = xmlNewParserCtxt();
10748 if (ctxt == NULL)
10749 return(NULL);
10750
10751 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010752 if (buf == NULL) {
10753 xmlFreeParserCtxt(ctxt);
10754 return(NULL);
10755 }
Owen Taylor3473f882001-02-23 17:55:21 +000010756
10757 input = xmlNewInputStream(ctxt);
10758 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010759 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010760 xmlFreeParserCtxt(ctxt);
10761 return(NULL);
10762 }
10763
10764 input->filename = NULL;
10765 input->buf = buf;
10766 input->base = input->buf->buffer->content;
10767 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010768 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010769
10770 inputPush(ctxt, input);
10771 return(ctxt);
10772}
10773
10774/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010775 * xmlSAXParseMemoryWithData:
10776 * @sax: the SAX handler block
10777 * @buffer: an pointer to a char array
10778 * @size: the size of the array
10779 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10780 * documents
10781 * @data: the userdata
10782 *
10783 * parse an XML in-memory block and use the given SAX function block
10784 * to handle the parsing callback. If sax is NULL, fallback to the default
10785 * DOM tree building routines.
10786 *
10787 * User data (void *) is stored within the parser context in the
10788 * context's _private member, so it is available nearly everywhere in libxml
10789 *
10790 * Returns the resulting document tree
10791 */
10792
10793xmlDocPtr
10794xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
10795 int size, int recovery, void *data) {
10796 xmlDocPtr ret;
10797 xmlParserCtxtPtr ctxt;
10798
10799 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10800 if (ctxt == NULL) return(NULL);
10801 if (sax != NULL) {
10802 if (ctxt->sax != NULL)
10803 xmlFree(ctxt->sax);
10804 ctxt->sax = sax;
10805 }
10806 if (data!=NULL) {
10807 ctxt->_private=data;
10808 }
10809
Daniel Veillardadba5f12003-04-04 16:09:01 +000010810 ctxt->recovery = recovery;
10811
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010812 xmlParseDocument(ctxt);
10813
10814 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10815 else {
10816 ret = NULL;
10817 xmlFreeDoc(ctxt->myDoc);
10818 ctxt->myDoc = NULL;
10819 }
10820 if (sax != NULL)
10821 ctxt->sax = NULL;
10822 xmlFreeParserCtxt(ctxt);
10823
10824 return(ret);
10825}
10826
10827/**
Owen Taylor3473f882001-02-23 17:55:21 +000010828 * xmlSAXParseMemory:
10829 * @sax: the SAX handler block
10830 * @buffer: an pointer to a char array
10831 * @size: the size of the array
10832 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10833 * documents
10834 *
10835 * parse an XML in-memory block and use the given SAX function block
10836 * to handle the parsing callback. If sax is NULL, fallback to the default
10837 * DOM tree building routines.
10838 *
10839 * Returns the resulting document tree
10840 */
10841xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010842xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10843 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010844 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010845}
10846
10847/**
10848 * xmlParseMemory:
10849 * @buffer: an pointer to a char array
10850 * @size: the size of the array
10851 *
10852 * parse an XML in-memory block and build a tree.
10853 *
10854 * Returns the resulting document tree
10855 */
10856
Daniel Veillard50822cb2001-07-26 20:05:51 +000010857xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010858 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10859}
10860
10861/**
10862 * xmlRecoverMemory:
10863 * @buffer: an pointer to a char array
10864 * @size: the size of the array
10865 *
10866 * parse an XML in-memory block and build a tree.
10867 * In the case the document is not Well Formed, a tree is built anyway
10868 *
10869 * Returns the resulting document tree
10870 */
10871
Daniel Veillard50822cb2001-07-26 20:05:51 +000010872xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010873 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10874}
10875
10876/**
10877 * xmlSAXUserParseMemory:
10878 * @sax: a SAX handler
10879 * @user_data: The user data returned on SAX callbacks
10880 * @buffer: an in-memory XML document input
10881 * @size: the length of the XML document in bytes
10882 *
10883 * A better SAX parsing routine.
10884 * parse an XML in-memory buffer and call the given SAX handler routines.
10885 *
10886 * Returns 0 in case of success or a error number otherwise
10887 */
10888int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010889 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010890 int ret = 0;
10891 xmlParserCtxtPtr ctxt;
10892 xmlSAXHandlerPtr oldsax = NULL;
10893
Daniel Veillard9e923512002-08-14 08:48:52 +000010894 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000010895 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10896 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000010897 oldsax = ctxt->sax;
10898 ctxt->sax = sax;
Daniel Veillard30211a02001-04-26 09:33:18 +000010899 if (user_data != NULL)
10900 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010901
10902 xmlParseDocument(ctxt);
10903
10904 if (ctxt->wellFormed)
10905 ret = 0;
10906 else {
10907 if (ctxt->errNo != 0)
10908 ret = ctxt->errNo;
10909 else
10910 ret = -1;
10911 }
Daniel Veillard9e923512002-08-14 08:48:52 +000010912 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000010913 xmlFreeParserCtxt(ctxt);
10914
10915 return ret;
10916}
10917
10918/**
10919 * xmlCreateDocParserCtxt:
10920 * @cur: a pointer to an array of xmlChar
10921 *
10922 * Creates a parser context for an XML in-memory document.
10923 *
10924 * Returns the new parser context or NULL
10925 */
10926xmlParserCtxtPtr
10927xmlCreateDocParserCtxt(xmlChar *cur) {
10928 int len;
10929
10930 if (cur == NULL)
10931 return(NULL);
10932 len = xmlStrlen(cur);
10933 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10934}
10935
10936/**
10937 * xmlSAXParseDoc:
10938 * @sax: the SAX handler block
10939 * @cur: a pointer to an array of xmlChar
10940 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10941 * documents
10942 *
10943 * parse an XML in-memory document and build a tree.
10944 * It use the given SAX function block to handle the parsing callback.
10945 * If sax is NULL, fallback to the default DOM tree building routines.
10946 *
10947 * Returns the resulting document tree
10948 */
10949
10950xmlDocPtr
10951xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10952 xmlDocPtr ret;
10953 xmlParserCtxtPtr ctxt;
10954
10955 if (cur == NULL) return(NULL);
10956
10957
10958 ctxt = xmlCreateDocParserCtxt(cur);
10959 if (ctxt == NULL) return(NULL);
10960 if (sax != NULL) {
10961 ctxt->sax = sax;
10962 ctxt->userData = NULL;
10963 }
10964
10965 xmlParseDocument(ctxt);
10966 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10967 else {
10968 ret = NULL;
10969 xmlFreeDoc(ctxt->myDoc);
10970 ctxt->myDoc = NULL;
10971 }
10972 if (sax != NULL)
10973 ctxt->sax = NULL;
10974 xmlFreeParserCtxt(ctxt);
10975
10976 return(ret);
10977}
10978
10979/**
10980 * xmlParseDoc:
10981 * @cur: a pointer to an array of xmlChar
10982 *
10983 * parse an XML in-memory document and build a tree.
10984 *
10985 * Returns the resulting document tree
10986 */
10987
10988xmlDocPtr
10989xmlParseDoc(xmlChar *cur) {
10990 return(xmlSAXParseDoc(NULL, cur, 0));
10991}
10992
Daniel Veillard8107a222002-01-13 14:10:10 +000010993/************************************************************************
10994 * *
10995 * Specific function to keep track of entities references *
10996 * and used by the XSLT debugger *
10997 * *
10998 ************************************************************************/
10999
11000static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
11001
11002/**
11003 * xmlAddEntityReference:
11004 * @ent : A valid entity
11005 * @firstNode : A valid first node for children of entity
11006 * @lastNode : A valid last node of children entity
11007 *
11008 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
11009 */
11010static void
11011xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
11012 xmlNodePtr lastNode)
11013{
11014 if (xmlEntityRefFunc != NULL) {
11015 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
11016 }
11017}
11018
11019
11020/**
11021 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000011022 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000011023 *
11024 * Set the function to call call back when a xml reference has been made
11025 */
11026void
11027xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
11028{
11029 xmlEntityRefFunc = func;
11030}
Owen Taylor3473f882001-02-23 17:55:21 +000011031
11032/************************************************************************
11033 * *
11034 * Miscellaneous *
11035 * *
11036 ************************************************************************/
11037
11038#ifdef LIBXML_XPATH_ENABLED
11039#include <libxml/xpath.h>
11040#endif
11041
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011042extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000011043static int xmlParserInitialized = 0;
11044
11045/**
11046 * xmlInitParser:
11047 *
11048 * Initialization function for the XML parser.
11049 * This is not reentrant. Call once before processing in case of
11050 * use in multithreaded programs.
11051 */
11052
11053void
11054xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000011055 if (xmlParserInitialized != 0)
11056 return;
Owen Taylor3473f882001-02-23 17:55:21 +000011057
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011058 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
11059 (xmlGenericError == NULL))
11060 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011061 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000011062 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000011063 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000011064 xmlInitCharEncodingHandlers();
11065 xmlInitializePredefinedEntities();
11066 xmlDefaultSAXHandlerInit();
11067 xmlRegisterDefaultInputCallbacks();
11068 xmlRegisterDefaultOutputCallbacks();
11069#ifdef LIBXML_HTML_ENABLED
11070 htmlInitAutoClose();
11071 htmlDefaultSAXHandlerInit();
11072#endif
11073#ifdef LIBXML_XPATH_ENABLED
11074 xmlXPathInit();
11075#endif
11076 xmlParserInitialized = 1;
11077}
11078
11079/**
11080 * xmlCleanupParser:
11081 *
11082 * Cleanup function for the XML parser. It tries to reclaim all
11083 * parsing related global memory allocated for the parser processing.
11084 * It doesn't deallocate any document related memory. Calling this
11085 * function should not prevent reusing the parser.
Daniel Veillard7424eb62003-01-24 14:14:52 +000011086 * One should call xmlCleanupParser() only when the process has
11087 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000011088 */
11089
11090void
11091xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000011092 if (!xmlParserInitialized)
11093 return;
11094
Owen Taylor3473f882001-02-23 17:55:21 +000011095 xmlCleanupCharEncodingHandlers();
11096 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000011097#ifdef LIBXML_CATALOG_ENABLED
11098 xmlCatalogCleanup();
11099#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000011100 xmlCleanupThreads();
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011101 xmlCleanupGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000011102 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011103}