blob: 96acf99371b1b5ca68e2df6b8954c1ab663a9cde [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their related
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
44#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000045#include <libxml/threads.h>
46#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000056#ifdef LIBXML_CATALOG_ENABLED
57#include <libxml/catalog.h>
58#endif
Owen Taylor3473f882001-02-23 17:55:21 +000059
60#ifdef HAVE_CTYPE_H
61#include <ctype.h>
62#endif
63#ifdef HAVE_STDLIB_H
64#include <stdlib.h>
65#endif
66#ifdef HAVE_SYS_STAT_H
67#include <sys/stat.h>
68#endif
69#ifdef HAVE_FCNTL_H
70#include <fcntl.h>
71#endif
72#ifdef HAVE_UNISTD_H
73#include <unistd.h>
74#endif
75#ifdef HAVE_ZLIB_H
76#include <zlib.h>
77#endif
78
Daniel Veillard3b2e4e12003-02-03 08:52:58 +000079/**
80 * MAX_DEPTH:
81 *
82 * arbitrary depth limit for the XML documents that we allow to
83 * process. This is not a limitation of the parser but a safety
84 * boundary feature.
85 */
86#define MAX_DEPTH 1024
Owen Taylor3473f882001-02-23 17:55:21 +000087
Daniel Veillard21a0f912001-02-25 19:54:14 +000088#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000089#define XML_PARSER_BUFFER_SIZE 100
90
Daniel Veillard5997aca2002-03-18 18:36:20 +000091#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
92
/*
 * NULL-terminated list of the XML prefixed PI targets allowed by W3C specs.
 * Both the strings and the table itself are read-only.
 */

static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
101
102/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000103xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
104 const xmlChar **str);
105
Daniel Veillard257d9102001-05-08 10:41:44 +0000106static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000107xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
108 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000109 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000110 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000111
Daniel Veillard8107a222002-01-13 14:10:10 +0000112static void
113xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
114 xmlNodePtr lastNode);
115
Daniel Veillard328f48c2002-11-15 15:24:34 +0000116static int
117xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
118 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Owen Taylor3473f882001-02-23 17:55:21 +0000119/************************************************************************
120 * *
121 * Parser stacks related functions and macros *
122 * *
123 ************************************************************************/
124
125xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
126 const xmlChar ** str);
127
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000128/**
129 * inputPush:
130 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000131 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000132 *
133 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000134 *
135 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000136 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000137extern int
138inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
139{
140 if (ctxt->inputNr >= ctxt->inputMax) {
141 ctxt->inputMax *= 2;
142 ctxt->inputTab =
143 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
144 ctxt->inputMax *
145 sizeof(ctxt->inputTab[0]));
146 if (ctxt->inputTab == NULL) {
147 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
148 return (0);
149 }
150 }
151 ctxt->inputTab[ctxt->inputNr] = value;
152 ctxt->input = value;
153 return (ctxt->inputNr++);
154}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000155/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000156 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000157 * @ctxt: an XML parser context
158 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000159 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000160 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000161 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000162 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000163extern xmlParserInputPtr
164inputPop(xmlParserCtxtPtr ctxt)
165{
166 xmlParserInputPtr ret;
167
168 if (ctxt->inputNr <= 0)
169 return (0);
170 ctxt->inputNr--;
171 if (ctxt->inputNr > 0)
172 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
173 else
174 ctxt->input = NULL;
175 ret = ctxt->inputTab[ctxt->inputNr];
176 ctxt->inputTab[ctxt->inputNr] = 0;
177 return (ret);
178}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000179/**
180 * nodePush:
181 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000182 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000183 *
184 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000185 *
186 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000187 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000188extern int
189nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
190{
191 if (ctxt->nodeNr >= ctxt->nodeMax) {
192 ctxt->nodeMax *= 2;
193 ctxt->nodeTab =
194 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
195 ctxt->nodeMax *
196 sizeof(ctxt->nodeTab[0]));
197 if (ctxt->nodeTab == NULL) {
198 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
199 return (0);
200 }
201 }
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000202#ifdef MAX_DEPTH
203 if (ctxt->nodeNr > MAX_DEPTH) {
204 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
205 ctxt->sax->error(ctxt->userData,
206 "Excessive depth in document: change MAX_DEPTH = %d\n",
207 MAX_DEPTH);
208 ctxt->wellFormed = 0;
209 ctxt->instate = XML_PARSER_EOF;
210 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
211 return(0);
212 }
213#endif
Daniel Veillard1c732d22002-11-30 11:22:59 +0000214 ctxt->nodeTab[ctxt->nodeNr] = value;
215 ctxt->node = value;
216 return (ctxt->nodeNr++);
217}
218/**
219 * nodePop:
220 * @ctxt: an XML parser context
221 *
222 * Pops the top element node from the node stack
223 *
224 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +0000225 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000226extern xmlNodePtr
227nodePop(xmlParserCtxtPtr ctxt)
228{
229 xmlNodePtr ret;
230
231 if (ctxt->nodeNr <= 0)
232 return (0);
233 ctxt->nodeNr--;
234 if (ctxt->nodeNr > 0)
235 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
236 else
237 ctxt->node = NULL;
238 ret = ctxt->nodeTab[ctxt->nodeNr];
239 ctxt->nodeTab[ctxt->nodeNr] = 0;
240 return (ret);
241}
242/**
243 * namePush:
244 * @ctxt: an XML parser context
245 * @value: the element name
246 *
247 * Pushes a new element name on top of the name stack
248 *
249 * Returns 0 in case of error, the index in the stack otherwise
250 */
251extern int
252namePush(xmlParserCtxtPtr ctxt, xmlChar * value)
253{
254 if (ctxt->nameNr >= ctxt->nameMax) {
255 ctxt->nameMax *= 2;
256 ctxt->nameTab =
257 (xmlChar * *)xmlRealloc(ctxt->nameTab,
258 ctxt->nameMax *
259 sizeof(ctxt->nameTab[0]));
260 if (ctxt->nameTab == NULL) {
261 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
262 return (0);
263 }
264 }
265 ctxt->nameTab[ctxt->nameNr] = value;
266 ctxt->name = value;
267 return (ctxt->nameNr++);
268}
269/**
270 * namePop:
271 * @ctxt: an XML parser context
272 *
273 * Pops the top element name from the name stack
274 *
275 * Returns the name just removed
276 */
277extern xmlChar *
278namePop(xmlParserCtxtPtr ctxt)
279{
280 xmlChar *ret;
281
282 if (ctxt->nameNr <= 0)
283 return (0);
284 ctxt->nameNr--;
285 if (ctxt->nameNr > 0)
286 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
287 else
288 ctxt->name = NULL;
289 ret = ctxt->nameTab[ctxt->nameNr];
290 ctxt->nameTab[ctxt->nameNr] = 0;
291 return (ret);
292}
Owen Taylor3473f882001-02-23 17:55:21 +0000293
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000294static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000295 if (ctxt->spaceNr >= ctxt->spaceMax) {
296 ctxt->spaceMax *= 2;
297 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
298 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
299 if (ctxt->spaceTab == NULL) {
300 xmlGenericError(xmlGenericErrorContext,
301 "realloc failed !\n");
302 return(0);
303 }
304 }
305 ctxt->spaceTab[ctxt->spaceNr] = val;
306 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
307 return(ctxt->spaceNr++);
308}
309
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000310static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000311 int ret;
312 if (ctxt->spaceNr <= 0) return(0);
313 ctxt->spaceNr--;
314 if (ctxt->spaceNr > 0)
315 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
316 else
317 ctxt->space = NULL;
318 ret = ctxt->spaceTab[ctxt->spaceNr];
319 ctxt->spaceTab[ctxt->spaceNr] = -1;
320 return(ret);
321}
322
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 *
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
357
/* Current xmlChar of the current input (RAW and CUR expand identically). */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
/* xmlChar located val positions after the current one. */
#define NXT(val) ctxt->input->cur[(val)]
/* The current-position pointer itself. */
#define CUR_PTR ctxt->input->cur

/*
 * Advance by val xmlChars, updating the character count and the column.
 * Afterwards a '%' at the new position is handed to
 * xmlParserHandlePEReference(), and if the new position holds 0 and the
 * input cannot be grown, the input is popped.
 * Note that val is evaluated three times.
 */
#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val);                                             \
    ctxt->input->cur += (val);                                          \
    ctxt->input->col += (val);                                          \
    if (*ctxt->input->cur == '%')                                       \
        xmlParserHandlePEReference(ctxt);                               \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
        xmlPopInput(ctxt);                                              \
  } while (0)
370
Daniel Veillarda880b122003-04-21 21:36:41 +0000371#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillardb19ba832003-08-14 00:33:46 +0000372 (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) && \
373 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +0000374 xmlSHRINK (ctxt);
375
376static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
377 xmlParserInputShrink(ctxt->input);
378 if ((*ctxt->input->cur == 0) &&
379 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
380 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000381 }
Owen Taylor3473f882001-02-23 17:55:21 +0000382
Daniel Veillarda880b122003-04-21 21:36:41 +0000383#define GROW if ((ctxt->progressive == 0) && \
384 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +0000385 xmlGROW (ctxt);
386
387static void xmlGROW (xmlParserCtxtPtr ctxt) {
388 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
389 if ((*ctxt->input->cur == 0) &&
390 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
391 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +0000392}
Owen Taylor3473f882001-02-23 17:55:21 +0000393
/* Skip blank characters at the current position. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Advance to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * Advance by exactly one xmlChar, updating column and character count,
 * and try to grow the input if the new position holds 0.
 * The expansion is a brace-enclosed block, not a do/while(0) statement.
 */
#define NEXT1 {                                                         \
        ctxt->input->col++;                                             \
        ctxt->input->cur++;                                             \
        ctxt->nbChars++;                                                \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

/*
 * Advance over the current character, which is l xmlChars long, keeping
 * line and column up to date; a '%' at the new position is handed to
 * xmlParserHandlePEReference().
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->input->cur += (l);                                            \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
  } while (0)

/* Current character of the context / of string s; l receives its length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * Append character v to buffer b at index i and advance i: a direct
 * store when l is 1, xmlCopyCharMultiByte() otherwise.
 * The expansion is an unbraced if/else without a trailing semicolon.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);                           \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +0000420
421/**
422 * xmlSkipBlankChars:
423 * @ctxt: the XML parser context
424 *
425 * skip all blanks character found at that point in the input streams.
426 * It pops up finished entities in the process if allowable at that point.
427 *
428 * Returns the number of space chars skipped
429 */
430
431int
432xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000433 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000434
435 /*
436 * It's Okay to use CUR/NEXT here since all the blanks are on
437 * the ASCII range.
438 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000439 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
440 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000441 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000442 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000443 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000444 cur = ctxt->input->cur;
445 while (IS_BLANK(*cur)) {
446 if (*cur == '\n') {
447 ctxt->input->line++; ctxt->input->col = 1;
448 }
449 cur++;
450 res++;
451 if (*cur == 0) {
452 ctxt->input->cur = cur;
453 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
454 cur = ctxt->input->cur;
455 }
456 }
457 ctxt->input->cur = cur;
458 } else {
459 int cur;
460 do {
461 cur = CUR;
462 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
463 NEXT;
464 cur = CUR;
465 res++;
466 }
467 while ((cur == 0) && (ctxt->inputNr > 1) &&
468 (ctxt->instate != XML_PARSER_COMMENT)) {
469 xmlPopInput(ctxt);
470 cur = CUR;
471 }
472 /*
473 * Need to handle support of entities branching here
474 */
475 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
476 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
477 }
Owen Taylor3473f882001-02-23 17:55:21 +0000478 return(res);
479}
480
481/************************************************************************
482 * *
483 * Commodity functions to handle entities *
484 * *
485 ************************************************************************/
486
487/**
488 * xmlPopInput:
489 * @ctxt: an XML parser context
490 *
491 * xmlPopInput: the current input pointed by ctxt->input came to an end
492 * pop it and return the next char.
493 *
494 * Returns the current xmlChar in the parser context
495 */
496xmlChar
497xmlPopInput(xmlParserCtxtPtr ctxt) {
498 if (ctxt->inputNr == 1) return(0); /* End of main Input */
499 if (xmlParserDebugEntities)
500 xmlGenericError(xmlGenericErrorContext,
501 "Popping input %d\n", ctxt->inputNr);
502 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +0000503 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000504 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
505 return(xmlPopInput(ctxt));
506 return(CUR);
507}
508
509/**
510 * xmlPushInput:
511 * @ctxt: an XML parser context
512 * @input: an XML parser input fragment (entity, XML fragment ...).
513 *
514 * xmlPushInput: switch to a new input stream which is stacked on top
515 * of the previous one(s).
516 */
517void
518xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
519 if (input == NULL) return;
520
521 if (xmlParserDebugEntities) {
522 if ((ctxt->input != NULL) && (ctxt->input->filename))
523 xmlGenericError(xmlGenericErrorContext,
524 "%s(%d): ", ctxt->input->filename,
525 ctxt->input->line);
526 xmlGenericError(xmlGenericErrorContext,
527 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
528 }
529 inputPush(ctxt, input);
530 GROW;
531}
532
533/**
534 * xmlParseCharRef:
535 * @ctxt: an XML parser context
536 *
537 * parse Reference declarations
538 *
539 * [66] CharRef ::= '&#' [0-9]+ ';' |
540 * '&#x' [0-9a-fA-F]+ ';'
541 *
542 * [ WFC: Legal Character ]
543 * Characters referred to using character references must match the
544 * production for Char.
545 *
546 * Returns the value parsed (as an int), 0 in case of error
547 */
548int
549xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000550 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000551 int count = 0;
552
Owen Taylor3473f882001-02-23 17:55:21 +0000553 /*
554 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
555 */
Daniel Veillard561b7f82002-03-20 21:55:57 +0000556 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +0000557 (NXT(2) == 'x')) {
558 SKIP(3);
559 GROW;
560 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000561 if (count++ > 20) {
562 count = 0;
563 GROW;
564 }
565 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000566 val = val * 16 + (CUR - '0');
567 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
568 val = val * 16 + (CUR - 'a') + 10;
569 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
570 val = val * 16 + (CUR - 'A') + 10;
571 else {
572 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
573 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
574 ctxt->sax->error(ctxt->userData,
575 "xmlParseCharRef: invalid hexadecimal value\n");
576 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000577 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000578 val = 0;
579 break;
580 }
581 NEXT;
582 count++;
583 }
584 if (RAW == ';') {
585 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +0000586 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +0000587 ctxt->nbChars ++;
588 ctxt->input->cur++;
589 }
Daniel Veillard561b7f82002-03-20 21:55:57 +0000590 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +0000591 SKIP(2);
592 GROW;
593 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000594 if (count++ > 20) {
595 count = 0;
596 GROW;
597 }
598 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000599 val = val * 10 + (CUR - '0');
600 else {
601 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
603 ctxt->sax->error(ctxt->userData,
604 "xmlParseCharRef: invalid decimal value\n");
605 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000606 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000607 val = 0;
608 break;
609 }
610 NEXT;
611 count++;
612 }
613 if (RAW == ';') {
614 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +0000615 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +0000616 ctxt->nbChars ++;
617 ctxt->input->cur++;
618 }
619 } else {
620 ctxt->errNo = XML_ERR_INVALID_CHARREF;
621 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
622 ctxt->sax->error(ctxt->userData,
623 "xmlParseCharRef: invalid value\n");
624 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000625 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000626 }
627
628 /*
629 * [ WFC: Legal Character ]
630 * Characters referred to using character references must match the
631 * production for Char.
632 */
633 if (IS_CHAR(val)) {
634 return(val);
635 } else {
636 ctxt->errNo = XML_ERR_INVALID_CHAR;
637 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000638 ctxt->sax->error(ctxt->userData,
639 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000640 val);
641 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000642 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000643 }
644 return(0);
645}
646
647/**
648 * xmlParseStringCharRef:
649 * @ctxt: an XML parser context
650 * @str: a pointer to an index in the string
651 *
652 * parse Reference declarations, variant parsing from a string rather
653 * than an an input flow.
654 *
655 * [66] CharRef ::= '&#' [0-9]+ ';' |
656 * '&#x' [0-9a-fA-F]+ ';'
657 *
658 * [ WFC: Legal Character ]
659 * Characters referred to using character references must match the
660 * production for Char.
661 *
662 * Returns the value parsed (as an int), 0 in case of error, str will be
663 * updated to the current value of the index
664 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000665static int
Owen Taylor3473f882001-02-23 17:55:21 +0000666xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
667 const xmlChar *ptr;
668 xmlChar cur;
669 int val = 0;
670
671 if ((str == NULL) || (*str == NULL)) return(0);
672 ptr = *str;
673 cur = *ptr;
674 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
675 ptr += 3;
676 cur = *ptr;
677 while (cur != ';') { /* Non input consuming loop */
678 if ((cur >= '0') && (cur <= '9'))
679 val = val * 16 + (cur - '0');
680 else if ((cur >= 'a') && (cur <= 'f'))
681 val = val * 16 + (cur - 'a') + 10;
682 else if ((cur >= 'A') && (cur <= 'F'))
683 val = val * 16 + (cur - 'A') + 10;
684 else {
685 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
686 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
687 ctxt->sax->error(ctxt->userData,
688 "xmlParseStringCharRef: invalid hexadecimal value\n");
689 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000690 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000691 val = 0;
692 break;
693 }
694 ptr++;
695 cur = *ptr;
696 }
697 if (cur == ';')
698 ptr++;
699 } else if ((cur == '&') && (ptr[1] == '#')){
700 ptr += 2;
701 cur = *ptr;
702 while (cur != ';') { /* Non input consuming loops */
703 if ((cur >= '0') && (cur <= '9'))
704 val = val * 10 + (cur - '0');
705 else {
706 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
707 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
708 ctxt->sax->error(ctxt->userData,
709 "xmlParseStringCharRef: invalid decimal value\n");
710 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000711 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000712 val = 0;
713 break;
714 }
715 ptr++;
716 cur = *ptr;
717 }
718 if (cur == ';')
719 ptr++;
720 } else {
721 ctxt->errNo = XML_ERR_INVALID_CHARREF;
722 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
723 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000724 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000725 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000726 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000727 return(0);
728 }
729 *str = ptr;
730
731 /*
732 * [ WFC: Legal Character ]
733 * Characters referred to using character references must match the
734 * production for Char.
735 */
736 if (IS_CHAR(val)) {
737 return(val);
738 } else {
739 ctxt->errNo = XML_ERR_INVALID_CHAR;
740 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
741 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000742 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000743 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000744 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000745 }
746 return(0);
747}
748
749/**
Daniel Veillardf5582f12002-06-11 10:08:16 +0000750 * xmlNewBlanksWrapperInputStream:
751 * @ctxt: an XML parser context
752 * @entity: an Entity pointer
753 *
754 * Create a new input stream for wrapping
755 * blanks around a PEReference
756 *
757 * Returns the new input stream or NULL
758 */
759
760static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
761
Daniel Veillardf4862f02002-09-10 11:13:43 +0000762static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +0000763xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
764 xmlParserInputPtr input;
765 xmlChar *buffer;
766 size_t length;
767 if (entity == NULL) {
768 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
769 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
770 ctxt->sax->error(ctxt->userData,
771 "internal: xmlNewBlanksWrapperInputStream entity = NULL\n");
772 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
773 return(NULL);
774 }
775 if (xmlParserDebugEntities)
776 xmlGenericError(xmlGenericErrorContext,
777 "new blanks wrapper for entity: %s\n", entity->name);
778 input = xmlNewInputStream(ctxt);
779 if (input == NULL) {
780 return(NULL);
781 }
782 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +0000783 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +0000784 if (buffer == NULL) {
785 return(NULL);
786 }
787 buffer [0] = ' ';
788 buffer [1] = '%';
789 buffer [length-3] = ';';
790 buffer [length-2] = ' ';
791 buffer [length-1] = 0;
792 memcpy(buffer + 2, entity->name, length - 5);
793 input->free = deallocblankswrapper;
794 input->base = buffer;
795 input->cur = buffer;
796 input->length = length;
797 input->end = &buffer[length];
798 return(input);
799}
800
801/**
Owen Taylor3473f882001-02-23 17:55:21 +0000802 * xmlParserHandlePEReference:
803 * @ctxt: the parser context
804 *
805 * [69] PEReference ::= '%' Name ';'
806 *
807 * [ WFC: No Recursion ]
808 * A parsed entity must not contain a recursive
809 * reference to itself, either directly or indirectly.
810 *
811 * [ WFC: Entity Declared ]
812 * In a document without any DTD, a document with only an internal DTD
813 * subset which contains no parameter entity references, or a document
814 * with "standalone='yes'", ... ... The declaration of a parameter
815 * entity must precede any reference to it...
816 *
817 * [ VC: Entity Declared ]
818 * In a document with an external subset or external parameter entities
819 * with "standalone='no'", ... ... The declaration of a parameter entity
820 * must precede any reference to it...
821 *
822 * [ WFC: In DTD ]
823 * Parameter-entity references may only appear in the DTD.
824 * NOTE: misleading but this is handled.
825 *
826 * A PEReference may have been detected in the current input stream
827 * the handling is done accordingly to
828 * http://www.w3.org/TR/REC-xml#entproc
829 * i.e.
830 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000831 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000832 */
833void
834xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
835 xmlChar *name;
836 xmlEntityPtr entity = NULL;
837 xmlParserInputPtr input;
838
Owen Taylor3473f882001-02-23 17:55:21 +0000839 if (RAW != '%') return;
840 switch(ctxt->instate) {
841 case XML_PARSER_CDATA_SECTION:
842 return;
843 case XML_PARSER_COMMENT:
844 return;
845 case XML_PARSER_START_TAG:
846 return;
847 case XML_PARSER_END_TAG:
848 return;
849 case XML_PARSER_EOF:
850 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
851 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
852 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
853 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000854 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000855 return;
856 case XML_PARSER_PROLOG:
857 case XML_PARSER_START:
858 case XML_PARSER_MISC:
859 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
860 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
861 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
862 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000863 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000864 return;
865 case XML_PARSER_ENTITY_DECL:
866 case XML_PARSER_CONTENT:
867 case XML_PARSER_ATTRIBUTE_VALUE:
868 case XML_PARSER_PI:
869 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +0000870 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +0000871 /* we just ignore it there */
872 return;
873 case XML_PARSER_EPILOG:
874 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
875 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
876 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
877 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000878 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000879 return;
880 case XML_PARSER_ENTITY_VALUE:
881 /*
882 * NOTE: in the case of entity values, we don't do the
883 * substitution here since we need the literal
884 * entity value to be able to save the internal
885 * subset of the document.
886 * This will be handled by xmlStringDecodeEntities
887 */
888 return;
889 case XML_PARSER_DTD:
890 /*
891 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
892 * In the internal DTD subset, parameter-entity references
893 * can occur only where markup declarations can occur, not
894 * within markup declarations.
895 * In that case this is handled in xmlParseMarkupDecl
896 */
897 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
898 return;
Daniel Veillardf5582f12002-06-11 10:08:16 +0000899 if (IS_BLANK(NXT(1)) || NXT(1) == 0)
900 return;
Owen Taylor3473f882001-02-23 17:55:21 +0000901 break;
902 case XML_PARSER_IGNORE:
903 return;
904 }
905
906 NEXT;
907 name = xmlParseName(ctxt);
908 if (xmlParserDebugEntities)
909 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000910 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000911 if (name == NULL) {
912 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
913 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000914 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000915 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000916 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000917 } else {
918 if (RAW == ';') {
919 NEXT;
920 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
921 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
922 if (entity == NULL) {
923
924 /*
925 * [ WFC: Entity Declared ]
926 * In a document without any DTD, a document with only an
927 * internal DTD subset which contains no parameter entity
928 * references, or a document with "standalone='yes'", ...
929 * ... The declaration of a parameter entity must precede
930 * any reference to it...
931 */
932 if ((ctxt->standalone == 1) ||
933 ((ctxt->hasExternalSubset == 0) &&
934 (ctxt->hasPErefs == 0))) {
935 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
936 ctxt->sax->error(ctxt->userData,
937 "PEReference: %%%s; not found\n", name);
938 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000939 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000940 } else {
941 /*
942 * [ VC: Entity Declared ]
943 * In a document with an external subset or external
944 * parameter entities with "standalone='no'", ...
945 * ... The declaration of a parameter entity must precede
946 * any reference to it...
947 */
948 if ((!ctxt->disableSAX) &&
949 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
950 ctxt->vctxt.error(ctxt->vctxt.userData,
951 "PEReference: %%%s; not found\n", name);
952 } else if ((!ctxt->disableSAX) &&
953 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
954 ctxt->sax->warning(ctxt->userData,
955 "PEReference: %%%s; not found\n", name);
956 ctxt->valid = 0;
957 }
Daniel Veillardf5582f12002-06-11 10:08:16 +0000958 } else if (ctxt->input->free != deallocblankswrapper) {
959 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
960 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +0000961 } else {
962 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
963 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000964 xmlChar start[4];
965 xmlCharEncoding enc;
966
Owen Taylor3473f882001-02-23 17:55:21 +0000967 /*
968 * handle the extra spaces added before and after
969 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000970 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000971 */
972 input = xmlNewEntityInputStream(ctxt, entity);
973 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000974
975 /*
976 * Get the 4 first bytes and decode the charset
977 * if enc != XML_CHAR_ENCODING_NONE
978 * plug some encoding conversion routines.
979 */
980 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +0000981 if (entity->length >= 4) {
982 start[0] = RAW;
983 start[1] = NXT(1);
984 start[2] = NXT(2);
985 start[3] = NXT(3);
986 enc = xmlDetectCharEncoding(start, 4);
987 if (enc != XML_CHAR_ENCODING_NONE) {
988 xmlSwitchEncoding(ctxt, enc);
989 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000990 }
991
Owen Taylor3473f882001-02-23 17:55:21 +0000992 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
993 (RAW == '<') && (NXT(1) == '?') &&
994 (NXT(2) == 'x') && (NXT(3) == 'm') &&
995 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
996 xmlParseTextDecl(ctxt);
997 }
Owen Taylor3473f882001-02-23 17:55:21 +0000998 } else {
999 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1000 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001001 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +00001002 name);
1003 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001004 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001005 }
1006 }
1007 } else {
1008 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1009 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1010 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001011 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001012 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001013 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001014 }
1015 xmlFree(name);
1016 }
1017}
1018
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates buffer accordingly. On allocation
 * failure the old buffer is released (it would otherwise leak, since the
 * macro returns NULL from the enclosing function right away).
 * The enclosing function must therefore return a pointer type.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_tmp;						\
    buffer##_size *= 2;							\
    buffer##_tmp = (xmlChar *)						\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (buffer##_tmp == NULL) {						\
	xmlGenericError(xmlGenericErrorContext, "realloc failed");	\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = buffer##_tmp;						\
}
1031
1032/**
1033 * xmlStringDecodeEntities:
1034 * @ctxt: the parser context
1035 * @str: the input string
1036 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1037 * @end: an end marker xmlChar, 0 if none
1038 * @end2: an end marker xmlChar, 0 if none
1039 * @end3: an end marker xmlChar, 0 if none
1040 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001041 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001042 *
1043 * [67] Reference ::= EntityRef | CharRef
1044 *
1045 * [69] PEReference ::= '%' Name ';'
1046 *
1047 * Returns A newly allocated string with the substitution done. The caller
1048 * must deallocate it !
1049 */
1050xmlChar *
1051xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1052 xmlChar end, xmlChar end2, xmlChar end3) {
1053 xmlChar *buffer = NULL;
1054 int buffer_size = 0;
1055
1056 xmlChar *current = NULL;
1057 xmlEntityPtr ent;
1058 int c,l;
1059 int nbchars = 0;
1060
1061 if (str == NULL)
1062 return(NULL);
1063
1064 if (ctxt->depth > 40) {
1065 ctxt->errNo = XML_ERR_ENTITY_LOOP;
1066 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1067 ctxt->sax->error(ctxt->userData,
1068 "Detected entity reference loop\n");
1069 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001070 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001071 return(NULL);
1072 }
1073
1074 /*
1075 * allocate a translation buffer.
1076 */
1077 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001078 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001079 if (buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00001080 xmlGenericError(xmlGenericErrorContext,
1081 "xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00001082 return(NULL);
1083 }
1084
1085 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001086 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001087 * we are operating on already parsed values.
1088 */
1089 c = CUR_SCHAR(str, l);
1090 while ((c != 0) && (c != end) && /* non input consuming loop */
1091 (c != end2) && (c != end3)) {
1092
1093 if (c == 0) break;
1094 if ((c == '&') && (str[1] == '#')) {
1095 int val = xmlParseStringCharRef(ctxt, &str);
1096 if (val != 0) {
1097 COPY_BUF(0,buffer,nbchars,val);
1098 }
1099 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1100 if (xmlParserDebugEntities)
1101 xmlGenericError(xmlGenericErrorContext,
1102 "String decoding Entity Reference: %.30s\n",
1103 str);
1104 ent = xmlParseStringEntityRef(ctxt, &str);
1105 if ((ent != NULL) &&
1106 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1107 if (ent->content != NULL) {
1108 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1109 } else {
1110 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1111 ctxt->sax->error(ctxt->userData,
1112 "internal error entity has no content\n");
1113 }
1114 } else if ((ent != NULL) && (ent->content != NULL)) {
1115 xmlChar *rep;
1116
1117 ctxt->depth++;
1118 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1119 0, 0, 0);
1120 ctxt->depth--;
1121 if (rep != NULL) {
1122 current = rep;
1123 while (*current != 0) { /* non input consuming loop */
1124 buffer[nbchars++] = *current++;
1125 if (nbchars >
1126 buffer_size - XML_PARSER_BUFFER_SIZE) {
1127 growBuffer(buffer);
1128 }
1129 }
1130 xmlFree(rep);
1131 }
1132 } else if (ent != NULL) {
1133 int i = xmlStrlen(ent->name);
1134 const xmlChar *cur = ent->name;
1135
1136 buffer[nbchars++] = '&';
1137 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1138 growBuffer(buffer);
1139 }
1140 for (;i > 0;i--)
1141 buffer[nbchars++] = *cur++;
1142 buffer[nbchars++] = ';';
1143 }
1144 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1145 if (xmlParserDebugEntities)
1146 xmlGenericError(xmlGenericErrorContext,
1147 "String decoding PE Reference: %.30s\n", str);
1148 ent = xmlParseStringPEReference(ctxt, &str);
1149 if (ent != NULL) {
1150 xmlChar *rep;
1151
1152 ctxt->depth++;
1153 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1154 0, 0, 0);
1155 ctxt->depth--;
1156 if (rep != NULL) {
1157 current = rep;
1158 while (*current != 0) { /* non input consuming loop */
1159 buffer[nbchars++] = *current++;
1160 if (nbchars >
1161 buffer_size - XML_PARSER_BUFFER_SIZE) {
1162 growBuffer(buffer);
1163 }
1164 }
1165 xmlFree(rep);
1166 }
1167 }
1168 } else {
1169 COPY_BUF(l,buffer,nbchars,c);
1170 str += l;
1171 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1172 growBuffer(buffer);
1173 }
1174 }
1175 c = CUR_SCHAR(str, l);
1176 }
1177 buffer[nbchars++] = 0;
1178 return(buffer);
1179}
1180
1181
1182/************************************************************************
1183 * *
1184 * Commodity functions to handle xmlChars *
1185 * *
1186 ************************************************************************/
1187
1188/**
1189 * xmlStrndup:
1190 * @cur: the input xmlChar *
1191 * @len: the len of @cur
1192 *
1193 * a strndup for array of xmlChar's
1194 *
1195 * Returns a new xmlChar * or NULL
1196 */
1197xmlChar *
1198xmlStrndup(const xmlChar *cur, int len) {
1199 xmlChar *ret;
1200
1201 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001202 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001203 if (ret == NULL) {
1204 xmlGenericError(xmlGenericErrorContext,
1205 "malloc of %ld byte failed\n",
1206 (len + 1) * (long)sizeof(xmlChar));
1207 return(NULL);
1208 }
1209 memcpy(ret, cur, len * sizeof(xmlChar));
1210 ret[len] = 0;
1211 return(ret);
1212}
1213
1214/**
1215 * xmlStrdup:
1216 * @cur: the input xmlChar *
1217 *
1218 * a strdup for array of xmlChar's. Since they are supposed to be
1219 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1220 * a termination mark of '0'.
1221 *
1222 * Returns a new xmlChar * or NULL
1223 */
1224xmlChar *
1225xmlStrdup(const xmlChar *cur) {
1226 const xmlChar *p = cur;
1227
1228 if (cur == NULL) return(NULL);
1229 while (*p != 0) p++; /* non input consuming */
1230 return(xmlStrndup(cur, p - cur));
1231}
1232
1233/**
1234 * xmlCharStrndup:
1235 * @cur: the input char *
1236 * @len: the len of @cur
1237 *
1238 * a strndup for char's to xmlChar's
1239 *
1240 * Returns a new xmlChar * or NULL
1241 */
1242
1243xmlChar *
1244xmlCharStrndup(const char *cur, int len) {
1245 int i;
1246 xmlChar *ret;
1247
1248 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001249 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001250 if (ret == NULL) {
1251 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1252 (len + 1) * (long)sizeof(xmlChar));
1253 return(NULL);
1254 }
1255 for (i = 0;i < len;i++)
1256 ret[i] = (xmlChar) cur[i];
1257 ret[len] = 0;
1258 return(ret);
1259}
1260
1261/**
1262 * xmlCharStrdup:
1263 * @cur: the input char *
Owen Taylor3473f882001-02-23 17:55:21 +00001264 *
1265 * a strdup for char's to xmlChar's
1266 *
1267 * Returns a new xmlChar * or NULL
1268 */
1269
1270xmlChar *
1271xmlCharStrdup(const char *cur) {
1272 const char *p = cur;
1273
1274 if (cur == NULL) return(NULL);
1275 while (*p != '\0') p++; /* non input consuming */
1276 return(xmlCharStrndup(cur, p - cur));
1277}
1278
1279/**
1280 * xmlStrcmp:
1281 * @str1: the first xmlChar *
1282 * @str2: the second xmlChar *
1283 *
1284 * a strcmp for xmlChar's
1285 *
1286 * Returns the integer result of the comparison
1287 */
1288
1289int
1290xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1291 register int tmp;
1292
1293 if (str1 == str2) return(0);
1294 if (str1 == NULL) return(-1);
1295 if (str2 == NULL) return(1);
1296 do {
1297 tmp = *str1++ - *str2;
1298 if (tmp != 0) return(tmp);
1299 } while (*str2++ != 0);
1300 return 0;
1301}
1302
1303/**
1304 * xmlStrEqual:
1305 * @str1: the first xmlChar *
1306 * @str2: the second xmlChar *
1307 *
1308 * Check if both string are equal of have same content
1309 * Should be a bit more readable and faster than xmlStrEqual()
1310 *
1311 * Returns 1 if they are equal, 0 if they are different
1312 */
1313
1314int
1315xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1316 if (str1 == str2) return(1);
1317 if (str1 == NULL) return(0);
1318 if (str2 == NULL) return(0);
1319 do {
1320 if (*str1++ != *str2) return(0);
1321 } while (*str2++);
1322 return(1);
1323}
1324
1325/**
1326 * xmlStrncmp:
1327 * @str1: the first xmlChar *
1328 * @str2: the second xmlChar *
1329 * @len: the max comparison length
1330 *
1331 * a strncmp for xmlChar's
1332 *
1333 * Returns the integer result of the comparison
1334 */
1335
1336int
1337xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1338 register int tmp;
1339
1340 if (len <= 0) return(0);
1341 if (str1 == str2) return(0);
1342 if (str1 == NULL) return(-1);
1343 if (str2 == NULL) return(1);
1344 do {
1345 tmp = *str1++ - *str2;
1346 if (tmp != 0 || --len == 0) return(tmp);
1347 } while (*str2++ != 0);
1348 return 0;
1349}
1350
Daniel Veillardb44025c2001-10-11 22:55:55 +00001351static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001352 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1353 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1354 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1355 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1356 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1357 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1358 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1359 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1360 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1361 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1362 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1363 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1364 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1365 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1366 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1367 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1368 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1369 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1370 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1371 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1372 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1373 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1374 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1375 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1376 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1377 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1378 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1379 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1380 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1381 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1382 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1383 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1384};
1385
1386/**
1387 * xmlStrcasecmp:
1388 * @str1: the first xmlChar *
1389 * @str2: the second xmlChar *
1390 *
1391 * a strcasecmp for xmlChar's
1392 *
1393 * Returns the integer result of the comparison
1394 */
1395
1396int
1397xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1398 register int tmp;
1399
1400 if (str1 == str2) return(0);
1401 if (str1 == NULL) return(-1);
1402 if (str2 == NULL) return(1);
1403 do {
1404 tmp = casemap[*str1++] - casemap[*str2];
1405 if (tmp != 0) return(tmp);
1406 } while (*str2++ != 0);
1407 return 0;
1408}
1409
1410/**
1411 * xmlStrncasecmp:
1412 * @str1: the first xmlChar *
1413 * @str2: the second xmlChar *
1414 * @len: the max comparison length
1415 *
1416 * a strncasecmp for xmlChar's
1417 *
1418 * Returns the integer result of the comparison
1419 */
1420
1421int
1422xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1423 register int tmp;
1424
1425 if (len <= 0) return(0);
1426 if (str1 == str2) return(0);
1427 if (str1 == NULL) return(-1);
1428 if (str2 == NULL) return(1);
1429 do {
1430 tmp = casemap[*str1++] - casemap[*str2];
1431 if (tmp != 0 || --len == 0) return(tmp);
1432 } while (*str2++ != 0);
1433 return 0;
1434}
1435
1436/**
1437 * xmlStrchr:
1438 * @str: the xmlChar * array
1439 * @val: the xmlChar to search
1440 *
1441 * a strchr for xmlChar's
1442 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001443 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001444 */
1445
1446const xmlChar *
1447xmlStrchr(const xmlChar *str, xmlChar val) {
1448 if (str == NULL) return(NULL);
1449 while (*str != 0) { /* non input consuming */
1450 if (*str == val) return((xmlChar *) str);
1451 str++;
1452 }
1453 return(NULL);
1454}
1455
1456/**
1457 * xmlStrstr:
1458 * @str: the xmlChar * array (haystack)
1459 * @val: the xmlChar to search (needle)
1460 *
1461 * a strstr for xmlChar's
1462 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001463 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001464 */
1465
1466const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001467xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001468 int n;
1469
1470 if (str == NULL) return(NULL);
1471 if (val == NULL) return(NULL);
1472 n = xmlStrlen(val);
1473
1474 if (n == 0) return(str);
1475 while (*str != 0) { /* non input consuming */
1476 if (*str == *val) {
1477 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1478 }
1479 str++;
1480 }
1481 return(NULL);
1482}
1483
1484/**
1485 * xmlStrcasestr:
1486 * @str: the xmlChar * array (haystack)
1487 * @val: the xmlChar to search (needle)
1488 *
1489 * a case-ignoring strstr for xmlChar's
1490 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001491 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001492 */
1493
1494const xmlChar *
1495xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1496 int n;
1497
1498 if (str == NULL) return(NULL);
1499 if (val == NULL) return(NULL);
1500 n = xmlStrlen(val);
1501
1502 if (n == 0) return(str);
1503 while (*str != 0) { /* non input consuming */
1504 if (casemap[*str] == casemap[*val])
1505 if (!xmlStrncasecmp(str, val, n)) return(str);
1506 str++;
1507 }
1508 return(NULL);
1509}
1510
1511/**
1512 * xmlStrsub:
1513 * @str: the xmlChar * array (haystack)
1514 * @start: the index of the first char (zero based)
1515 * @len: the length of the substring
1516 *
1517 * Extract a substring of a given string
1518 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001519 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001520 */
1521
1522xmlChar *
1523xmlStrsub(const xmlChar *str, int start, int len) {
1524 int i;
1525
1526 if (str == NULL) return(NULL);
1527 if (start < 0) return(NULL);
1528 if (len < 0) return(NULL);
1529
1530 for (i = 0;i < start;i++) {
1531 if (*str == 0) return(NULL);
1532 str++;
1533 }
1534 if (*str == 0) return(NULL);
1535 return(xmlStrndup(str, len));
1536}
1537
1538/**
1539 * xmlStrlen:
1540 * @str: the xmlChar * array
1541 *
1542 * length of a xmlChar's string
1543 *
1544 * Returns the number of xmlChar contained in the ARRAY.
1545 */
1546
1547int
1548xmlStrlen(const xmlChar *str) {
1549 int len = 0;
1550
1551 if (str == NULL) return(0);
1552 while (*str != 0) { /* non input consuming */
1553 str++;
1554 len++;
1555 }
1556 return(len);
1557}
1558
1559/**
1560 * xmlStrncat:
1561 * @cur: the original xmlChar * array
1562 * @add: the xmlChar * array added
1563 * @len: the length of @add
1564 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001565 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001566 * first bytes of @add.
1567 *
1568 * Returns a new xmlChar *, the original @cur is reallocated if needed
1569 * and should not be freed
1570 */
1571
1572xmlChar *
1573xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1574 int size;
1575 xmlChar *ret;
1576
1577 if ((add == NULL) || (len == 0))
1578 return(cur);
1579 if (cur == NULL)
1580 return(xmlStrndup(add, len));
1581
1582 size = xmlStrlen(cur);
1583 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1584 if (ret == NULL) {
1585 xmlGenericError(xmlGenericErrorContext,
1586 "xmlStrncat: realloc of %ld byte failed\n",
1587 (size + len + 1) * (long)sizeof(xmlChar));
1588 return(cur);
1589 }
1590 memcpy(&ret[size], add, len * sizeof(xmlChar));
1591 ret[size + len] = 0;
1592 return(ret);
1593}
1594
1595/**
1596 * xmlStrcat:
1597 * @cur: the original xmlChar * array
1598 * @add: the xmlChar * array added
1599 *
1600 * a strcat for array of xmlChar's. Since they are supposed to be
1601 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1602 * a termination mark of '0'.
1603 *
1604 * Returns a new xmlChar * containing the concatenated string.
1605 */
1606xmlChar *
1607xmlStrcat(xmlChar *cur, const xmlChar *add) {
1608 const xmlChar *p = add;
1609
1610 if (add == NULL) return(cur);
1611 if (cur == NULL)
1612 return(xmlStrdup(add));
1613
1614 while (*p != 0) p++; /* non input consuming */
1615 return(xmlStrncat(cur, add, p - add));
1616}
1617
1618/************************************************************************
1619 * *
1620 * Commodity functions, cleanup needed ? *
1621 * *
1622 ************************************************************************/
1623
1624/**
1625 * areBlanks:
1626 * @ctxt: an XML parser context
1627 * @str: a xmlChar *
1628 * @len: the size of @str
1629 *
1630 * Is this a sequence of blank chars that one can ignore ?
1631 *
1632 * Returns 1 if ignorable 0 otherwise.
1633 */
1634
1635static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1636 int i, ret;
1637 xmlNodePtr lastChild;
1638
Daniel Veillard05c13a22001-09-09 08:38:09 +00001639 /*
1640 * Don't spend time trying to differentiate them, the same callback is
1641 * used !
1642 */
1643 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001644 return(0);
1645
Owen Taylor3473f882001-02-23 17:55:21 +00001646 /*
1647 * Check for xml:space value.
1648 */
1649 if (*(ctxt->space) == 1)
1650 return(0);
1651
1652 /*
1653 * Check that the string is made of blanks
1654 */
1655 for (i = 0;i < len;i++)
1656 if (!(IS_BLANK(str[i]))) return(0);
1657
1658 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001659 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001660 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001661 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001662 if (ctxt->myDoc != NULL) {
1663 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1664 if (ret == 0) return(1);
1665 if (ret == 1) return(0);
1666 }
1667
1668 /*
1669 * Otherwise, heuristic :-\
1670 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001671 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001672 if ((ctxt->node->children == NULL) &&
1673 (RAW == '<') && (NXT(1) == '/')) return(0);
1674
1675 lastChild = xmlGetLastChild(ctxt->node);
1676 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001677 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1678 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001679 } else if (xmlNodeIsText(lastChild))
1680 return(0);
1681 else if ((ctxt->node->children != NULL) &&
1682 (xmlNodeIsText(ctxt->node->children)))
1683 return(0);
1684 return(1);
1685}
1686
Owen Taylor3473f882001-02-23 17:55:21 +00001687/************************************************************************
1688 * *
1689 * Extra stuff for namespace support *
1690 * Relates to http://www.w3.org/TR/WD-xml-names *
1691 * *
1692 ************************************************************************/
1693
1694/**
1695 * xmlSplitQName:
1696 * @ctxt: an XML parser context
1697 * @name: an XML parser context
1698 * @prefix: a xmlChar **
1699 *
1700 * parse an UTF8 encoded XML qualified name string
1701 *
1702 * [NS 5] QName ::= (Prefix ':')? LocalPart
1703 *
1704 * [NS 6] Prefix ::= NCName
1705 *
1706 * [NS 7] LocalPart ::= NCName
1707 *
1708 * Returns the local part, and prefix is updated
1709 * to get the Prefix if any.
1710 */
1711
1712xmlChar *
1713xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1714 xmlChar buf[XML_MAX_NAMELEN + 5];
1715 xmlChar *buffer = NULL;
1716 int len = 0;
1717 int max = XML_MAX_NAMELEN;
1718 xmlChar *ret = NULL;
1719 const xmlChar *cur = name;
1720 int c;
1721
1722 *prefix = NULL;
1723
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00001724 if (cur == NULL) return(NULL);
1725
Owen Taylor3473f882001-02-23 17:55:21 +00001726#ifndef XML_XML_NAMESPACE
1727 /* xml: prefix is not really a namespace */
1728 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1729 (cur[2] == 'l') && (cur[3] == ':'))
1730 return(xmlStrdup(name));
1731#endif
1732
Daniel Veillard597bc482003-07-24 16:08:28 +00001733 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00001734 if (cur[0] == ':')
1735 return(xmlStrdup(name));
1736
1737 c = *cur++;
1738 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1739 buf[len++] = c;
1740 c = *cur++;
1741 }
1742 if (len >= max) {
1743 /*
1744 * Okay someone managed to make a huge name, so he's ready to pay
1745 * for the processing speed.
1746 */
1747 max = len * 2;
1748
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001749 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001750 if (buffer == NULL) {
1751 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1752 ctxt->sax->error(ctxt->userData,
1753 "xmlSplitQName: out of memory\n");
1754 return(NULL);
1755 }
1756 memcpy(buffer, buf, len);
1757 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1758 if (len + 10 > max) {
1759 max *= 2;
1760 buffer = (xmlChar *) xmlRealloc(buffer,
1761 max * sizeof(xmlChar));
1762 if (buffer == NULL) {
1763 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1764 ctxt->sax->error(ctxt->userData,
1765 "xmlSplitQName: out of memory\n");
1766 return(NULL);
1767 }
1768 }
1769 buffer[len++] = c;
1770 c = *cur++;
1771 }
1772 buffer[len] = 0;
1773 }
1774
Daniel Veillard597bc482003-07-24 16:08:28 +00001775 /* nasty but well=formed
1776 if ((c == ':') && (*cur == 0)) {
1777 return(xmlStrdup(name));
1778 } */
1779
Owen Taylor3473f882001-02-23 17:55:21 +00001780 if (buffer == NULL)
1781 ret = xmlStrndup(buf, len);
1782 else {
1783 ret = buffer;
1784 buffer = NULL;
1785 max = XML_MAX_NAMELEN;
1786 }
1787
1788
1789 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00001790 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001791 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00001792 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00001793 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00001794 }
Owen Taylor3473f882001-02-23 17:55:21 +00001795 len = 0;
1796
Daniel Veillardbb284f42002-10-16 18:02:47 +00001797 /*
1798 * Check that the first character is proper to start
1799 * a new name
1800 */
1801 if (!(((c >= 0x61) && (c <= 0x7A)) ||
1802 ((c >= 0x41) && (c <= 0x5A)) ||
1803 (c == '_') || (c == ':'))) {
1804 int l;
1805 int first = CUR_SCHAR(cur, l);
1806
1807 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001808 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1809 (ctxt->sax->error != NULL))
Daniel Veillardbb284f42002-10-16 18:02:47 +00001810 ctxt->sax->error(ctxt->userData,
1811 "Name %s is not XML Namespace compliant\n",
1812 name);
1813 }
1814 }
1815 cur++;
1816
Owen Taylor3473f882001-02-23 17:55:21 +00001817 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1818 buf[len++] = c;
1819 c = *cur++;
1820 }
1821 if (len >= max) {
1822 /*
1823 * Okay someone managed to make a huge name, so he's ready to pay
1824 * for the processing speed.
1825 */
1826 max = len * 2;
1827
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001828 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001829 if (buffer == NULL) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001830 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1831 (ctxt->sax->error != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00001832 ctxt->sax->error(ctxt->userData,
1833 "xmlSplitQName: out of memory\n");
1834 return(NULL);
1835 }
1836 memcpy(buffer, buf, len);
1837 while (c != 0) { /* tested bigname2.xml */
1838 if (len + 10 > max) {
1839 max *= 2;
1840 buffer = (xmlChar *) xmlRealloc(buffer,
1841 max * sizeof(xmlChar));
1842 if (buffer == NULL) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001843 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1844 (ctxt->sax->error != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00001845 ctxt->sax->error(ctxt->userData,
1846 "xmlSplitQName: out of memory\n");
1847 return(NULL);
1848 }
1849 }
1850 buffer[len++] = c;
1851 c = *cur++;
1852 }
1853 buffer[len] = 0;
1854 }
1855
1856 if (buffer == NULL)
1857 ret = xmlStrndup(buf, len);
1858 else {
1859 ret = buffer;
1860 }
1861 }
1862
1863 return(ret);
1864}
1865
1866/************************************************************************
1867 * *
1868 * The parser itself *
1869 * Relates to http://www.w3.org/TR/REC-xml *
1870 * *
1871 ************************************************************************/
1872
Daniel Veillard76d66f42001-05-16 21:05:17 +00001873static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001874/**
1875 * xmlParseName:
1876 * @ctxt: an XML parser context
1877 *
1878 * parse an XML name.
1879 *
1880 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1881 * CombiningChar | Extender
1882 *
1883 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1884 *
1885 * [6] Names ::= Name (S Name)*
1886 *
1887 * Returns the Name parsed or NULL
1888 */
1889
1890xmlChar *
1891xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001892 const xmlChar *in;
1893 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001894 int count = 0;
1895
1896 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001897
1898 /*
1899 * Accelerator for simple ASCII names
1900 */
1901 in = ctxt->input->cur;
1902 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1903 ((*in >= 0x41) && (*in <= 0x5A)) ||
1904 (*in == '_') || (*in == ':')) {
1905 in++;
1906 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1907 ((*in >= 0x41) && (*in <= 0x5A)) ||
1908 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001909 (*in == '_') || (*in == '-') ||
1910 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001911 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001912 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001913 count = in - ctxt->input->cur;
1914 ret = xmlStrndup(ctxt->input->cur, count);
1915 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00001916 ctxt->nbChars += count;
1917 ctxt->input->col += count;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00001918 if (ret == NULL) {
1919 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1920 ctxt->sax->error(ctxt->userData,
1921 "XML parser: out of memory\n");
1922 ctxt->errNo = XML_ERR_NO_MEMORY;
1923 ctxt->instate = XML_PARSER_EOF;
1924 ctxt->disableSAX = 1;
1925 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001926 return(ret);
1927 }
1928 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001929 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001930}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001931
Daniel Veillard46de64e2002-05-29 08:21:33 +00001932/**
1933 * xmlParseNameAndCompare:
1934 * @ctxt: an XML parser context
1935 *
1936 * parse an XML name and compares for match
1937 * (specialized for endtag parsing)
1938 *
1939 *
1940 * Returns NULL for an illegal name, (xmlChar*) 1 for success
1941 * and the name for mismatch
1942 */
1943
Daniel Veillardf4862f02002-09-10 11:13:43 +00001944static xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00001945xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
1946 const xmlChar *cmp = other;
1947 const xmlChar *in;
1948 xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00001949
1950 GROW;
1951
1952 in = ctxt->input->cur;
1953 while (*in != 0 && *in == *cmp) {
1954 ++in;
1955 ++cmp;
1956 }
1957 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
1958 /* success */
1959 ctxt->input->cur = in;
1960 return (xmlChar*) 1;
1961 }
1962 /* failure (or end of input buffer), check with full function */
1963 ret = xmlParseName (ctxt);
1964 if (ret != 0 && xmlStrEqual (ret, other)) {
1965 xmlFree (ret);
1966 return (xmlChar*) 1;
1967 }
1968 return ret;
1969}
1970
Daniel Veillard76d66f42001-05-16 21:05:17 +00001971static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001972xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1973 xmlChar buf[XML_MAX_NAMELEN + 5];
1974 int len = 0, l;
1975 int c;
1976 int count = 0;
1977
1978 /*
1979 * Handler for more complex cases
1980 */
1981 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001982 c = CUR_CHAR(l);
1983 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1984 (!IS_LETTER(c) && (c != '_') &&
1985 (c != ':'))) {
1986 return(NULL);
1987 }
1988
1989 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1990 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1991 (c == '.') || (c == '-') ||
1992 (c == '_') || (c == ':') ||
1993 (IS_COMBINING(c)) ||
1994 (IS_EXTENDER(c)))) {
1995 if (count++ > 100) {
1996 count = 0;
1997 GROW;
1998 }
1999 COPY_BUF(l,buf,len,c);
2000 NEXTL(l);
2001 c = CUR_CHAR(l);
2002 if (len >= XML_MAX_NAMELEN) {
2003 /*
2004 * Okay someone managed to make a huge name, so he's ready to pay
2005 * for the processing speed.
2006 */
2007 xmlChar *buffer;
2008 int max = len * 2;
2009
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002010 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002011 if (buffer == NULL) {
2012 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2013 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00002014 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002015 return(NULL);
2016 }
2017 memcpy(buffer, buf, len);
2018 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
2019 (c == '.') || (c == '-') ||
2020 (c == '_') || (c == ':') ||
2021 (IS_COMBINING(c)) ||
2022 (IS_EXTENDER(c))) {
2023 if (count++ > 100) {
2024 count = 0;
2025 GROW;
2026 }
2027 if (len + 10 > max) {
2028 max *= 2;
2029 buffer = (xmlChar *) xmlRealloc(buffer,
2030 max * sizeof(xmlChar));
2031 if (buffer == NULL) {
2032 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2033 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00002034 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002035 return(NULL);
2036 }
2037 }
2038 COPY_BUF(l,buffer,len,c);
2039 NEXTL(l);
2040 c = CUR_CHAR(l);
2041 }
2042 buffer[len] = 0;
2043 return(buffer);
2044 }
2045 }
2046 return(xmlStrndup(buf, len));
2047}
2048
2049/**
2050 * xmlParseStringName:
2051 * @ctxt: an XML parser context
2052 * @str: a pointer to the string pointer (IN/OUT)
2053 *
2054 * parse an XML name.
2055 *
2056 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2057 * CombiningChar | Extender
2058 *
2059 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2060 *
2061 * [6] Names ::= Name (S Name)*
2062 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002063 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002064 * is updated to the current location in the string.
2065 */
2066
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002067static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002068xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2069 xmlChar buf[XML_MAX_NAMELEN + 5];
2070 const xmlChar *cur = *str;
2071 int len = 0, l;
2072 int c;
2073
2074 c = CUR_SCHAR(cur, l);
2075 if (!IS_LETTER(c) && (c != '_') &&
2076 (c != ':')) {
2077 return(NULL);
2078 }
2079
2080 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2081 (c == '.') || (c == '-') ||
2082 (c == '_') || (c == ':') ||
2083 (IS_COMBINING(c)) ||
2084 (IS_EXTENDER(c))) {
2085 COPY_BUF(l,buf,len,c);
2086 cur += l;
2087 c = CUR_SCHAR(cur, l);
2088 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2089 /*
2090 * Okay someone managed to make a huge name, so he's ready to pay
2091 * for the processing speed.
2092 */
2093 xmlChar *buffer;
2094 int max = len * 2;
2095
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002096 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002097 if (buffer == NULL) {
2098 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2099 ctxt->sax->error(ctxt->userData,
2100 "xmlParseStringName: out of memory\n");
2101 return(NULL);
2102 }
2103 memcpy(buffer, buf, len);
2104 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2105 (c == '.') || (c == '-') ||
2106 (c == '_') || (c == ':') ||
2107 (IS_COMBINING(c)) ||
2108 (IS_EXTENDER(c))) {
2109 if (len + 10 > max) {
2110 max *= 2;
2111 buffer = (xmlChar *) xmlRealloc(buffer,
2112 max * sizeof(xmlChar));
2113 if (buffer == NULL) {
2114 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2115 ctxt->sax->error(ctxt->userData,
2116 "xmlParseStringName: out of memory\n");
2117 return(NULL);
2118 }
2119 }
2120 COPY_BUF(l,buffer,len,c);
2121 cur += l;
2122 c = CUR_SCHAR(cur, l);
2123 }
2124 buffer[len] = 0;
2125 *str = cur;
2126 return(buffer);
2127 }
2128 }
2129 *str = cur;
2130 return(xmlStrndup(buf, len));
2131}
2132
2133/**
2134 * xmlParseNmtoken:
2135 * @ctxt: an XML parser context
2136 *
2137 * parse an XML Nmtoken.
2138 *
2139 * [7] Nmtoken ::= (NameChar)+
2140 *
2141 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2142 *
2143 * Returns the Nmtoken parsed or NULL
2144 */
2145
2146xmlChar *
2147xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2148 xmlChar buf[XML_MAX_NAMELEN + 5];
2149 int len = 0, l;
2150 int c;
2151 int count = 0;
2152
2153 GROW;
2154 c = CUR_CHAR(l);
2155
2156 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2157 (c == '.') || (c == '-') ||
2158 (c == '_') || (c == ':') ||
2159 (IS_COMBINING(c)) ||
2160 (IS_EXTENDER(c))) {
2161 if (count++ > 100) {
2162 count = 0;
2163 GROW;
2164 }
2165 COPY_BUF(l,buf,len,c);
2166 NEXTL(l);
2167 c = CUR_CHAR(l);
2168 if (len >= XML_MAX_NAMELEN) {
2169 /*
2170 * Okay someone managed to make a huge token, so he's ready to pay
2171 * for the processing speed.
2172 */
2173 xmlChar *buffer;
2174 int max = len * 2;
2175
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002176 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002177 if (buffer == NULL) {
2178 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2179 ctxt->sax->error(ctxt->userData,
2180 "xmlParseNmtoken: out of memory\n");
2181 return(NULL);
2182 }
2183 memcpy(buffer, buf, len);
2184 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2185 (c == '.') || (c == '-') ||
2186 (c == '_') || (c == ':') ||
2187 (IS_COMBINING(c)) ||
2188 (IS_EXTENDER(c))) {
2189 if (count++ > 100) {
2190 count = 0;
2191 GROW;
2192 }
2193 if (len + 10 > max) {
2194 max *= 2;
2195 buffer = (xmlChar *) xmlRealloc(buffer,
2196 max * sizeof(xmlChar));
2197 if (buffer == NULL) {
2198 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2199 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002200 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002201 return(NULL);
2202 }
2203 }
2204 COPY_BUF(l,buffer,len,c);
2205 NEXTL(l);
2206 c = CUR_CHAR(l);
2207 }
2208 buffer[len] = 0;
2209 return(buffer);
2210 }
2211 }
2212 if (len == 0)
2213 return(NULL);
2214 return(xmlStrndup(buf, len));
2215}
2216
2217/**
2218 * xmlParseEntityValue:
2219 * @ctxt: an XML parser context
2220 * @orig: if non-NULL store a copy of the original entity value
2221 *
2222 * parse a value for ENTITY declarations
2223 *
2224 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2225 * "'" ([^%&'] | PEReference | Reference)* "'"
2226 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002227 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002228 */
2229
2230xmlChar *
2231xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2232 xmlChar *buf = NULL;
2233 int len = 0;
2234 int size = XML_PARSER_BUFFER_SIZE;
2235 int c, l;
2236 xmlChar stop;
2237 xmlChar *ret = NULL;
2238 const xmlChar *cur = NULL;
2239 xmlParserInputPtr input;
2240
2241 if (RAW == '"') stop = '"';
2242 else if (RAW == '\'') stop = '\'';
2243 else {
2244 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2245 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2246 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2247 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002248 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002249 return(NULL);
2250 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002251 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002252 if (buf == NULL) {
2253 xmlGenericError(xmlGenericErrorContext,
2254 "malloc of %d byte failed\n", size);
2255 return(NULL);
2256 }
2257
2258 /*
2259 * The content of the entity definition is copied in a buffer.
2260 */
2261
2262 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2263 input = ctxt->input;
2264 GROW;
2265 NEXT;
2266 c = CUR_CHAR(l);
2267 /*
2268 * NOTE: 4.4.5 Included in Literal
2269 * When a parameter entity reference appears in a literal entity
2270 * value, ... a single or double quote character in the replacement
2271 * text is always treated as a normal data character and will not
2272 * terminate the literal.
2273 * In practice it means we stop the loop only when back at parsing
2274 * the initial entity and the quote is found
2275 */
2276 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2277 (ctxt->input != input))) {
2278 if (len + 5 >= size) {
2279 size *= 2;
2280 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2281 if (buf == NULL) {
2282 xmlGenericError(xmlGenericErrorContext,
2283 "realloc of %d byte failed\n", size);
2284 return(NULL);
2285 }
2286 }
2287 COPY_BUF(l,buf,len,c);
2288 NEXTL(l);
2289 /*
2290 * Pop-up of finished entities.
2291 */
2292 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2293 xmlPopInput(ctxt);
2294
2295 GROW;
2296 c = CUR_CHAR(l);
2297 if (c == 0) {
2298 GROW;
2299 c = CUR_CHAR(l);
2300 }
2301 }
2302 buf[len] = 0;
2303
2304 /*
2305 * Raise problem w.r.t. '&' and '%' being used in non-entities
2306 * reference constructs. Note Charref will be handled in
2307 * xmlStringDecodeEntities()
2308 */
2309 cur = buf;
2310 while (*cur != 0) { /* non input consuming */
2311 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2312 xmlChar *name;
2313 xmlChar tmp = *cur;
2314
2315 cur++;
2316 name = xmlParseStringName(ctxt, &cur);
2317 if ((name == NULL) || (*cur != ';')) {
2318 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2319 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2320 ctxt->sax->error(ctxt->userData,
2321 "EntityValue: '%c' forbidden except for entities references\n",
2322 tmp);
2323 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002324 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002325 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002326 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2327 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002328 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2329 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2330 ctxt->sax->error(ctxt->userData,
2331 "EntityValue: PEReferences forbidden in internal subset\n",
2332 tmp);
2333 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002334 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002335 }
2336 if (name != NULL)
2337 xmlFree(name);
2338 }
2339 cur++;
2340 }
2341
2342 /*
2343 * Then PEReference entities are substituted.
2344 */
2345 if (c != stop) {
2346 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2347 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2348 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2349 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002350 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002351 xmlFree(buf);
2352 } else {
2353 NEXT;
2354 /*
2355 * NOTE: 4.4.7 Bypassed
2356 * When a general entity reference appears in the EntityValue in
2357 * an entity declaration, it is bypassed and left as is.
2358 * so XML_SUBSTITUTE_REF is not set here.
2359 */
2360 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2361 0, 0, 0);
2362 if (orig != NULL)
2363 *orig = buf;
2364 else
2365 xmlFree(buf);
2366 }
2367
2368 return(ret);
2369}
2370
2371/**
2372 * xmlParseAttValue:
2373 * @ctxt: an XML parser context
2374 *
2375 * parse a value for an attribute
2376 * Note: the parser won't do substitution of entities here, this
2377 * will be handled later in xmlStringGetNodeList
2378 *
2379 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2380 * "'" ([^<&'] | Reference)* "'"
2381 *
2382 * 3.3.3 Attribute-Value Normalization:
2383 * Before the value of an attribute is passed to the application or
2384 * checked for validity, the XML processor must normalize it as follows:
2385 * - a character reference is processed by appending the referenced
2386 * character to the attribute value
2387 * - an entity reference is processed by recursively processing the
2388 * replacement text of the entity
2389 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2390 * appending #x20 to the normalized value, except that only a single
2391 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2392 * parsed entity or the literal entity value of an internal parsed entity
2393 * - other characters are processed by appending them to the normalized value
2394 * If the declared value is not CDATA, then the XML processor must further
2395 * process the normalized attribute value by discarding any leading and
2396 * trailing space (#x20) characters, and by replacing sequences of space
2397 * (#x20) characters by a single space (#x20) character.
2398 * All attributes for which no declaration has been read should be treated
2399 * by a non-validating parser as if declared CDATA.
2400 *
2401 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2402 */
2403
2404xmlChar *
Daniel Veillarde72c7562002-05-31 09:47:30 +00002405xmlParseAttValueComplex(xmlParserCtxtPtr ctxt);
2406
2407xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002408xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2409 xmlChar limit = 0;
Daniel Veillardf4862f02002-09-10 11:13:43 +00002410 const xmlChar *in = NULL;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002411 xmlChar *ret = NULL;
2412 SHRINK;
2413 GROW;
Daniel Veillarde645e8c2002-10-22 17:35:37 +00002414 in = (xmlChar *) CUR_PTR;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002415 if (*in != '"' && *in != '\'') {
2416 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2417 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2418 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2419 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002420 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002421 return(NULL);
2422 }
2423 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2424 limit = *in;
2425 ++in;
2426
2427 while (*in != limit && *in >= 0x20 && *in <= 0x7f &&
2428 *in != '&' && *in != '<'
2429 ) {
2430 ++in;
2431 }
2432 if (*in != limit) {
2433 return xmlParseAttValueComplex(ctxt);
2434 }
2435 ++in;
2436 ret = xmlStrndup (CUR_PTR + 1, in - CUR_PTR - 2);
2437 CUR_PTR = in;
2438 return ret;
2439}
2440
Daniel Veillard01c13b52002-12-10 15:19:08 +00002441/**
2442 * xmlParseAttValueComplex:
2443 * @ctxt: an XML parser context
2444 *
2445 * parse a value for an attribute, this is the fallback function
2446 * of xmlParseAttValue() when the attribute parsing requires handling
2447 * of non-ASCII characters.
2448 *
2449 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2450 */
Daniel Veillarde72c7562002-05-31 09:47:30 +00002451xmlChar *
2452xmlParseAttValueComplex(xmlParserCtxtPtr ctxt) {
2453 xmlChar limit = 0;
2454 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002455 int len = 0;
2456 int buf_size = 0;
2457 int c, l;
2458 xmlChar *current = NULL;
2459 xmlEntityPtr ent;
2460
2461
2462 SHRINK;
2463 if (NXT(0) == '"') {
2464 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2465 limit = '"';
2466 NEXT;
2467 } else if (NXT(0) == '\'') {
2468 limit = '\'';
2469 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2470 NEXT;
2471 } else {
2472 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2473 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2474 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2475 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002476 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002477 return(NULL);
2478 }
2479
2480 /*
2481 * allocate a translation buffer.
2482 */
2483 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002484 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002485 if (buf == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002486 xmlGenericError(xmlGenericErrorContext,
2487 "xmlParseAttValue: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002488 return(NULL);
2489 }
2490
2491 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002492 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002493 */
2494 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002495 while ((NXT(0) != limit) && /* checked */
2496 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002497 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002498 if (c == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00002499 if (NXT(1) == '#') {
2500 int val = xmlParseCharRef(ctxt);
2501 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002502 if (ctxt->replaceEntities) {
2503 if (len > buf_size - 10) {
2504 growBuffer(buf);
2505 }
2506 buf[len++] = '&';
2507 } else {
2508 /*
2509 * The reparsing will be done in xmlStringGetNodeList()
2510 * called by the attribute() function in SAX.c
2511 */
2512 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002513
Daniel Veillard319a7422001-09-11 09:27:09 +00002514 if (len > buf_size - 10) {
2515 growBuffer(buf);
2516 }
2517 current = &buffer[0];
2518 while (*current != 0) { /* non input consuming */
2519 buf[len++] = *current++;
2520 }
Owen Taylor3473f882001-02-23 17:55:21 +00002521 }
2522 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002523 if (len > buf_size - 10) {
2524 growBuffer(buf);
2525 }
Owen Taylor3473f882001-02-23 17:55:21 +00002526 len += xmlCopyChar(0, &buf[len], val);
2527 }
2528 } else {
2529 ent = xmlParseEntityRef(ctxt);
2530 if ((ent != NULL) &&
2531 (ctxt->replaceEntities != 0)) {
2532 xmlChar *rep;
2533
2534 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2535 rep = xmlStringDecodeEntities(ctxt, ent->content,
2536 XML_SUBSTITUTE_REF, 0, 0, 0);
2537 if (rep != NULL) {
2538 current = rep;
2539 while (*current != 0) { /* non input consuming */
2540 buf[len++] = *current++;
2541 if (len > buf_size - 10) {
2542 growBuffer(buf);
2543 }
2544 }
2545 xmlFree(rep);
2546 }
2547 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002548 if (len > buf_size - 10) {
2549 growBuffer(buf);
2550 }
Owen Taylor3473f882001-02-23 17:55:21 +00002551 if (ent->content != NULL)
2552 buf[len++] = ent->content[0];
2553 }
2554 } else if (ent != NULL) {
2555 int i = xmlStrlen(ent->name);
2556 const xmlChar *cur = ent->name;
2557
2558 /*
2559 * This may look absurd but is needed to detect
2560 * entities problems
2561 */
2562 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2563 (ent->content != NULL)) {
2564 xmlChar *rep;
2565 rep = xmlStringDecodeEntities(ctxt, ent->content,
2566 XML_SUBSTITUTE_REF, 0, 0, 0);
2567 if (rep != NULL)
2568 xmlFree(rep);
2569 }
2570
2571 /*
2572 * Just output the reference
2573 */
2574 buf[len++] = '&';
2575 if (len > buf_size - i - 10) {
2576 growBuffer(buf);
2577 }
2578 for (;i > 0;i--)
2579 buf[len++] = *cur++;
2580 buf[len++] = ';';
2581 }
2582 }
2583 } else {
2584 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2585 COPY_BUF(l,buf,len,0x20);
2586 if (len > buf_size - 10) {
2587 growBuffer(buf);
2588 }
2589 } else {
2590 COPY_BUF(l,buf,len,c);
2591 if (len > buf_size - 10) {
2592 growBuffer(buf);
2593 }
2594 }
2595 NEXTL(l);
2596 }
2597 GROW;
2598 c = CUR_CHAR(l);
2599 }
2600 buf[len++] = 0;
2601 if (RAW == '<') {
2602 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2603 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2604 ctxt->sax->error(ctxt->userData,
2605 "Unescaped '<' not allowed in attributes values\n");
2606 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002607 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002608 } else if (RAW != limit) {
2609 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2610 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2611 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2612 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002613 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002614 } else
2615 NEXT;
2616 return(buf);
2617}
2618
2619/**
2620 * xmlParseSystemLiteral:
2621 * @ctxt: an XML parser context
2622 *
2623 * parse an XML Literal
2624 *
2625 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2626 *
2627 * Returns the SystemLiteral parsed or NULL
2628 */
2629
2630xmlChar *
2631xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2632 xmlChar *buf = NULL;
2633 int len = 0;
2634 int size = XML_PARSER_BUFFER_SIZE;
2635 int cur, l;
2636 xmlChar stop;
2637 int state = ctxt->instate;
2638 int count = 0;
2639
2640 SHRINK;
2641 if (RAW == '"') {
2642 NEXT;
2643 stop = '"';
2644 } else if (RAW == '\'') {
2645 NEXT;
2646 stop = '\'';
2647 } else {
2648 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2649 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2650 ctxt->sax->error(ctxt->userData,
2651 "SystemLiteral \" or ' expected\n");
2652 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002653 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002654 return(NULL);
2655 }
2656
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002657 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002658 if (buf == NULL) {
2659 xmlGenericError(xmlGenericErrorContext,
2660 "malloc of %d byte failed\n", size);
2661 return(NULL);
2662 }
2663 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2664 cur = CUR_CHAR(l);
2665 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2666 if (len + 5 >= size) {
2667 size *= 2;
2668 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2669 if (buf == NULL) {
2670 xmlGenericError(xmlGenericErrorContext,
2671 "realloc of %d byte failed\n", size);
2672 ctxt->instate = (xmlParserInputState) state;
2673 return(NULL);
2674 }
2675 }
2676 count++;
2677 if (count > 50) {
2678 GROW;
2679 count = 0;
2680 }
2681 COPY_BUF(l,buf,len,cur);
2682 NEXTL(l);
2683 cur = CUR_CHAR(l);
2684 if (cur == 0) {
2685 GROW;
2686 SHRINK;
2687 cur = CUR_CHAR(l);
2688 }
2689 }
2690 buf[len] = 0;
2691 ctxt->instate = (xmlParserInputState) state;
2692 if (!IS_CHAR(cur)) {
2693 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2694 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2695 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2696 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002697 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002698 } else {
2699 NEXT;
2700 }
2701 return(buf);
2702}
2703
2704/**
2705 * xmlParsePubidLiteral:
2706 * @ctxt: an XML parser context
2707 *
2708 * parse an XML public literal
2709 *
2710 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2711 *
2712 * Returns the PubidLiteral parsed or NULL.
2713 */
2714
2715xmlChar *
2716xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2717 xmlChar *buf = NULL;
2718 int len = 0;
2719 int size = XML_PARSER_BUFFER_SIZE;
2720 xmlChar cur;
2721 xmlChar stop;
2722 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002723 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00002724
2725 SHRINK;
2726 if (RAW == '"') {
2727 NEXT;
2728 stop = '"';
2729 } else if (RAW == '\'') {
2730 NEXT;
2731 stop = '\'';
2732 } else {
2733 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2734 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2735 ctxt->sax->error(ctxt->userData,
2736 "SystemLiteral \" or ' expected\n");
2737 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002738 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002739 return(NULL);
2740 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002741 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002742 if (buf == NULL) {
2743 xmlGenericError(xmlGenericErrorContext,
2744 "malloc of %d byte failed\n", size);
2745 return(NULL);
2746 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002747 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00002748 cur = CUR;
2749 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2750 if (len + 1 >= size) {
2751 size *= 2;
2752 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2753 if (buf == NULL) {
2754 xmlGenericError(xmlGenericErrorContext,
2755 "realloc of %d byte failed\n", size);
2756 return(NULL);
2757 }
2758 }
2759 buf[len++] = cur;
2760 count++;
2761 if (count > 50) {
2762 GROW;
2763 count = 0;
2764 }
2765 NEXT;
2766 cur = CUR;
2767 if (cur == 0) {
2768 GROW;
2769 SHRINK;
2770 cur = CUR;
2771 }
2772 }
2773 buf[len] = 0;
2774 if (cur != stop) {
2775 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2776 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2777 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2778 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002779 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002780 } else {
2781 NEXT;
2782 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002783 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00002784 return(buf);
2785}
2786
Daniel Veillard48b2f892001-02-25 16:11:03 +00002787void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002788/**
2789 * xmlParseCharData:
2790 * @ctxt: an XML parser context
2791 * @cdata: int indicating whether we are within a CDATA section
2792 *
2793 * parse a CharData section.
2794 * if we are within a CDATA section ']]>' marks an end of section.
2795 *
2796 * The right angle bracket (>) may be represented using the string "&gt;",
2797 * and must, for compatibility, be escaped using "&gt;" or a character
2798 * reference when it appears in the string "]]>" in content, when that
2799 * string is not marking the end of a CDATA section.
2800 *
2801 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2802 */
2803
2804void
2805xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00002806 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002807 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002808 int line = ctxt->input->line;
2809 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002810
2811 SHRINK;
2812 GROW;
2813 /*
2814 * Accelerated common case where input don't need to be
2815 * modified before passing it to the handler.
2816 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00002817 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002818 in = ctxt->input->cur;
2819 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002820get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00002821 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
2822 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002823 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002824 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002825 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002826 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002827 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002828 ctxt->input->line++;
2829 in++;
2830 }
2831 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002832 }
2833 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002834 if ((in[1] == ']') && (in[2] == '>')) {
2835 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2836 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2837 ctxt->sax->error(ctxt->userData,
2838 "Sequence ']]>' not allowed in content\n");
2839 ctxt->input->cur = in;
2840 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002841 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002842 return;
2843 }
2844 in++;
2845 goto get_more;
2846 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002847 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002848 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002849 if (IS_BLANK(*ctxt->input->cur)) {
2850 const xmlChar *tmp = ctxt->input->cur;
2851 ctxt->input->cur = in;
2852 if (areBlanks(ctxt, tmp, nbchar)) {
2853 if (ctxt->sax->ignorableWhitespace != NULL)
2854 ctxt->sax->ignorableWhitespace(ctxt->userData,
2855 tmp, nbchar);
2856 } else {
2857 if (ctxt->sax->characters != NULL)
2858 ctxt->sax->characters(ctxt->userData,
2859 tmp, nbchar);
2860 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002861 line = ctxt->input->line;
2862 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002863 } else {
2864 if (ctxt->sax->characters != NULL)
2865 ctxt->sax->characters(ctxt->userData,
2866 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002867 line = ctxt->input->line;
2868 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002869 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002870 }
2871 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002872 if (*in == 0xD) {
2873 in++;
2874 if (*in == 0xA) {
2875 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002876 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002877 ctxt->input->line++;
2878 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002879 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002880 in--;
2881 }
2882 if (*in == '<') {
2883 return;
2884 }
2885 if (*in == '&') {
2886 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002887 }
2888 SHRINK;
2889 GROW;
2890 in = ctxt->input->cur;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002891 } while ((*in >= 0x20) && (*in <= 0x7F));
Daniel Veillard48b2f892001-02-25 16:11:03 +00002892 nbchar = 0;
2893 }
Daniel Veillard50582112001-03-26 22:52:16 +00002894 ctxt->input->line = line;
2895 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002896 xmlParseCharDataComplex(ctxt, cdata);
2897}
2898
Daniel Veillard01c13b52002-12-10 15:19:08 +00002899/**
2900 * xmlParseCharDataComplex:
2901 * @ctxt: an XML parser context
2902 * @cdata: int indicating whether we are within a CDATA section
2903 *
2904 * parse a CharData section.this is the fallback function
2905 * of xmlParseCharData() when the parsing requires handling
2906 * of non-ASCII characters.
2907 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002908void
2909xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002910 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2911 int nbchar = 0;
2912 int cur, l;
2913 int count = 0;
2914
2915 SHRINK;
2916 GROW;
2917 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002918 while ((cur != '<') && /* checked */
2919 (cur != '&') &&
2920 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00002921 if ((cur == ']') && (NXT(1) == ']') &&
2922 (NXT(2) == '>')) {
2923 if (cdata) break;
2924 else {
2925 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2926 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2927 ctxt->sax->error(ctxt->userData,
2928 "Sequence ']]>' not allowed in content\n");
2929 /* Should this be relaxed ??? I see a "must here */
2930 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002931 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002932 }
2933 }
2934 COPY_BUF(l,buf,nbchar,cur);
2935 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2936 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002937 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002938 */
2939 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2940 if (areBlanks(ctxt, buf, nbchar)) {
2941 if (ctxt->sax->ignorableWhitespace != NULL)
2942 ctxt->sax->ignorableWhitespace(ctxt->userData,
2943 buf, nbchar);
2944 } else {
2945 if (ctxt->sax->characters != NULL)
2946 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2947 }
2948 }
2949 nbchar = 0;
2950 }
2951 count++;
2952 if (count > 50) {
2953 GROW;
2954 count = 0;
2955 }
2956 NEXTL(l);
2957 cur = CUR_CHAR(l);
2958 }
2959 if (nbchar != 0) {
2960 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002961 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002962 */
2963 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2964 if (areBlanks(ctxt, buf, nbchar)) {
2965 if (ctxt->sax->ignorableWhitespace != NULL)
2966 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2967 } else {
2968 if (ctxt->sax->characters != NULL)
2969 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2970 }
2971 }
2972 }
2973}
2974
2975/**
2976 * xmlParseExternalID:
2977 * @ctxt: an XML parser context
2978 * @publicID: a xmlChar** receiving PubidLiteral
2979 * @strict: indicate whether we should restrict parsing to only
2980 * production [75], see NOTE below
2981 *
2982 * Parse an External ID or a Public ID
2983 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002984 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002985 * 'PUBLIC' S PubidLiteral S SystemLiteral
2986 *
2987 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2988 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2989 *
2990 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2991 *
2992 * Returns the function returns SystemLiteral and in the second
2993 * case publicID receives PubidLiteral, is strict is off
2994 * it is possible to return NULL and have publicID set.
2995 */
2996
2997xmlChar *
2998xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2999 xmlChar *URI = NULL;
3000
3001 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003002
3003 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003004 if ((RAW == 'S') && (NXT(1) == 'Y') &&
3005 (NXT(2) == 'S') && (NXT(3) == 'T') &&
3006 (NXT(4) == 'E') && (NXT(5) == 'M')) {
3007 SKIP(6);
3008 if (!IS_BLANK(CUR)) {
3009 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3010 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3011 ctxt->sax->error(ctxt->userData,
3012 "Space required after 'SYSTEM'\n");
3013 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003014 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003015 }
3016 SKIP_BLANKS;
3017 URI = xmlParseSystemLiteral(ctxt);
3018 if (URI == NULL) {
3019 ctxt->errNo = XML_ERR_URI_REQUIRED;
3020 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3021 ctxt->sax->error(ctxt->userData,
3022 "xmlParseExternalID: SYSTEM, no URI\n");
3023 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003024 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003025 }
3026 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
3027 (NXT(2) == 'B') && (NXT(3) == 'L') &&
3028 (NXT(4) == 'I') && (NXT(5) == 'C')) {
3029 SKIP(6);
3030 if (!IS_BLANK(CUR)) {
3031 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3032 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3033 ctxt->sax->error(ctxt->userData,
3034 "Space required after 'PUBLIC'\n");
3035 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003036 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003037 }
3038 SKIP_BLANKS;
3039 *publicID = xmlParsePubidLiteral(ctxt);
3040 if (*publicID == NULL) {
3041 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
3042 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3043 ctxt->sax->error(ctxt->userData,
3044 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
3045 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003046 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003047 }
3048 if (strict) {
3049 /*
3050 * We don't handle [83] so "S SystemLiteral" is required.
3051 */
3052 if (!IS_BLANK(CUR)) {
3053 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3054 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3055 ctxt->sax->error(ctxt->userData,
3056 "Space required after the Public Identifier\n");
3057 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003058 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003059 }
3060 } else {
3061 /*
3062 * We handle [83] so we return immediately, if
3063 * "S SystemLiteral" is not detected. From a purely parsing
3064 * point of view that's a nice mess.
3065 */
3066 const xmlChar *ptr;
3067 GROW;
3068
3069 ptr = CUR_PTR;
3070 if (!IS_BLANK(*ptr)) return(NULL);
3071
3072 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3073 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3074 }
3075 SKIP_BLANKS;
3076 URI = xmlParseSystemLiteral(ctxt);
3077 if (URI == NULL) {
3078 ctxt->errNo = XML_ERR_URI_REQUIRED;
3079 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3080 ctxt->sax->error(ctxt->userData,
3081 "xmlParseExternalID: PUBLIC, no URI\n");
3082 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003083 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003084 }
3085 }
3086 return(URI);
3087}
3088
3089/**
3090 * xmlParseComment:
3091 * @ctxt: an XML parser context
3092 *
3093 * Skip an XML (SGML) comment <!-- .... -->
3094 * The spec says that "For compatibility, the string "--" (double-hyphen)
3095 * must not occur within comments. "
3096 *
3097 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3098 */
3099void
3100xmlParseComment(xmlParserCtxtPtr ctxt) {
3101 xmlChar *buf = NULL;
3102 int len;
3103 int size = XML_PARSER_BUFFER_SIZE;
3104 int q, ql;
3105 int r, rl;
3106 int cur, l;
3107 xmlParserInputState state;
3108 xmlParserInputPtr input = ctxt->input;
3109 int count = 0;
3110
3111 /*
3112 * Check that there is a comment right here.
3113 */
3114 if ((RAW != '<') || (NXT(1) != '!') ||
3115 (NXT(2) != '-') || (NXT(3) != '-')) return;
3116
3117 state = ctxt->instate;
3118 ctxt->instate = XML_PARSER_COMMENT;
3119 SHRINK;
3120 SKIP(4);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003121 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003122 if (buf == NULL) {
3123 xmlGenericError(xmlGenericErrorContext,
3124 "malloc of %d byte failed\n", size);
3125 ctxt->instate = state;
3126 return;
3127 }
3128 q = CUR_CHAR(ql);
3129 NEXTL(ql);
3130 r = CUR_CHAR(rl);
3131 NEXTL(rl);
3132 cur = CUR_CHAR(l);
3133 len = 0;
3134 while (IS_CHAR(cur) && /* checked */
3135 ((cur != '>') ||
3136 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003137 if ((r == '-') && (q == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003138 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
3139 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3140 ctxt->sax->error(ctxt->userData,
3141 "Comment must not contain '--' (double-hyphen)`\n");
3142 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003143 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003144 }
3145 if (len + 5 >= size) {
3146 size *= 2;
3147 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3148 if (buf == NULL) {
3149 xmlGenericError(xmlGenericErrorContext,
3150 "realloc of %d byte failed\n", size);
3151 ctxt->instate = state;
3152 return;
3153 }
3154 }
3155 COPY_BUF(ql,buf,len,q);
3156 q = r;
3157 ql = rl;
3158 r = cur;
3159 rl = l;
3160
3161 count++;
3162 if (count > 50) {
3163 GROW;
3164 count = 0;
3165 }
3166 NEXTL(l);
3167 cur = CUR_CHAR(l);
3168 if (cur == 0) {
3169 SHRINK;
3170 GROW;
3171 cur = CUR_CHAR(l);
3172 }
3173 }
3174 buf[len] = 0;
3175 if (!IS_CHAR(cur)) {
3176 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
3177 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3178 ctxt->sax->error(ctxt->userData,
3179 "Comment not terminated \n<!--%.50s\n", buf);
3180 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003181 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003182 xmlFree(buf);
3183 } else {
3184 if (input != ctxt->input) {
3185 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3186 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3187 ctxt->sax->error(ctxt->userData,
3188"Comment doesn't start and stop in the same entity\n");
3189 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003190 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003191 }
3192 NEXT;
3193 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3194 (!ctxt->disableSAX))
3195 ctxt->sax->comment(ctxt->userData, buf);
3196 xmlFree(buf);
3197 }
3198 ctxt->instate = state;
3199}
3200
3201/**
3202 * xmlParsePITarget:
3203 * @ctxt: an XML parser context
3204 *
3205 * parse the name of a PI
3206 *
3207 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3208 *
3209 * Returns the PITarget name or NULL
3210 */
3211
3212xmlChar *
3213xmlParsePITarget(xmlParserCtxtPtr ctxt) {
3214 xmlChar *name;
3215
3216 name = xmlParseName(ctxt);
3217 if ((name != NULL) &&
3218 ((name[0] == 'x') || (name[0] == 'X')) &&
3219 ((name[1] == 'm') || (name[1] == 'M')) &&
3220 ((name[2] == 'l') || (name[2] == 'L'))) {
3221 int i;
3222 if ((name[0] == 'x') && (name[1] == 'm') &&
3223 (name[2] == 'l') && (name[3] == 0)) {
3224 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3225 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3226 ctxt->sax->error(ctxt->userData,
3227 "XML declaration allowed only at the start of the document\n");
3228 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003229 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003230 return(name);
3231 } else if (name[3] == 0) {
3232 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3233 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3234 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
3235 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003236 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003237 return(name);
3238 }
3239 for (i = 0;;i++) {
3240 if (xmlW3CPIs[i] == NULL) break;
3241 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3242 return(name);
3243 }
3244 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3245 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3246 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003247 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003248 }
3249 }
3250 return(name);
3251}
3252
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * A malformed value raises a SAX warning, except when the '=' after
 * "catalog" is missing: in that case the function returns silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *url = NULL;
    const xmlChar *cur = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* pseudo-attribute name */
    for (; IS_BLANK(*cur); cur++)
        ;
    if (xmlStrncmp(cur, BAD_CAST"catalog", 7) != 0)
        goto error;
    cur += 7;

    /* '=' possibly surrounded by blanks */
    for (; IS_BLANK(*cur); cur++)
        ;
    if (*cur != '=')
        return;
    cur++;
    for (; IS_BLANK(*cur); cur++)
        ;

    /* quoted value, the closing quote must match the opening one */
    quote = *cur;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    cur++;
    start = cur;
    for (; (*cur != 0) && (*cur != quote); cur++)
        ;
    if (*cur == 0)
        goto error;
    url = xmlStrndup(start, cur - start);
    cur++;

    /* only blanks may follow the value */
    for (; IS_BLANK(*cur); cur++)
        ;
    if (*cur != 0)
        goto error;

    if (url != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, url);
        xmlFree(url);
    }
    return;

error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (url != NULL)
        xmlFree(url);
}
#endif
3315
Owen Taylor3473f882001-02-23 17:55:21 +00003316/**
3317 * xmlParsePI:
3318 * @ctxt: an XML parser context
3319 *
3320 * parse an XML Processing Instruction.
3321 *
3322 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3323 *
3324 * The processing is transfered to SAX once parsed.
3325 */
3326
3327void
3328xmlParsePI(xmlParserCtxtPtr ctxt) {
3329 xmlChar *buf = NULL;
3330 int len = 0;
3331 int size = XML_PARSER_BUFFER_SIZE;
3332 int cur, l;
3333 xmlChar *target;
3334 xmlParserInputState state;
3335 int count = 0;
3336
3337 if ((RAW == '<') && (NXT(1) == '?')) {
3338 xmlParserInputPtr input = ctxt->input;
3339 state = ctxt->instate;
3340 ctxt->instate = XML_PARSER_PI;
3341 /*
3342 * this is a Processing Instruction.
3343 */
3344 SKIP(2);
3345 SHRINK;
3346
3347 /*
3348 * Parse the target name and check for special support like
3349 * namespace.
3350 */
3351 target = xmlParsePITarget(ctxt);
3352 if (target != NULL) {
3353 if ((RAW == '?') && (NXT(1) == '>')) {
3354 if (input != ctxt->input) {
3355 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3356 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3357 ctxt->sax->error(ctxt->userData,
3358 "PI declaration doesn't start and stop in the same entity\n");
3359 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003360 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003361 }
3362 SKIP(2);
3363
3364 /*
3365 * SAX: PI detected.
3366 */
3367 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3368 (ctxt->sax->processingInstruction != NULL))
3369 ctxt->sax->processingInstruction(ctxt->userData,
3370 target, NULL);
3371 ctxt->instate = state;
3372 xmlFree(target);
3373 return;
3374 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003375 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003376 if (buf == NULL) {
3377 xmlGenericError(xmlGenericErrorContext,
3378 "malloc of %d byte failed\n", size);
3379 ctxt->instate = state;
3380 return;
3381 }
3382 cur = CUR;
3383 if (!IS_BLANK(cur)) {
3384 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3385 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3386 ctxt->sax->error(ctxt->userData,
3387 "xmlParsePI: PI %s space expected\n", target);
3388 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003389 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003390 }
3391 SKIP_BLANKS;
3392 cur = CUR_CHAR(l);
3393 while (IS_CHAR(cur) && /* checked */
3394 ((cur != '?') || (NXT(1) != '>'))) {
3395 if (len + 5 >= size) {
3396 size *= 2;
3397 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3398 if (buf == NULL) {
3399 xmlGenericError(xmlGenericErrorContext,
3400 "realloc of %d byte failed\n", size);
3401 ctxt->instate = state;
3402 return;
3403 }
3404 }
3405 count++;
3406 if (count > 50) {
3407 GROW;
3408 count = 0;
3409 }
3410 COPY_BUF(l,buf,len,cur);
3411 NEXTL(l);
3412 cur = CUR_CHAR(l);
3413 if (cur == 0) {
3414 SHRINK;
3415 GROW;
3416 cur = CUR_CHAR(l);
3417 }
3418 }
3419 buf[len] = 0;
3420 if (cur != '?') {
3421 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3422 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3423 ctxt->sax->error(ctxt->userData,
3424 "xmlParsePI: PI %s never end ...\n", target);
3425 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003426 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003427 } else {
3428 if (input != ctxt->input) {
3429 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3430 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3431 ctxt->sax->error(ctxt->userData,
3432 "PI declaration doesn't start and stop in the same entity\n");
3433 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003434 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003435 }
3436 SKIP(2);
3437
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003438#ifdef LIBXML_CATALOG_ENABLED
3439 if (((state == XML_PARSER_MISC) ||
3440 (state == XML_PARSER_START)) &&
3441 (xmlStrEqual(target, XML_CATALOG_PI))) {
3442 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3443 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3444 (allow == XML_CATA_ALLOW_ALL))
3445 xmlParseCatalogPI(ctxt, buf);
3446 }
3447#endif
3448
3449
Owen Taylor3473f882001-02-23 17:55:21 +00003450 /*
3451 * SAX: PI detected.
3452 */
3453 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3454 (ctxt->sax->processingInstruction != NULL))
3455 ctxt->sax->processingInstruction(ctxt->userData,
3456 target, buf);
3457 }
3458 xmlFree(buf);
3459 xmlFree(target);
3460 } else {
3461 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3462 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3463 ctxt->sax->error(ctxt->userData,
3464 "xmlParsePI : no target name\n");
3465 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003466 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003467 }
3468 ctxt->instate = state;
3469 }
3470}
3471
3472/**
3473 * xmlParseNotationDecl:
3474 * @ctxt: an XML parser context
3475 *
3476 * parse a notation declaration
3477 *
3478 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3479 *
3480 * Hence there is actually 3 choices:
3481 * 'PUBLIC' S PubidLiteral
3482 * 'PUBLIC' S PubidLiteral S SystemLiteral
3483 * and 'SYSTEM' S SystemLiteral
3484 *
3485 * See the NOTE on xmlParseExternalID().
3486 */
3487
3488void
3489xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3490 xmlChar *name;
3491 xmlChar *Pubid;
3492 xmlChar *Systemid;
3493
3494 if ((RAW == '<') && (NXT(1) == '!') &&
3495 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3496 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3497 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3498 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3499 xmlParserInputPtr input = ctxt->input;
3500 SHRINK;
3501 SKIP(10);
3502 if (!IS_BLANK(CUR)) {
3503 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3504 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3505 ctxt->sax->error(ctxt->userData,
3506 "Space required after '<!NOTATION'\n");
3507 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003508 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003509 return;
3510 }
3511 SKIP_BLANKS;
3512
Daniel Veillard76d66f42001-05-16 21:05:17 +00003513 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003514 if (name == NULL) {
3515 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3516 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3517 ctxt->sax->error(ctxt->userData,
3518 "NOTATION: Name expected here\n");
3519 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003520 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003521 return;
3522 }
3523 if (!IS_BLANK(CUR)) {
3524 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3525 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3526 ctxt->sax->error(ctxt->userData,
3527 "Space required after the NOTATION name'\n");
3528 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003529 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003530 return;
3531 }
3532 SKIP_BLANKS;
3533
3534 /*
3535 * Parse the IDs.
3536 */
3537 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3538 SKIP_BLANKS;
3539
3540 if (RAW == '>') {
3541 if (input != ctxt->input) {
3542 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3543 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3544 ctxt->sax->error(ctxt->userData,
3545"Notation declaration doesn't start and stop in the same entity\n");
3546 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003547 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003548 }
3549 NEXT;
3550 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3551 (ctxt->sax->notationDecl != NULL))
3552 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3553 } else {
3554 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3555 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3556 ctxt->sax->error(ctxt->userData,
3557 "'>' required to close NOTATION declaration\n");
3558 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003559 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003560 }
3561 xmlFree(name);
3562 if (Systemid != NULL) xmlFree(Systemid);
3563 if (Pubid != NULL) xmlFree(Pubid);
3564 }
3565}
3566
3567/**
3568 * xmlParseEntityDecl:
3569 * @ctxt: an XML parser context
3570 *
3571 * parse <!ENTITY declarations
3572 *
3573 * [70] EntityDecl ::= GEDecl | PEDecl
3574 *
3575 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3576 *
3577 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3578 *
3579 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3580 *
3581 * [74] PEDef ::= EntityValue | ExternalID
3582 *
3583 * [76] NDataDecl ::= S 'NDATA' S Name
3584 *
3585 * [ VC: Notation Declared ]
3586 * The Name must match the declared name of a notation.
3587 */
3588
3589void
3590xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3591 xmlChar *name = NULL;
3592 xmlChar *value = NULL;
3593 xmlChar *URI = NULL, *literal = NULL;
3594 xmlChar *ndata = NULL;
3595 int isParameter = 0;
3596 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003597 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003598
3599 GROW;
3600 if ((RAW == '<') && (NXT(1) == '!') &&
3601 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3602 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3603 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3604 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003605 SHRINK;
3606 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003607 skipped = SKIP_BLANKS;
3608 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003609 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3610 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3611 ctxt->sax->error(ctxt->userData,
3612 "Space required after '<!ENTITY'\n");
3613 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003614 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003615 }
Owen Taylor3473f882001-02-23 17:55:21 +00003616
3617 if (RAW == '%') {
3618 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003619 skipped = SKIP_BLANKS;
3620 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003621 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3622 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3623 ctxt->sax->error(ctxt->userData,
3624 "Space required after '%'\n");
3625 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003626 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003627 }
Owen Taylor3473f882001-02-23 17:55:21 +00003628 isParameter = 1;
3629 }
3630
Daniel Veillard76d66f42001-05-16 21:05:17 +00003631 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003632 if (name == NULL) {
3633 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3634 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3635 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3636 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003637 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003638 return;
3639 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003640 skipped = SKIP_BLANKS;
3641 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003642 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3643 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3644 ctxt->sax->error(ctxt->userData,
3645 "Space required after the entity name\n");
3646 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003647 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003648 }
Owen Taylor3473f882001-02-23 17:55:21 +00003649
Daniel Veillardf5582f12002-06-11 10:08:16 +00003650 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003651 /*
3652 * handle the various case of definitions...
3653 */
3654 if (isParameter) {
3655 if ((RAW == '"') || (RAW == '\'')) {
3656 value = xmlParseEntityValue(ctxt, &orig);
3657 if (value) {
3658 if ((ctxt->sax != NULL) &&
3659 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3660 ctxt->sax->entityDecl(ctxt->userData, name,
3661 XML_INTERNAL_PARAMETER_ENTITY,
3662 NULL, NULL, value);
3663 }
3664 } else {
3665 URI = xmlParseExternalID(ctxt, &literal, 1);
3666 if ((URI == NULL) && (literal == NULL)) {
3667 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3668 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3669 ctxt->sax->error(ctxt->userData,
3670 "Entity value required\n");
3671 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003672 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003673 }
3674 if (URI) {
3675 xmlURIPtr uri;
3676
3677 uri = xmlParseURI((const char *) URI);
3678 if (uri == NULL) {
3679 ctxt->errNo = XML_ERR_INVALID_URI;
3680 if ((ctxt->sax != NULL) &&
3681 (!ctxt->disableSAX) &&
3682 (ctxt->sax->error != NULL))
3683 ctxt->sax->error(ctxt->userData,
3684 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003685 /*
3686 * This really ought to be a well formedness error
3687 * but the XML Core WG decided otherwise c.f. issue
3688 * E26 of the XML erratas.
3689 */
Owen Taylor3473f882001-02-23 17:55:21 +00003690 } else {
3691 if (uri->fragment != NULL) {
3692 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3693 if ((ctxt->sax != NULL) &&
3694 (!ctxt->disableSAX) &&
3695 (ctxt->sax->error != NULL))
3696 ctxt->sax->error(ctxt->userData,
3697 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003698 /*
3699 * Okay this is foolish to block those but not
3700 * invalid URIs.
3701 */
Owen Taylor3473f882001-02-23 17:55:21 +00003702 ctxt->wellFormed = 0;
3703 } else {
3704 if ((ctxt->sax != NULL) &&
3705 (!ctxt->disableSAX) &&
3706 (ctxt->sax->entityDecl != NULL))
3707 ctxt->sax->entityDecl(ctxt->userData, name,
3708 XML_EXTERNAL_PARAMETER_ENTITY,
3709 literal, URI, NULL);
3710 }
3711 xmlFreeURI(uri);
3712 }
3713 }
3714 }
3715 } else {
3716 if ((RAW == '"') || (RAW == '\'')) {
3717 value = xmlParseEntityValue(ctxt, &orig);
3718 if ((ctxt->sax != NULL) &&
3719 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3720 ctxt->sax->entityDecl(ctxt->userData, name,
3721 XML_INTERNAL_GENERAL_ENTITY,
3722 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003723 /*
3724 * For expat compatibility in SAX mode.
3725 */
3726 if ((ctxt->myDoc == NULL) ||
3727 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3728 if (ctxt->myDoc == NULL) {
3729 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3730 }
3731 if (ctxt->myDoc->intSubset == NULL)
3732 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3733 BAD_CAST "fake", NULL, NULL);
3734
3735 entityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3736 NULL, NULL, value);
3737 }
Owen Taylor3473f882001-02-23 17:55:21 +00003738 } else {
3739 URI = xmlParseExternalID(ctxt, &literal, 1);
3740 if ((URI == NULL) && (literal == NULL)) {
3741 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3742 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3743 ctxt->sax->error(ctxt->userData,
3744 "Entity value required\n");
3745 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003746 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003747 }
3748 if (URI) {
3749 xmlURIPtr uri;
3750
3751 uri = xmlParseURI((const char *)URI);
3752 if (uri == NULL) {
3753 ctxt->errNo = XML_ERR_INVALID_URI;
3754 if ((ctxt->sax != NULL) &&
3755 (!ctxt->disableSAX) &&
3756 (ctxt->sax->error != NULL))
3757 ctxt->sax->error(ctxt->userData,
3758 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003759 /*
3760 * This really ought to be a well formedness error
3761 * but the XML Core WG decided otherwise c.f. issue
3762 * E26 of the XML erratas.
3763 */
Owen Taylor3473f882001-02-23 17:55:21 +00003764 } else {
3765 if (uri->fragment != NULL) {
3766 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3767 if ((ctxt->sax != NULL) &&
3768 (!ctxt->disableSAX) &&
3769 (ctxt->sax->error != NULL))
3770 ctxt->sax->error(ctxt->userData,
3771 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003772 /*
3773 * Okay this is foolish to block those but not
3774 * invalid URIs.
3775 */
Owen Taylor3473f882001-02-23 17:55:21 +00003776 ctxt->wellFormed = 0;
3777 }
3778 xmlFreeURI(uri);
3779 }
3780 }
3781 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3782 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3783 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3784 ctxt->sax->error(ctxt->userData,
3785 "Space required before 'NDATA'\n");
3786 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003787 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003788 }
3789 SKIP_BLANKS;
3790 if ((RAW == 'N') && (NXT(1) == 'D') &&
3791 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3792 (NXT(4) == 'A')) {
3793 SKIP(5);
3794 if (!IS_BLANK(CUR)) {
3795 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3796 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3797 ctxt->sax->error(ctxt->userData,
3798 "Space required after 'NDATA'\n");
3799 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003800 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003801 }
3802 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003803 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003804 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3805 (ctxt->sax->unparsedEntityDecl != NULL))
3806 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3807 literal, URI, ndata);
3808 } else {
3809 if ((ctxt->sax != NULL) &&
3810 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3811 ctxt->sax->entityDecl(ctxt->userData, name,
3812 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3813 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003814 /*
3815 * For expat compatibility in SAX mode.
3816 * assuming the entity replacement was asked for
3817 */
3818 if ((ctxt->replaceEntities != 0) &&
3819 ((ctxt->myDoc == NULL) ||
3820 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
3821 if (ctxt->myDoc == NULL) {
3822 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3823 }
3824
3825 if (ctxt->myDoc->intSubset == NULL)
3826 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3827 BAD_CAST "fake", NULL, NULL);
3828 entityDecl(ctxt, name,
3829 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3830 literal, URI, NULL);
3831 }
Owen Taylor3473f882001-02-23 17:55:21 +00003832 }
3833 }
3834 }
3835 SKIP_BLANKS;
3836 if (RAW != '>') {
3837 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3838 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3839 ctxt->sax->error(ctxt->userData,
3840 "xmlParseEntityDecl: entity %s not terminated\n", name);
3841 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003842 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003843 } else {
3844 if (input != ctxt->input) {
3845 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3846 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3847 ctxt->sax->error(ctxt->userData,
3848"Entity declaration doesn't start and stop in the same entity\n");
3849 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003850 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003851 }
3852 NEXT;
3853 }
3854 if (orig != NULL) {
3855 /*
3856 * Ugly mechanism to save the raw entity value.
3857 */
3858 xmlEntityPtr cur = NULL;
3859
3860 if (isParameter) {
3861 if ((ctxt->sax != NULL) &&
3862 (ctxt->sax->getParameterEntity != NULL))
3863 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3864 } else {
3865 if ((ctxt->sax != NULL) &&
3866 (ctxt->sax->getEntity != NULL))
3867 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003868 if ((cur == NULL) && (ctxt->userData==ctxt)) {
3869 cur = getEntity(ctxt, name);
3870 }
Owen Taylor3473f882001-02-23 17:55:21 +00003871 }
3872 if (cur != NULL) {
3873 if (cur->orig != NULL)
3874 xmlFree(orig);
3875 else
3876 cur->orig = orig;
3877 } else
3878 xmlFree(orig);
3879 }
3880 if (name != NULL) xmlFree(name);
3881 if (value != NULL) xmlFree(value);
3882 if (URI != NULL) xmlFree(URI);
3883 if (literal != NULL) xmlFree(literal);
3884 if (ndata != NULL) xmlFree(ndata);
3885 }
3886}
3887
3888/**
3889 * xmlParseDefaultDecl:
3890 * @ctxt: an XML parser context
3891 * @value: Receive a possible fixed default value for the attribute
3892 *
3893 * Parse an attribute default declaration
3894 *
3895 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3896 *
3897 * [ VC: Required Attribute ]
3898 * if the default declaration is the keyword #REQUIRED, then the
3899 * attribute must be specified for all elements of the type in the
3900 * attribute-list declaration.
3901 *
3902 * [ VC: Attribute Default Legal ]
3903 * The declared default value must meet the lexical constraints of
3904 * the declared attribute type c.f. xmlValidateAttributeDecl()
3905 *
3906 * [ VC: Fixed Attribute Default ]
3907 * if an attribute has a default value declared with the #FIXED
3908 * keyword, instances of that attribute must match the default value.
3909 *
3910 * [ WFC: No < in Attribute Values ]
3911 * handled in xmlParseAttValue()
3912 *
3913 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3914 * or XML_ATTRIBUTE_FIXED.
3915 */
3916
3917int
3918xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3919 int val;
3920 xmlChar *ret;
3921
3922 *value = NULL;
3923 if ((RAW == '#') && (NXT(1) == 'R') &&
3924 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3925 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3926 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3927 (NXT(8) == 'D')) {
3928 SKIP(9);
3929 return(XML_ATTRIBUTE_REQUIRED);
3930 }
3931 if ((RAW == '#') && (NXT(1) == 'I') &&
3932 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3933 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3934 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3935 SKIP(8);
3936 return(XML_ATTRIBUTE_IMPLIED);
3937 }
3938 val = XML_ATTRIBUTE_NONE;
3939 if ((RAW == '#') && (NXT(1) == 'F') &&
3940 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3941 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3942 SKIP(6);
3943 val = XML_ATTRIBUTE_FIXED;
3944 if (!IS_BLANK(CUR)) {
3945 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3946 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3947 ctxt->sax->error(ctxt->userData,
3948 "Space required after '#FIXED'\n");
3949 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003950 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003951 }
3952 SKIP_BLANKS;
3953 }
3954 ret = xmlParseAttValue(ctxt);
3955 ctxt->instate = XML_PARSER_DTD;
3956 if (ret == NULL) {
3957 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3958 ctxt->sax->error(ctxt->userData,
3959 "Attribute default value declaration error\n");
3960 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003961 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003962 } else
3963 *value = ret;
3964 return(val);
3965}
3966
3967/**
3968 * xmlParseNotationType:
3969 * @ctxt: an XML parser context
3970 *
3971 * parse an Notation attribute type.
3972 *
3973 * Note: the leading 'NOTATION' S part has already being parsed...
3974 *
3975 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3976 *
3977 * [ VC: Notation Attributes ]
3978 * Values of this type must match one of the notation names included
3979 * in the declaration; all notation names in the declaration must be declared.
3980 *
3981 * Returns: the notation attribute tree built while parsing
3982 */
3983
3984xmlEnumerationPtr
3985xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3986 xmlChar *name;
3987 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3988
3989 if (RAW != '(') {
3990 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3991 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3992 ctxt->sax->error(ctxt->userData,
3993 "'(' required to start 'NOTATION'\n");
3994 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003995 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003996 return(NULL);
3997 }
3998 SHRINK;
3999 do {
4000 NEXT;
4001 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004002 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004003 if (name == NULL) {
4004 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4005 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4006 ctxt->sax->error(ctxt->userData,
4007 "Name expected in NOTATION declaration\n");
4008 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004009 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004010 return(ret);
4011 }
4012 cur = xmlCreateEnumeration(name);
4013 xmlFree(name);
4014 if (cur == NULL) return(ret);
4015 if (last == NULL) ret = last = cur;
4016 else {
4017 last->next = cur;
4018 last = cur;
4019 }
4020 SKIP_BLANKS;
4021 } while (RAW == '|');
4022 if (RAW != ')') {
4023 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
4024 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4025 ctxt->sax->error(ctxt->userData,
4026 "')' required to finish NOTATION declaration\n");
4027 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004028 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004029 if ((last != NULL) && (last != ret))
4030 xmlFreeEnumeration(last);
4031 return(ret);
4032 }
4033 NEXT;
4034 return(ret);
4035}
4036
4037/**
4038 * xmlParseEnumerationType:
4039 * @ctxt: an XML parser context
4040 *
4041 * parse an Enumeration attribute type.
4042 *
4043 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4044 *
4045 * [ VC: Enumeration ]
4046 * Values of this type must match one of the Nmtoken tokens in
4047 * the declaration
4048 *
4049 * Returns: the enumeration attribute tree built while parsing
4050 */
4051
4052xmlEnumerationPtr
4053xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4054 xmlChar *name;
4055 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4056
4057 if (RAW != '(') {
4058 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
4059 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4060 ctxt->sax->error(ctxt->userData,
4061 "'(' required to start ATTLIST enumeration\n");
4062 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004063 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004064 return(NULL);
4065 }
4066 SHRINK;
4067 do {
4068 NEXT;
4069 SKIP_BLANKS;
4070 name = xmlParseNmtoken(ctxt);
4071 if (name == NULL) {
4072 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
4073 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4074 ctxt->sax->error(ctxt->userData,
4075 "NmToken expected in ATTLIST enumeration\n");
4076 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004077 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004078 return(ret);
4079 }
4080 cur = xmlCreateEnumeration(name);
4081 xmlFree(name);
4082 if (cur == NULL) return(ret);
4083 if (last == NULL) ret = last = cur;
4084 else {
4085 last->next = cur;
4086 last = cur;
4087 }
4088 SKIP_BLANKS;
4089 } while (RAW == '|');
4090 if (RAW != ')') {
4091 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
4092 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4093 ctxt->sax->error(ctxt->userData,
4094 "')' required to finish ATTLIST enumeration\n");
4095 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004096 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004097 return(ret);
4098 }
4099 NEXT;
4100 return(ret);
4101}
4102
4103/**
4104 * xmlParseEnumeratedType:
4105 * @ctxt: an XML parser context
4106 * @tree: the enumeration tree built while parsing
4107 *
4108 * parse an Enumerated attribute type.
4109 *
4110 * [57] EnumeratedType ::= NotationType | Enumeration
4111 *
4112 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4113 *
4114 *
4115 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4116 */
4117
4118int
4119xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4120 if ((RAW == 'N') && (NXT(1) == 'O') &&
4121 (NXT(2) == 'T') && (NXT(3) == 'A') &&
4122 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4123 (NXT(6) == 'O') && (NXT(7) == 'N')) {
4124 SKIP(8);
4125 if (!IS_BLANK(CUR)) {
4126 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4127 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4128 ctxt->sax->error(ctxt->userData,
4129 "Space required after 'NOTATION'\n");
4130 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004131 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004132 return(0);
4133 }
4134 SKIP_BLANKS;
4135 *tree = xmlParseNotationType(ctxt);
4136 if (*tree == NULL) return(0);
4137 return(XML_ATTRIBUTE_NOTATION);
4138 }
4139 *tree = xmlParseEnumerationType(ctxt);
4140 if (*tree == NULL) return(0);
4141 return(XML_ATTRIBUTE_ENUMERATION);
4142}
4143
4144/**
4145 * xmlParseAttributeType:
4146 * @ctxt: an XML parser context
4147 * @tree: the enumeration tree built while parsing
4148 *
4149 * parse the Attribute list def for an element
4150 *
4151 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4152 *
4153 * [55] StringType ::= 'CDATA'
4154 *
4155 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4156 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4157 *
4158 * Validity constraints for attribute values syntax are checked in
4159 * xmlValidateAttributeValue()
4160 *
4161 * [ VC: ID ]
4162 * Values of type ID must match the Name production. A name must not
4163 * appear more than once in an XML document as a value of this type;
4164 * i.e., ID values must uniquely identify the elements which bear them.
4165 *
4166 * [ VC: One ID per Element Type ]
4167 * No element type may have more than one ID attribute specified.
4168 *
4169 * [ VC: ID Attribute Default ]
4170 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4171 *
4172 * [ VC: IDREF ]
4173 * Values of type IDREF must match the Name production, and values
4174 * of type IDREFS must match Names; each IDREF Name must match the value
4175 * of an ID attribute on some element in the XML document; i.e. IDREF
4176 * values must match the value of some ID attribute.
4177 *
4178 * [ VC: Entity Name ]
4179 * Values of type ENTITY must match the Name production, values
4180 * of type ENTITIES must match Names; each Entity Name must match the
4181 * name of an unparsed entity declared in the DTD.
4182 *
4183 * [ VC: Name Token ]
4184 * Values of type NMTOKEN must match the Nmtoken production; values
4185 * of type NMTOKENS must match Nmtokens.
4186 *
4187 * Returns the attribute type
4188 */
4189int
4190xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4191 SHRINK;
4192 if ((RAW == 'C') && (NXT(1) == 'D') &&
4193 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4194 (NXT(4) == 'A')) {
4195 SKIP(5);
4196 return(XML_ATTRIBUTE_CDATA);
4197 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4198 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4199 (NXT(4) == 'F') && (NXT(5) == 'S')) {
4200 SKIP(6);
4201 return(XML_ATTRIBUTE_IDREFS);
4202 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4203 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4204 (NXT(4) == 'F')) {
4205 SKIP(5);
4206 return(XML_ATTRIBUTE_IDREF);
4207 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4208 SKIP(2);
4209 return(XML_ATTRIBUTE_ID);
4210 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4211 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4212 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4213 SKIP(6);
4214 return(XML_ATTRIBUTE_ENTITY);
4215 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4216 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4217 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4218 (NXT(6) == 'E') && (NXT(7) == 'S')) {
4219 SKIP(8);
4220 return(XML_ATTRIBUTE_ENTITIES);
4221 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4222 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4223 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4224 (NXT(6) == 'N') && (NXT(7) == 'S')) {
4225 SKIP(8);
4226 return(XML_ATTRIBUTE_NMTOKENS);
4227 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4228 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4229 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4230 (NXT(6) == 'N')) {
4231 SKIP(7);
4232 return(XML_ATTRIBUTE_NMTOKEN);
4233 }
4234 return(xmlParseEnumeratedType(ctxt, tree));
4235}
4236
4237/**
4238 * xmlParseAttributeListDecl:
4239 * @ctxt: an XML parser context
4240 *
4241 * : parse the Attribute list def for an element
4242 *
4243 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4244 *
4245 * [53] AttDef ::= S Name S AttType S DefaultDecl
4246 *
4247 */
4248void
4249xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
4250 xmlChar *elemName;
4251 xmlChar *attrName;
4252 xmlEnumerationPtr tree;
4253
4254 if ((RAW == '<') && (NXT(1) == '!') &&
4255 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4256 (NXT(4) == 'T') && (NXT(5) == 'L') &&
4257 (NXT(6) == 'I') && (NXT(7) == 'S') &&
4258 (NXT(8) == 'T')) {
4259 xmlParserInputPtr input = ctxt->input;
4260
4261 SKIP(9);
4262 if (!IS_BLANK(CUR)) {
4263 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4264 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4265 ctxt->sax->error(ctxt->userData,
4266 "Space required after '<!ATTLIST'\n");
4267 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004268 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004269 }
4270 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004271 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004272 if (elemName == NULL) {
4273 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4274 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4275 ctxt->sax->error(ctxt->userData,
4276 "ATTLIST: no name for Element\n");
4277 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004278 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004279 return;
4280 }
4281 SKIP_BLANKS;
4282 GROW;
4283 while (RAW != '>') {
4284 const xmlChar *check = CUR_PTR;
4285 int type;
4286 int def;
4287 xmlChar *defaultValue = NULL;
4288
4289 GROW;
4290 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004291 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004292 if (attrName == NULL) {
4293 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4294 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4295 ctxt->sax->error(ctxt->userData,
4296 "ATTLIST: no name for Attribute\n");
4297 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004298 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004299 break;
4300 }
4301 GROW;
4302 if (!IS_BLANK(CUR)) {
4303 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4304 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4305 ctxt->sax->error(ctxt->userData,
4306 "Space required after the attribute name\n");
4307 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004308 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004309 if (attrName != NULL)
4310 xmlFree(attrName);
4311 if (defaultValue != NULL)
4312 xmlFree(defaultValue);
4313 break;
4314 }
4315 SKIP_BLANKS;
4316
4317 type = xmlParseAttributeType(ctxt, &tree);
4318 if (type <= 0) {
4319 if (attrName != NULL)
4320 xmlFree(attrName);
4321 if (defaultValue != NULL)
4322 xmlFree(defaultValue);
4323 break;
4324 }
4325
4326 GROW;
4327 if (!IS_BLANK(CUR)) {
4328 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4329 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4330 ctxt->sax->error(ctxt->userData,
4331 "Space required after the attribute type\n");
4332 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004333 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004334 if (attrName != NULL)
4335 xmlFree(attrName);
4336 if (defaultValue != NULL)
4337 xmlFree(defaultValue);
4338 if (tree != NULL)
4339 xmlFreeEnumeration(tree);
4340 break;
4341 }
4342 SKIP_BLANKS;
4343
4344 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4345 if (def <= 0) {
4346 if (attrName != NULL)
4347 xmlFree(attrName);
4348 if (defaultValue != NULL)
4349 xmlFree(defaultValue);
4350 if (tree != NULL)
4351 xmlFreeEnumeration(tree);
4352 break;
4353 }
4354
4355 GROW;
4356 if (RAW != '>') {
4357 if (!IS_BLANK(CUR)) {
4358 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4359 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4360 ctxt->sax->error(ctxt->userData,
4361 "Space required after the attribute default value\n");
4362 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004363 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004364 if (attrName != NULL)
4365 xmlFree(attrName);
4366 if (defaultValue != NULL)
4367 xmlFree(defaultValue);
4368 if (tree != NULL)
4369 xmlFreeEnumeration(tree);
4370 break;
4371 }
4372 SKIP_BLANKS;
4373 }
4374 if (check == CUR_PTR) {
4375 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4376 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4377 ctxt->sax->error(ctxt->userData,
4378 "xmlParseAttributeListDecl: detected internal error\n");
4379 if (attrName != NULL)
4380 xmlFree(attrName);
4381 if (defaultValue != NULL)
4382 xmlFree(defaultValue);
4383 if (tree != NULL)
4384 xmlFreeEnumeration(tree);
4385 break;
4386 }
4387 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4388 (ctxt->sax->attributeDecl != NULL))
4389 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4390 type, def, defaultValue, tree);
4391 if (attrName != NULL)
4392 xmlFree(attrName);
4393 if (defaultValue != NULL)
4394 xmlFree(defaultValue);
4395 GROW;
4396 }
4397 if (RAW == '>') {
4398 if (input != ctxt->input) {
4399 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4400 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4401 ctxt->sax->error(ctxt->userData,
4402"Attribute list declaration doesn't start and stop in the same entity\n");
4403 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004404 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004405 }
4406 NEXT;
4407 }
4408
4409 xmlFree(elemName);
4410 }
4411}
4412
4413/**
4414 * xmlParseElementMixedContentDecl:
4415 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004416 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004417 *
4418 * parse the declaration for a Mixed Element content
4419 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4420 *
4421 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4422 * '(' S? '#PCDATA' S? ')'
4423 *
4424 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4425 *
4426 * [ VC: No Duplicate Types ]
4427 * The same name must not appear more than once in a single
4428 * mixed-content declaration.
4429 *
4430 * returns: the list of the xmlElementContentPtr describing the element choices
4431 */
4432xmlElementContentPtr
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004433xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004434 xmlElementContentPtr ret = NULL, cur = NULL, n;
4435 xmlChar *elem = NULL;
4436
4437 GROW;
4438 if ((RAW == '#') && (NXT(1) == 'P') &&
4439 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4440 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4441 (NXT(6) == 'A')) {
4442 SKIP(7);
4443 SKIP_BLANKS;
4444 SHRINK;
4445 if (RAW == ')') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004446 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4447 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4448 if (ctxt->vctxt.error != NULL)
4449 ctxt->vctxt.error(ctxt->vctxt.userData,
4450"Element content declaration doesn't start and stop in the same entity\n");
4451 ctxt->valid = 0;
4452 }
Owen Taylor3473f882001-02-23 17:55:21 +00004453 NEXT;
4454 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4455 if (RAW == '*') {
4456 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4457 NEXT;
4458 }
4459 return(ret);
4460 }
4461 if ((RAW == '(') || (RAW == '|')) {
4462 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4463 if (ret == NULL) return(NULL);
4464 }
4465 while (RAW == '|') {
4466 NEXT;
4467 if (elem == NULL) {
4468 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4469 if (ret == NULL) return(NULL);
4470 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004471 if (cur != NULL)
4472 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004473 cur = ret;
4474 } else {
4475 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4476 if (n == NULL) return(NULL);
4477 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004478 if (n->c1 != NULL)
4479 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004480 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004481 if (n != NULL)
4482 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004483 cur = n;
4484 xmlFree(elem);
4485 }
4486 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004487 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004488 if (elem == NULL) {
4489 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4490 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4491 ctxt->sax->error(ctxt->userData,
4492 "xmlParseElementMixedContentDecl : Name expected\n");
4493 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004494 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004495 xmlFreeElementContent(cur);
4496 return(NULL);
4497 }
4498 SKIP_BLANKS;
4499 GROW;
4500 }
4501 if ((RAW == ')') && (NXT(1) == '*')) {
4502 if (elem != NULL) {
4503 cur->c2 = xmlNewElementContent(elem,
4504 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004505 if (cur->c2 != NULL)
4506 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004507 xmlFree(elem);
4508 }
4509 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004510 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4511 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4512 if (ctxt->vctxt.error != NULL)
4513 ctxt->vctxt.error(ctxt->vctxt.userData,
4514"Element content declaration doesn't start and stop in the same entity\n");
4515 ctxt->valid = 0;
4516 }
Owen Taylor3473f882001-02-23 17:55:21 +00004517 SKIP(2);
4518 } else {
4519 if (elem != NULL) xmlFree(elem);
4520 xmlFreeElementContent(ret);
4521 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4522 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4523 ctxt->sax->error(ctxt->userData,
4524 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4525 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004526 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004527 return(NULL);
4528 }
4529
4530 } else {
4531 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4532 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4533 ctxt->sax->error(ctxt->userData,
4534 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4535 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004536 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004537 }
4538 return(ret);
4539}
4540
4541/**
4542 * xmlParseElementChildrenContentDecl:
4543 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004544 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004545 *
4546 * parse the declaration for an Element children content
4547 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4548 *
4549 *
4550 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4551 *
4552 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4553 *
4554 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4555 *
4556 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4557 *
4558 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4559 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004560 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004561 * opening or closing parentheses in a choice, seq, or Mixed
4562 * construct is contained in the replacement text for a parameter
4563 * entity, both must be contained in the same replacement text. For
4564 * interoperability, if a parameter-entity reference appears in a
4565 * choice, seq, or Mixed construct, its replacement text should not
4566 * be empty, and neither the first nor last non-blank character of
4567 * the replacement text should be a connector (| or ,).
4568 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004569 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004570 * hierarchy.
4571 */
4572xmlElementContentPtr
Owen Taylor3473f882001-02-23 17:55:21 +00004573xmlParseElementChildrenContentDecl
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004574(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004575 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4576 xmlChar *elem;
4577 xmlChar type = 0;
4578
4579 SKIP_BLANKS;
4580 GROW;
4581 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004582 xmlParserInputPtr input = ctxt->input;
4583
Owen Taylor3473f882001-02-23 17:55:21 +00004584 /* Recurse on first child */
4585 NEXT;
4586 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004587 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004588 SKIP_BLANKS;
4589 GROW;
4590 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004591 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004592 if (elem == NULL) {
4593 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4594 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4595 ctxt->sax->error(ctxt->userData,
4596 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4597 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004598 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004599 return(NULL);
4600 }
4601 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004602 if (cur == NULL) {
4603 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4604 ctxt->sax->error(ctxt->userData,
4605 "xmlParseElementChildrenContentDecl : out of memory\n");
4606 ctxt->errNo = XML_ERR_NO_MEMORY;
4607 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4608 xmlFree(elem);
4609 return(NULL);
4610 }
Owen Taylor3473f882001-02-23 17:55:21 +00004611 GROW;
4612 if (RAW == '?') {
4613 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4614 NEXT;
4615 } else if (RAW == '*') {
4616 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4617 NEXT;
4618 } else if (RAW == '+') {
4619 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4620 NEXT;
4621 } else {
4622 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4623 }
4624 xmlFree(elem);
4625 GROW;
4626 }
4627 SKIP_BLANKS;
4628 SHRINK;
4629 while (RAW != ')') {
4630 /*
4631 * Each loop we parse one separator and one element.
4632 */
4633 if (RAW == ',') {
4634 if (type == 0) type = CUR;
4635
4636 /*
4637 * Detect "Name | Name , Name" error
4638 */
4639 else if (type != CUR) {
4640 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4641 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4642 ctxt->sax->error(ctxt->userData,
4643 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4644 type);
4645 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004646 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004647 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004648 xmlFreeElementContent(last);
4649 if (ret != NULL)
4650 xmlFreeElementContent(ret);
4651 return(NULL);
4652 }
4653 NEXT;
4654
4655 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4656 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004657 if ((last != NULL) && (last != ret))
4658 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004659 xmlFreeElementContent(ret);
4660 return(NULL);
4661 }
4662 if (last == NULL) {
4663 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004664 if (ret != NULL)
4665 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004666 ret = cur = op;
4667 } else {
4668 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004669 if (op != NULL)
4670 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004671 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004672 if (last != NULL)
4673 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004674 cur =op;
4675 last = NULL;
4676 }
4677 } else if (RAW == '|') {
4678 if (type == 0) type = CUR;
4679
4680 /*
4681 * Detect "Name , Name | Name" error
4682 */
4683 else if (type != CUR) {
4684 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4685 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4686 ctxt->sax->error(ctxt->userData,
4687 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4688 type);
4689 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004690 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004691 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004692 xmlFreeElementContent(last);
4693 if (ret != NULL)
4694 xmlFreeElementContent(ret);
4695 return(NULL);
4696 }
4697 NEXT;
4698
4699 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4700 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004701 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004702 xmlFreeElementContent(last);
4703 if (ret != NULL)
4704 xmlFreeElementContent(ret);
4705 return(NULL);
4706 }
4707 if (last == NULL) {
4708 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004709 if (ret != NULL)
4710 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004711 ret = cur = op;
4712 } else {
4713 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004714 if (op != NULL)
4715 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004716 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004717 if (last != NULL)
4718 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004719 cur =op;
4720 last = NULL;
4721 }
4722 } else {
4723 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4724 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4725 ctxt->sax->error(ctxt->userData,
4726 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4727 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004728 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004729 if (ret != NULL)
4730 xmlFreeElementContent(ret);
4731 return(NULL);
4732 }
4733 GROW;
4734 SKIP_BLANKS;
4735 GROW;
4736 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004737 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004738 /* Recurse on second child */
4739 NEXT;
4740 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004741 last = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004742 SKIP_BLANKS;
4743 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004744 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004745 if (elem == NULL) {
4746 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4747 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4748 ctxt->sax->error(ctxt->userData,
4749 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4750 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004751 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004752 if (ret != NULL)
4753 xmlFreeElementContent(ret);
4754 return(NULL);
4755 }
4756 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4757 xmlFree(elem);
4758 if (RAW == '?') {
4759 last->ocur = XML_ELEMENT_CONTENT_OPT;
4760 NEXT;
4761 } else if (RAW == '*') {
4762 last->ocur = XML_ELEMENT_CONTENT_MULT;
4763 NEXT;
4764 } else if (RAW == '+') {
4765 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4766 NEXT;
4767 } else {
4768 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4769 }
4770 }
4771 SKIP_BLANKS;
4772 GROW;
4773 }
4774 if ((cur != NULL) && (last != NULL)) {
4775 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004776 if (last != NULL)
4777 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004778 }
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004779 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4780 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4781 if (ctxt->vctxt.error != NULL)
4782 ctxt->vctxt.error(ctxt->vctxt.userData,
4783"Element content declaration doesn't start and stop in the same entity\n");
4784 ctxt->valid = 0;
4785 }
Owen Taylor3473f882001-02-23 17:55:21 +00004786 NEXT;
4787 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004788 if (ret != NULL)
4789 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004790 NEXT;
4791 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004792 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004793 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004794 cur = ret;
4795 /*
4796 * Some normalization:
4797 * (a | b* | c?)* == (a | b | c)*
4798 */
4799 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4800 if ((cur->c1 != NULL) &&
4801 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4802 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4803 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4804 if ((cur->c2 != NULL) &&
4805 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4806 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4807 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4808 cur = cur->c2;
4809 }
4810 }
Owen Taylor3473f882001-02-23 17:55:21 +00004811 NEXT;
4812 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004813 if (ret != NULL) {
4814 int found = 0;
4815
Daniel Veillarde470df72001-04-18 21:41:07 +00004816 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004817 /*
4818 * Some normalization:
4819 * (a | b*)+ == (a | b)*
4820 * (a | b?)+ == (a | b)*
4821 */
4822 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4823 if ((cur->c1 != NULL) &&
4824 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4825 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4826 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4827 found = 1;
4828 }
4829 if ((cur->c2 != NULL) &&
4830 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4831 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4832 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4833 found = 1;
4834 }
4835 cur = cur->c2;
4836 }
4837 if (found)
4838 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4839 }
Owen Taylor3473f882001-02-23 17:55:21 +00004840 NEXT;
4841 }
4842 return(ret);
4843}
4844
4845/**
4846 * xmlParseElementContentDecl:
4847 * @ctxt: an XML parser context
4848 * @name: the name of the element being defined.
4849 * @result: the Element Content pointer will be stored here if any
4850 *
4851 * parse the declaration for an Element content either Mixed or Children,
4852 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4853 *
4854 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4855 *
4856 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4857 */
4858
4859int
4860xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4861 xmlElementContentPtr *result) {
4862
4863 xmlElementContentPtr tree = NULL;
4864 xmlParserInputPtr input = ctxt->input;
4865 int res;
4866
4867 *result = NULL;
4868
4869 if (RAW != '(') {
4870 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4871 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4872 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004873 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004874 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004875 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004876 return(-1);
4877 }
4878 NEXT;
4879 GROW;
4880 SKIP_BLANKS;
4881 if ((RAW == '#') && (NXT(1) == 'P') &&
4882 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4883 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4884 (NXT(6) == 'A')) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004885 tree = xmlParseElementMixedContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004886 res = XML_ELEMENT_TYPE_MIXED;
4887 } else {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004888 tree = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004889 res = XML_ELEMENT_TYPE_ELEMENT;
4890 }
Owen Taylor3473f882001-02-23 17:55:21 +00004891 SKIP_BLANKS;
4892 *result = tree;
4893 return(res);
4894}
4895
4896/**
4897 * xmlParseElementDecl:
4898 * @ctxt: an XML parser context
4899 *
4900 * parse an Element declaration.
4901 *
4902 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4903 *
4904 * [ VC: Unique Element Type Declaration ]
4905 * No element type may be declared more than once
4906 *
4907 * Returns the type of the element, or -1 in case of error
4908 */
4909int
4910xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4911 xmlChar *name;
4912 int ret = -1;
4913 xmlElementContentPtr content = NULL;
4914
4915 GROW;
4916 if ((RAW == '<') && (NXT(1) == '!') &&
4917 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4918 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4919 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4920 (NXT(8) == 'T')) {
4921 xmlParserInputPtr input = ctxt->input;
4922
4923 SKIP(9);
4924 if (!IS_BLANK(CUR)) {
4925 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4926 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4927 ctxt->sax->error(ctxt->userData,
4928 "Space required after 'ELEMENT'\n");
4929 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004930 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004931 }
4932 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004933 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004934 if (name == NULL) {
4935 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4936 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4937 ctxt->sax->error(ctxt->userData,
4938 "xmlParseElementDecl: no name for Element\n");
4939 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004940 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004941 return(-1);
4942 }
4943 while ((RAW == 0) && (ctxt->inputNr > 1))
4944 xmlPopInput(ctxt);
4945 if (!IS_BLANK(CUR)) {
4946 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4947 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4948 ctxt->sax->error(ctxt->userData,
4949 "Space required after the element name\n");
4950 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004951 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004952 }
4953 SKIP_BLANKS;
4954 if ((RAW == 'E') && (NXT(1) == 'M') &&
4955 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4956 (NXT(4) == 'Y')) {
4957 SKIP(5);
4958 /*
4959 * Element must always be empty.
4960 */
4961 ret = XML_ELEMENT_TYPE_EMPTY;
4962 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4963 (NXT(2) == 'Y')) {
4964 SKIP(3);
4965 /*
4966 * Element is a generic container.
4967 */
4968 ret = XML_ELEMENT_TYPE_ANY;
4969 } else if (RAW == '(') {
4970 ret = xmlParseElementContentDecl(ctxt, name, &content);
4971 } else {
4972 /*
4973 * [ WFC: PEs in Internal Subset ] error handling.
4974 */
4975 if ((RAW == '%') && (ctxt->external == 0) &&
4976 (ctxt->inputNr == 1)) {
4977 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4978 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4979 ctxt->sax->error(ctxt->userData,
4980 "PEReference: forbidden within markup decl in internal subset\n");
4981 } else {
4982 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4983 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4984 ctxt->sax->error(ctxt->userData,
4985 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4986 }
4987 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004988 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004989 if (name != NULL) xmlFree(name);
4990 return(-1);
4991 }
4992
4993 SKIP_BLANKS;
4994 /*
4995 * Pop-up of finished entities.
4996 */
4997 while ((RAW == 0) && (ctxt->inputNr > 1))
4998 xmlPopInput(ctxt);
4999 SKIP_BLANKS;
5000
5001 if (RAW != '>') {
5002 ctxt->errNo = XML_ERR_GT_REQUIRED;
5003 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5004 ctxt->sax->error(ctxt->userData,
5005 "xmlParseElementDecl: expected '>' at the end\n");
5006 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005007 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005008 } else {
5009 if (input != ctxt->input) {
5010 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5011 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5012 ctxt->sax->error(ctxt->userData,
5013"Element declaration doesn't start and stop in the same entity\n");
5014 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005015 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005016 }
5017
5018 NEXT;
5019 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5020 (ctxt->sax->elementDecl != NULL))
5021 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5022 content);
5023 }
5024 if (content != NULL) {
5025 xmlFreeElementContent(content);
5026 }
5027 if (name != NULL) {
5028 xmlFree(name);
5029 }
5030 }
5031 return(ret);
5032}
5033
5034/**
Owen Taylor3473f882001-02-23 17:55:21 +00005035 * xmlParseConditionalSections
5036 * @ctxt: an XML parser context
5037 *
5038 * [61] conditionalSect ::= includeSect | ignoreSect
5039 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5040 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5041 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5042 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5043 */
5044
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005045static void
Owen Taylor3473f882001-02-23 17:55:21 +00005046xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5047 SKIP(3);
5048 SKIP_BLANKS;
5049 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
5050 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
5051 (NXT(6) == 'E')) {
5052 SKIP(7);
5053 SKIP_BLANKS;
5054 if (RAW != '[') {
5055 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5056 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5057 ctxt->sax->error(ctxt->userData,
5058 "XML conditional section '[' expected\n");
5059 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005060 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005061 } else {
5062 NEXT;
5063 }
5064 if (xmlParserDebugEntities) {
5065 if ((ctxt->input != NULL) && (ctxt->input->filename))
5066 xmlGenericError(xmlGenericErrorContext,
5067 "%s(%d): ", ctxt->input->filename,
5068 ctxt->input->line);
5069 xmlGenericError(xmlGenericErrorContext,
5070 "Entering INCLUDE Conditional Section\n");
5071 }
5072
5073 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5074 (NXT(2) != '>'))) {
5075 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005076 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005077
5078 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5079 xmlParseConditionalSections(ctxt);
5080 } else if (IS_BLANK(CUR)) {
5081 NEXT;
5082 } else if (RAW == '%') {
5083 xmlParsePEReference(ctxt);
5084 } else
5085 xmlParseMarkupDecl(ctxt);
5086
5087 /*
5088 * Pop-up of finished entities.
5089 */
5090 while ((RAW == 0) && (ctxt->inputNr > 1))
5091 xmlPopInput(ctxt);
5092
Daniel Veillardfdc91562002-07-01 21:52:03 +00005093 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005094 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5095 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5096 ctxt->sax->error(ctxt->userData,
5097 "Content error in the external subset\n");
5098 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005099 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005100 break;
5101 }
5102 }
5103 if (xmlParserDebugEntities) {
5104 if ((ctxt->input != NULL) && (ctxt->input->filename))
5105 xmlGenericError(xmlGenericErrorContext,
5106 "%s(%d): ", ctxt->input->filename,
5107 ctxt->input->line);
5108 xmlGenericError(xmlGenericErrorContext,
5109 "Leaving INCLUDE Conditional Section\n");
5110 }
5111
5112 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
5113 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
5114 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005115 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005116 int depth = 0;
5117
5118 SKIP(6);
5119 SKIP_BLANKS;
5120 if (RAW != '[') {
5121 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5122 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5123 ctxt->sax->error(ctxt->userData,
5124 "XML conditional section '[' expected\n");
5125 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005126 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005127 } else {
5128 NEXT;
5129 }
5130 if (xmlParserDebugEntities) {
5131 if ((ctxt->input != NULL) && (ctxt->input->filename))
5132 xmlGenericError(xmlGenericErrorContext,
5133 "%s(%d): ", ctxt->input->filename,
5134 ctxt->input->line);
5135 xmlGenericError(xmlGenericErrorContext,
5136 "Entering IGNORE Conditional Section\n");
5137 }
5138
5139 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005140 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005141 * But disable SAX event generating DTD building in the meantime
5142 */
5143 state = ctxt->disableSAX;
5144 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005145 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005146 ctxt->instate = XML_PARSER_IGNORE;
5147
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005148 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005149 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5150 depth++;
5151 SKIP(3);
5152 continue;
5153 }
5154 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5155 if (--depth >= 0) SKIP(3);
5156 continue;
5157 }
5158 NEXT;
5159 continue;
5160 }
5161
5162 ctxt->disableSAX = state;
5163 ctxt->instate = instate;
5164
5165 if (xmlParserDebugEntities) {
5166 if ((ctxt->input != NULL) && (ctxt->input->filename))
5167 xmlGenericError(xmlGenericErrorContext,
5168 "%s(%d): ", ctxt->input->filename,
5169 ctxt->input->line);
5170 xmlGenericError(xmlGenericErrorContext,
5171 "Leaving IGNORE Conditional Section\n");
5172 }
5173
5174 } else {
5175 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5176 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5177 ctxt->sax->error(ctxt->userData,
5178 "XML conditional section INCLUDE or IGNORE keyword expected\n");
5179 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005180 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005181 }
5182
5183 if (RAW == 0)
5184 SHRINK;
5185
5186 if (RAW == 0) {
5187 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
5188 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5189 ctxt->sax->error(ctxt->userData,
5190 "XML conditional section not closed\n");
5191 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005192 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005193 } else {
5194 SKIP(3);
5195 }
5196}
5197
5198/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005199 * xmlParseMarkupDecl:
5200 * @ctxt: an XML parser context
5201 *
5202 * parse Markup declarations
5203 *
5204 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5205 * NotationDecl | PI | Comment
5206 *
5207 * [ VC: Proper Declaration/PE Nesting ]
5208 * Parameter-entity replacement text must be properly nested with
5209 * markup declarations. That is to say, if either the first character
5210 * or the last character of a markup declaration (markupdecl above) is
5211 * contained in the replacement text for a parameter-entity reference,
5212 * both must be contained in the same replacement text.
5213 *
5214 * [ WFC: PEs in Internal Subset ]
5215 * In the internal DTD subset, parameter-entity references can occur
5216 * only where markup declarations can occur, not within markup declarations.
5217 * (This does not apply to references that occur in external parameter
5218 * entities or to the external subset.)
5219 */
5220void
5221xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5222 GROW;
5223 xmlParseElementDecl(ctxt);
5224 xmlParseAttributeListDecl(ctxt);
5225 xmlParseEntityDecl(ctxt);
5226 xmlParseNotationDecl(ctxt);
5227 xmlParsePI(ctxt);
5228 xmlParseComment(ctxt);
5229 /*
5230 * This is only for internal subset. On external entities,
5231 * the replacement is done before parsing stage
5232 */
5233 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5234 xmlParsePEReference(ctxt);
5235
5236 /*
5237 * Conditional sections are allowed from entities included
5238 * by PE References in the internal subset.
5239 */
5240 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5241 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5242 xmlParseConditionalSections(ctxt);
5243 }
5244 }
5245
5246 ctxt->instate = XML_PARSER_DTD;
5247}
5248
5249/**
5250 * xmlParseTextDecl:
5251 * @ctxt: an XML parser context
5252 *
5253 * parse an XML declaration header for external entities
5254 *
5255 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5256 *
5257 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5258 */
5259
5260void
5261xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5262 xmlChar *version;
5263
5264 /*
5265 * We know that '<?xml' is here.
5266 */
5267 if ((RAW == '<') && (NXT(1) == '?') &&
5268 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5269 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5270 SKIP(5);
5271 } else {
5272 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
5273 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5274 ctxt->sax->error(ctxt->userData,
5275 "Text declaration '<?xml' required\n");
5276 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005277 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005278
5279 return;
5280 }
5281
5282 if (!IS_BLANK(CUR)) {
5283 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5284 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5285 ctxt->sax->error(ctxt->userData,
5286 "Space needed after '<?xml'\n");
5287 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005288 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005289 }
5290 SKIP_BLANKS;
5291
5292 /*
5293 * We may have the VersionInfo here.
5294 */
5295 version = xmlParseVersionInfo(ctxt);
5296 if (version == NULL)
5297 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005298 else {
5299 if (!IS_BLANK(CUR)) {
5300 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5301 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5302 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5303 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005304 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard401c2112002-01-07 16:54:10 +00005305 }
5306 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005307 ctxt->input->version = version;
5308
5309 /*
5310 * We must have the encoding declaration
5311 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005312 xmlParseEncodingDecl(ctxt);
5313 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5314 /*
5315 * The XML REC instructs us to stop parsing right here
5316 */
5317 return;
5318 }
5319
5320 SKIP_BLANKS;
5321 if ((RAW == '?') && (NXT(1) == '>')) {
5322 SKIP(2);
5323 } else if (RAW == '>') {
5324 /* Deprecated old WD ... */
5325 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5326 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5327 ctxt->sax->error(ctxt->userData,
5328 "XML declaration must end-up with '?>'\n");
5329 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005330 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005331 NEXT;
5332 } else {
5333 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5334 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5335 ctxt->sax->error(ctxt->userData,
5336 "parsing XML declaration: '?>' expected\n");
5337 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005338 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005339 MOVETO_ENDTAG(CUR_PTR);
5340 NEXT;
5341 }
5342}
5343
5344/**
Owen Taylor3473f882001-02-23 17:55:21 +00005345 * xmlParseExternalSubset:
5346 * @ctxt: an XML parser context
5347 * @ExternalID: the external identifier
5348 * @SystemID: the system identifier (or URL)
5349 *
5350 * parse Markup declarations from an external subset
5351 *
5352 * [30] extSubset ::= textDecl? extSubsetDecl
5353 *
5354 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5355 */
5356void
5357xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5358 const xmlChar *SystemID) {
5359 GROW;
5360 if ((RAW == '<') && (NXT(1) == '?') &&
5361 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5362 (NXT(4) == 'l')) {
5363 xmlParseTextDecl(ctxt);
5364 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5365 /*
5366 * The XML REC instructs us to stop parsing right here
5367 */
5368 ctxt->instate = XML_PARSER_EOF;
5369 return;
5370 }
5371 }
5372 if (ctxt->myDoc == NULL) {
5373 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5374 }
5375 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5376 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5377
5378 ctxt->instate = XML_PARSER_DTD;
5379 ctxt->external = 1;
5380 while (((RAW == '<') && (NXT(1) == '?')) ||
5381 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005382 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005383 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005384 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005385
5386 GROW;
5387 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5388 xmlParseConditionalSections(ctxt);
5389 } else if (IS_BLANK(CUR)) {
5390 NEXT;
5391 } else if (RAW == '%') {
5392 xmlParsePEReference(ctxt);
5393 } else
5394 xmlParseMarkupDecl(ctxt);
5395
5396 /*
5397 * Pop-up of finished entities.
5398 */
5399 while ((RAW == 0) && (ctxt->inputNr > 1))
5400 xmlPopInput(ctxt);
5401
Daniel Veillardfdc91562002-07-01 21:52:03 +00005402 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005403 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5404 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5405 ctxt->sax->error(ctxt->userData,
5406 "Content error in the external subset\n");
5407 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005408 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005409 break;
5410 }
5411 }
5412
5413 if (RAW != 0) {
5414 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5415 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5416 ctxt->sax->error(ctxt->userData,
5417 "Extra content at the end of the document\n");
5418 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005419 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005420 }
5421
5422}
5423
5424/**
5425 * xmlParseReference:
5426 * @ctxt: an XML parser context
5427 *
5428 * parse and handle entity references in content, depending on the SAX
5429 * interface, this may end-up in a call to character() if this is a
5430 * CharRef, a predefined entity, if there is no reference() callback.
5431 * or if the parser was asked to switch to that mode.
5432 *
5433 * [67] Reference ::= EntityRef | CharRef
5434 */
5435void
5436xmlParseReference(xmlParserCtxtPtr ctxt) {
5437 xmlEntityPtr ent;
5438 xmlChar *val;
5439 if (RAW != '&') return;
5440
5441 if (NXT(1) == '#') {
5442 int i = 0;
5443 xmlChar out[10];
5444 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005445 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005446
5447 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5448 /*
5449 * So we are using non-UTF-8 buffers
5450 * Check that the char fit on 8bits, if not
5451 * generate a CharRef.
5452 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005453 if (value <= 0xFF) {
5454 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005455 out[1] = 0;
5456 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5457 (!ctxt->disableSAX))
5458 ctxt->sax->characters(ctxt->userData, out, 1);
5459 } else {
5460 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005461 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005462 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005463 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005464 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5465 (!ctxt->disableSAX))
5466 ctxt->sax->reference(ctxt->userData, out);
5467 }
5468 } else {
5469 /*
5470 * Just encode the value in UTF-8
5471 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005472 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005473 out[i] = 0;
5474 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5475 (!ctxt->disableSAX))
5476 ctxt->sax->characters(ctxt->userData, out, i);
5477 }
5478 } else {
5479 ent = xmlParseEntityRef(ctxt);
5480 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005481 if (!ctxt->wellFormed)
5482 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005483 if ((ent->name != NULL) &&
5484 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5485 xmlNodePtr list = NULL;
5486 int ret;
5487
5488
5489 /*
5490 * The first reference to the entity trigger a parsing phase
5491 * where the ent->children is filled with the result from
5492 * the parsing.
5493 */
5494 if (ent->children == NULL) {
5495 xmlChar *value;
5496 value = ent->content;
5497
5498 /*
5499 * Check that this entity is well formed
5500 */
5501 if ((value != NULL) &&
5502 (value[1] == 0) && (value[0] == '<') &&
5503 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5504 /*
5505 * DONE: get definite answer on this !!!
5506 * Lots of entity decls are used to declare a single
5507 * char
5508 * <!ENTITY lt "<">
5509 * Which seems to be valid since
5510 * 2.4: The ampersand character (&) and the left angle
5511 * bracket (<) may appear in their literal form only
5512 * when used ... They are also legal within the literal
5513 * entity value of an internal entity declaration;i
5514 * see "4.3.2 Well-Formed Parsed Entities".
5515 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5516 * Looking at the OASIS test suite and James Clark
5517 * tests, this is broken. However the XML REC uses
5518 * it. Is the XML REC not well-formed ????
5519 * This is a hack to avoid this problem
5520 *
5521 * ANSWER: since lt gt amp .. are already defined,
5522 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005523 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005524 * is lousy but acceptable.
5525 */
5526 list = xmlNewDocText(ctxt->myDoc, value);
5527 if (list != NULL) {
5528 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5529 (ent->children == NULL)) {
5530 ent->children = list;
5531 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005532 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005533 list->parent = (xmlNodePtr) ent;
5534 } else {
5535 xmlFreeNodeList(list);
5536 }
5537 } else if (list != NULL) {
5538 xmlFreeNodeList(list);
5539 }
5540 } else {
5541 /*
5542 * 4.3.2: An internal general parsed entity is well-formed
5543 * if its replacement text matches the production labeled
5544 * content.
5545 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005546
5547 void *user_data;
5548 /*
5549 * This is a bit hackish but this seems the best
5550 * way to make sure both SAX and DOM entity support
5551 * behaves okay.
5552 */
5553 if (ctxt->userData == ctxt)
5554 user_data = NULL;
5555 else
5556 user_data = ctxt->userData;
5557
Owen Taylor3473f882001-02-23 17:55:21 +00005558 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5559 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005560 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5561 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005562 ctxt->depth--;
5563 } else if (ent->etype ==
5564 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5565 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005566 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005567 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005568 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005569 ctxt->depth--;
5570 } else {
5571 ret = -1;
5572 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5573 ctxt->sax->error(ctxt->userData,
5574 "Internal: invalid entity type\n");
5575 }
5576 if (ret == XML_ERR_ENTITY_LOOP) {
5577 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5578 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5579 ctxt->sax->error(ctxt->userData,
5580 "Detected entity reference loop\n");
5581 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005582 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005583 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005584 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005585 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5586 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005587 (ent->children == NULL)) {
5588 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005589 if (ctxt->replaceEntities) {
5590 /*
5591 * Prune it directly in the generated document
5592 * except for single text nodes.
5593 */
5594 if ((list->type == XML_TEXT_NODE) &&
5595 (list->next == NULL)) {
5596 list->parent = (xmlNodePtr) ent;
5597 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005598 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005599 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005600 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005601 while (list != NULL) {
5602 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005603 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005604 if (list->next == NULL)
5605 ent->last = list;
5606 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005607 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005608 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005609 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5610 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005611 }
5612 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005613 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005614 while (list != NULL) {
5615 list->parent = (xmlNodePtr) ent;
5616 if (list->next == NULL)
5617 ent->last = list;
5618 list = list->next;
5619 }
Owen Taylor3473f882001-02-23 17:55:21 +00005620 }
5621 } else {
5622 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005623 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005624 }
5625 } else if (ret > 0) {
5626 ctxt->errNo = ret;
5627 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5628 ctxt->sax->error(ctxt->userData,
5629 "Entity value required\n");
5630 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005631 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005632 } else if (list != NULL) {
5633 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005634 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005635 }
5636 }
5637 }
5638 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5639 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5640 /*
5641 * Create a node.
5642 */
5643 ctxt->sax->reference(ctxt->userData, ent->name);
5644 return;
5645 } else if (ctxt->replaceEntities) {
5646 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5647 /*
5648 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005649 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005650 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005651 */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005652 if ((list == NULL) && (ent->owner == 0)) {
5653 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005654 cur = ent->children;
5655 while (cur != NULL) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005656 nw = xmlCopyNode(cur, 1);
5657 if (nw != NULL) {
5658 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005659 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005660 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005661 }
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005662 xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005663 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005664 if (cur == ent->last)
5665 break;
5666 cur = cur->next;
5667 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005668 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005669 xmlAddEntityReference(ent, firstChild, nw);
5670 } else if (list == NULL) {
5671 xmlNodePtr nw = NULL, cur, next, last,
5672 firstChild = NULL;
5673 /*
5674 * Copy the entity child list and make it the new
5675 * entity child list. The goal is to make sure any
5676 * ID or REF referenced will be the one from the
5677 * document content and not the entity copy.
5678 */
5679 cur = ent->children;
5680 ent->children = NULL;
5681 last = ent->last;
5682 ent->last = NULL;
5683 while (cur != NULL) {
5684 next = cur->next;
5685 cur->next = NULL;
5686 cur->parent = NULL;
5687 nw = xmlCopyNode(cur, 1);
5688 if (nw != NULL) {
5689 nw->_private = cur->_private;
5690 if (firstChild == NULL){
5691 firstChild = cur;
5692 }
5693 xmlAddChild((xmlNodePtr) ent, nw);
5694 xmlAddChild(ctxt->node, cur);
5695 }
5696 if (cur == last)
5697 break;
5698 cur = next;
5699 }
5700 ent->owner = 1;
5701 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5702 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005703 } else {
5704 /*
5705 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005706 * node with a possible previous text one which
5707 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005708 */
5709 if (ent->children->type == XML_TEXT_NODE)
5710 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5711 if ((ent->last != ent->children) &&
5712 (ent->last->type == XML_TEXT_NODE))
5713 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5714 xmlAddChildList(ctxt->node, ent->children);
5715 }
5716
Owen Taylor3473f882001-02-23 17:55:21 +00005717 /*
5718 * This is to avoid a nasty side effect, see
5719 * characters() in SAX.c
5720 */
5721 ctxt->nodemem = 0;
5722 ctxt->nodelen = 0;
5723 return;
5724 } else {
5725 /*
5726 * Probably running in SAX mode
5727 */
5728 xmlParserInputPtr input;
5729
5730 input = xmlNewEntityInputStream(ctxt, ent);
5731 xmlPushInput(ctxt, input);
5732 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5733 (RAW == '<') && (NXT(1) == '?') &&
5734 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5735 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5736 xmlParseTextDecl(ctxt);
5737 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5738 /*
5739 * The XML REC instructs us to stop parsing right here
5740 */
5741 ctxt->instate = XML_PARSER_EOF;
5742 return;
5743 }
5744 if (input->standalone == 1) {
5745 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5746 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5747 ctxt->sax->error(ctxt->userData,
5748 "external parsed entities cannot be standalone\n");
5749 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005750 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005751 }
5752 }
5753 return;
5754 }
5755 }
5756 } else {
5757 val = ent->content;
5758 if (val == NULL) return;
5759 /*
5760 * inline the entity.
5761 */
5762 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5763 (!ctxt->disableSAX))
5764 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5765 }
5766 }
5767}
5768
5769/**
5770 * xmlParseEntityRef:
5771 * @ctxt: an XML parser context
5772 *
5773 * parse ENTITY references declarations
5774 *
5775 * [68] EntityRef ::= '&' Name ';'
5776 *
5777 * [ WFC: Entity Declared ]
5778 * In a document without any DTD, a document with only an internal DTD
5779 * subset which contains no parameter entity references, or a document
5780 * with "standalone='yes'", the Name given in the entity reference
5781 * must match that in an entity declaration, except that well-formed
5782 * documents need not declare any of the following entities: amp, lt,
5783 * gt, apos, quot. The declaration of a parameter entity must precede
5784 * any reference to it. Similarly, the declaration of a general entity
5785 * must precede any reference to it which appears in a default value in an
5786 * attribute-list declaration. Note that if entities are declared in the
5787 * external subset or in external parameter entities, a non-validating
5788 * processor is not obligated to read and process their declarations;
5789 * for such documents, the rule that an entity must be declared is a
5790 * well-formedness constraint only if standalone='yes'.
5791 *
5792 * [ WFC: Parsed Entity ]
5793 * An entity reference must not contain the name of an unparsed entity
5794 *
5795 * Returns the xmlEntityPtr if found, or NULL otherwise.
5796 */
5797xmlEntityPtr
5798xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5799 xmlChar *name;
5800 xmlEntityPtr ent = NULL;
5801
5802 GROW;
5803
5804 if (RAW == '&') {
5805 NEXT;
5806 name = xmlParseName(ctxt);
5807 if (name == NULL) {
5808 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5809 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5810 ctxt->sax->error(ctxt->userData,
5811 "xmlParseEntityRef: no name\n");
5812 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005813 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005814 } else {
5815 if (RAW == ';') {
5816 NEXT;
5817 /*
5818 * Ask first SAX for entity resolution, otherwise try the
5819 * predefined set.
5820 */
5821 if (ctxt->sax != NULL) {
5822 if (ctxt->sax->getEntity != NULL)
5823 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005824 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00005825 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005826 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
5827 (ctxt->userData==ctxt)) {
Daniel Veillard5997aca2002-03-18 18:36:20 +00005828 ent = getEntity(ctxt, name);
5829 }
Owen Taylor3473f882001-02-23 17:55:21 +00005830 }
5831 /*
5832 * [ WFC: Entity Declared ]
5833 * In a document without any DTD, a document with only an
5834 * internal DTD subset which contains no parameter entity
5835 * references, or a document with "standalone='yes'", the
5836 * Name given in the entity reference must match that in an
5837 * entity declaration, except that well-formed documents
5838 * need not declare any of the following entities: amp, lt,
5839 * gt, apos, quot.
5840 * The declaration of a parameter entity must precede any
5841 * reference to it.
5842 * Similarly, the declaration of a general entity must
5843 * precede any reference to it which appears in a default
5844 * value in an attribute-list declaration. Note that if
5845 * entities are declared in the external subset or in
5846 * external parameter entities, a non-validating processor
5847 * is not obligated to read and process their declarations;
5848 * for such documents, the rule that an entity must be
5849 * declared is a well-formedness constraint only if
5850 * standalone='yes'.
5851 */
5852 if (ent == NULL) {
5853 if ((ctxt->standalone == 1) ||
5854 ((ctxt->hasExternalSubset == 0) &&
5855 (ctxt->hasPErefs == 0))) {
5856 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5857 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5858 ctxt->sax->error(ctxt->userData,
5859 "Entity '%s' not defined\n", name);
5860 ctxt->wellFormed = 0;
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005861 ctxt->valid = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005862 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005863 } else {
5864 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005865 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005866 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005867 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005868 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005869 }
5870 }
5871
5872 /*
5873 * [ WFC: Parsed Entity ]
5874 * An entity reference must not contain the name of an
5875 * unparsed entity
5876 */
5877 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5878 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5879 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5880 ctxt->sax->error(ctxt->userData,
5881 "Entity reference to unparsed entity %s\n", name);
5882 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005883 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005884 }
5885
5886 /*
5887 * [ WFC: No External Entity References ]
5888 * Attribute values cannot contain direct or indirect
5889 * entity references to external entities.
5890 */
5891 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5892 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5893 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5894 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5895 ctxt->sax->error(ctxt->userData,
5896 "Attribute references external entity '%s'\n", name);
5897 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005898 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005899 }
5900 /*
5901 * [ WFC: No < in Attribute Values ]
5902 * The replacement text of any entity referred to directly or
5903 * indirectly in an attribute value (other than "&lt;") must
5904 * not contain a <.
5905 */
5906 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5907 (ent != NULL) &&
5908 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5909 (ent->content != NULL) &&
5910 (xmlStrchr(ent->content, '<'))) {
5911 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5912 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5913 ctxt->sax->error(ctxt->userData,
5914 "'<' in entity '%s' is not allowed in attributes values\n", name);
5915 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005916 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005917 }
5918
5919 /*
5920 * Internal check, no parameter entities here ...
5921 */
5922 else {
5923 switch (ent->etype) {
5924 case XML_INTERNAL_PARAMETER_ENTITY:
5925 case XML_EXTERNAL_PARAMETER_ENTITY:
5926 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5927 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5928 ctxt->sax->error(ctxt->userData,
5929 "Attempt to reference the parameter entity '%s'\n", name);
5930 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005931 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005932 break;
5933 default:
5934 break;
5935 }
5936 }
5937
5938 /*
5939 * [ WFC: No Recursion ]
5940 * A parsed entity must not contain a recursive reference
5941 * to itself, either directly or indirectly.
5942 * Done somewhere else
5943 */
5944
5945 } else {
5946 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5947 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5948 ctxt->sax->error(ctxt->userData,
5949 "xmlParseEntityRef: expecting ';'\n");
5950 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005951 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005952 }
5953 xmlFree(name);
5954 }
5955 }
5956 return(ent);
5957}
5958
5959/**
5960 * xmlParseStringEntityRef:
5961 * @ctxt: an XML parser context
5962 * @str: a pointer to an index in the string
5963 *
5964 * parse ENTITY references declarations, but this version parses it from
5965 * a string value.
5966 *
5967 * [68] EntityRef ::= '&' Name ';'
5968 *
5969 * [ WFC: Entity Declared ]
5970 * In a document without any DTD, a document with only an internal DTD
5971 * subset which contains no parameter entity references, or a document
5972 * with "standalone='yes'", the Name given in the entity reference
5973 * must match that in an entity declaration, except that well-formed
5974 * documents need not declare any of the following entities: amp, lt,
5975 * gt, apos, quot. The declaration of a parameter entity must precede
5976 * any reference to it. Similarly, the declaration of a general entity
5977 * must precede any reference to it which appears in a default value in an
5978 * attribute-list declaration. Note that if entities are declared in the
5979 * external subset or in external parameter entities, a non-validating
5980 * processor is not obligated to read and process their declarations;
5981 * for such documents, the rule that an entity must be declared is a
5982 * well-formedness constraint only if standalone='yes'.
5983 *
5984 * [ WFC: Parsed Entity ]
5985 * An entity reference must not contain the name of an unparsed entity
5986 *
5987 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5988 * is updated to the current location in the string.
5989 */
5990xmlEntityPtr
5991xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5992 xmlChar *name;
5993 const xmlChar *ptr;
5994 xmlChar cur;
5995 xmlEntityPtr ent = NULL;
5996
5997 if ((str == NULL) || (*str == NULL))
5998 return(NULL);
5999 ptr = *str;
6000 cur = *ptr;
6001 if (cur == '&') {
6002 ptr++;
6003 cur = *ptr;
6004 name = xmlParseStringName(ctxt, &ptr);
6005 if (name == NULL) {
6006 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6007 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6008 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00006009 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006010 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006011 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006012 } else {
6013 if (*ptr == ';') {
6014 ptr++;
6015 /*
6016 * Ask first SAX for entity resolution, otherwise try the
6017 * predefined set.
6018 */
6019 if (ctxt->sax != NULL) {
6020 if (ctxt->sax->getEntity != NULL)
6021 ent = ctxt->sax->getEntity(ctxt->userData, name);
6022 if (ent == NULL)
6023 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006024 if ((ent == NULL) && (ctxt->userData==ctxt)) {
6025 ent = getEntity(ctxt, name);
6026 }
Owen Taylor3473f882001-02-23 17:55:21 +00006027 }
6028 /*
6029 * [ WFC: Entity Declared ]
6030 * In a document without any DTD, a document with only an
6031 * internal DTD subset which contains no parameter entity
6032 * references, or a document with "standalone='yes'", the
6033 * Name given in the entity reference must match that in an
6034 * entity declaration, except that well-formed documents
6035 * need not declare any of the following entities: amp, lt,
6036 * gt, apos, quot.
6037 * The declaration of a parameter entity must precede any
6038 * reference to it.
6039 * Similarly, the declaration of a general entity must
6040 * precede any reference to it which appears in a default
6041 * value in an attribute-list declaration. Note that if
6042 * entities are declared in the external subset or in
6043 * external parameter entities, a non-validating processor
6044 * is not obligated to read and process their declarations;
6045 * for such documents, the rule that an entity must be
6046 * declared is a well-formedness constraint only if
6047 * standalone='yes'.
6048 */
6049 if (ent == NULL) {
6050 if ((ctxt->standalone == 1) ||
6051 ((ctxt->hasExternalSubset == 0) &&
6052 (ctxt->hasPErefs == 0))) {
6053 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6054 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6055 ctxt->sax->error(ctxt->userData,
6056 "Entity '%s' not defined\n", name);
6057 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006058 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006059 } else {
6060 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
6061 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6062 ctxt->sax->warning(ctxt->userData,
6063 "Entity '%s' not defined\n", name);
6064 }
6065 }
6066
6067 /*
6068 * [ WFC: Parsed Entity ]
6069 * An entity reference must not contain the name of an
6070 * unparsed entity
6071 */
6072 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
6073 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
6074 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6075 ctxt->sax->error(ctxt->userData,
6076 "Entity reference to unparsed entity %s\n", name);
6077 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006078 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006079 }
6080
6081 /*
6082 * [ WFC: No External Entity References ]
6083 * Attribute values cannot contain direct or indirect
6084 * entity references to external entities.
6085 */
6086 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6087 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
6088 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
6089 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6090 ctxt->sax->error(ctxt->userData,
6091 "Attribute references external entity '%s'\n", name);
6092 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006093 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006094 }
6095 /*
6096 * [ WFC: No < in Attribute Values ]
6097 * The replacement text of any entity referred to directly or
6098 * indirectly in an attribute value (other than "&lt;") must
6099 * not contain a <.
6100 */
6101 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6102 (ent != NULL) &&
6103 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6104 (ent->content != NULL) &&
6105 (xmlStrchr(ent->content, '<'))) {
6106 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6107 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6108 ctxt->sax->error(ctxt->userData,
6109 "'<' in entity '%s' is not allowed in attributes values\n", name);
6110 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006111 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006112 }
6113
6114 /*
6115 * Internal check, no parameter entities here ...
6116 */
6117 else {
6118 switch (ent->etype) {
6119 case XML_INTERNAL_PARAMETER_ENTITY:
6120 case XML_EXTERNAL_PARAMETER_ENTITY:
6121 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6122 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6123 ctxt->sax->error(ctxt->userData,
6124 "Attempt to reference the parameter entity '%s'\n", name);
6125 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006126 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006127 break;
6128 default:
6129 break;
6130 }
6131 }
6132
6133 /*
6134 * [ WFC: No Recursion ]
6135 * A parsed entity must not contain a recursive reference
6136 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006137 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006138 */
6139
6140 } else {
6141 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6142 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6143 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00006144 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006145 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006146 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006147 }
6148 xmlFree(name);
6149 }
6150 }
6151 *str = ptr;
6152 return(ent);
6153}
6154
6155/**
6156 * xmlParsePEReference:
6157 * @ctxt: an XML parser context
6158 *
6159 * parse PEReference declarations
6160 * The entity content is handled directly by pushing it's content as
6161 * a new input stream.
6162 *
6163 * [69] PEReference ::= '%' Name ';'
6164 *
6165 * [ WFC: No Recursion ]
6166 * A parsed entity must not contain a recursive
6167 * reference to itself, either directly or indirectly.
6168 *
6169 * [ WFC: Entity Declared ]
6170 * In a document without any DTD, a document with only an internal DTD
6171 * subset which contains no parameter entity references, or a document
6172 * with "standalone='yes'", ... ... The declaration of a parameter
6173 * entity must precede any reference to it...
6174 *
6175 * [ VC: Entity Declared ]
6176 * In a document with an external subset or external parameter entities
6177 * with "standalone='no'", ... ... The declaration of a parameter entity
6178 * must precede any reference to it...
6179 *
6180 * [ WFC: In DTD ]
6181 * Parameter-entity references may only appear in the DTD.
6182 * NOTE: misleading but this is handled.
6183 */
6184void
6185xmlParsePEReference(xmlParserCtxtPtr ctxt) {
6186 xmlChar *name;
6187 xmlEntityPtr entity = NULL;
6188 xmlParserInputPtr input;
6189
6190 if (RAW == '%') {
6191 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006192 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006193 if (name == NULL) {
6194 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6195 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6196 ctxt->sax->error(ctxt->userData,
6197 "xmlParsePEReference: no name\n");
6198 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006199 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006200 } else {
6201 if (RAW == ';') {
6202 NEXT;
6203 if ((ctxt->sax != NULL) &&
6204 (ctxt->sax->getParameterEntity != NULL))
6205 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6206 name);
6207 if (entity == NULL) {
6208 /*
6209 * [ WFC: Entity Declared ]
6210 * In a document without any DTD, a document with only an
6211 * internal DTD subset which contains no parameter entity
6212 * references, or a document with "standalone='yes'", ...
6213 * ... The declaration of a parameter entity must precede
6214 * any reference to it...
6215 */
6216 if ((ctxt->standalone == 1) ||
6217 ((ctxt->hasExternalSubset == 0) &&
6218 (ctxt->hasPErefs == 0))) {
6219 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6220 if ((!ctxt->disableSAX) &&
6221 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6222 ctxt->sax->error(ctxt->userData,
6223 "PEReference: %%%s; not found\n", name);
6224 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006225 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006226 } else {
6227 /*
6228 * [ VC: Entity Declared ]
6229 * In a document with an external subset or external
6230 * parameter entities with "standalone='no'", ...
6231 * ... The declaration of a parameter entity must precede
6232 * any reference to it...
6233 */
6234 if ((!ctxt->disableSAX) &&
6235 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6236 ctxt->sax->warning(ctxt->userData,
6237 "PEReference: %%%s; not found\n", name);
6238 ctxt->valid = 0;
6239 }
6240 } else {
6241 /*
6242 * Internal checking in case the entity quest barfed
6243 */
6244 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6245 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6246 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6247 ctxt->sax->warning(ctxt->userData,
6248 "Internal: %%%s; is not a parameter entity\n", name);
Daniel Veillardf5582f12002-06-11 10:08:16 +00006249 } else if (ctxt->input->free != deallocblankswrapper) {
6250 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
6251 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00006252 } else {
6253 /*
6254 * TODO !!!
6255 * handle the extra spaces added before and after
6256 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6257 */
6258 input = xmlNewEntityInputStream(ctxt, entity);
6259 xmlPushInput(ctxt, input);
6260 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6261 (RAW == '<') && (NXT(1) == '?') &&
6262 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6263 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6264 xmlParseTextDecl(ctxt);
6265 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6266 /*
6267 * The XML REC instructs us to stop parsing
6268 * right here
6269 */
6270 ctxt->instate = XML_PARSER_EOF;
6271 xmlFree(name);
6272 return;
6273 }
6274 }
Owen Taylor3473f882001-02-23 17:55:21 +00006275 }
6276 }
6277 ctxt->hasPErefs = 1;
6278 } else {
6279 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6280 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6281 ctxt->sax->error(ctxt->userData,
6282 "xmlParsePEReference: expecting ';'\n");
6283 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006284 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006285 }
6286 xmlFree(name);
6287 }
6288 }
6289}
6290
6291/**
6292 * xmlParseStringPEReference:
6293 * @ctxt: an XML parser context
6294 * @str: a pointer to an index in the string
6295 *
6296 * parse PEReference declarations
6297 *
6298 * [69] PEReference ::= '%' Name ';'
6299 *
6300 * [ WFC: No Recursion ]
6301 * A parsed entity must not contain a recursive
6302 * reference to itself, either directly or indirectly.
6303 *
6304 * [ WFC: Entity Declared ]
6305 * In a document without any DTD, a document with only an internal DTD
6306 * subset which contains no parameter entity references, or a document
6307 * with "standalone='yes'", ... ... The declaration of a parameter
6308 * entity must precede any reference to it...
6309 *
6310 * [ VC: Entity Declared ]
6311 * In a document with an external subset or external parameter entities
6312 * with "standalone='no'", ... ... The declaration of a parameter entity
6313 * must precede any reference to it...
6314 *
6315 * [ WFC: In DTD ]
6316 * Parameter-entity references may only appear in the DTD.
6317 * NOTE: misleading but this is handled.
6318 *
6319 * Returns the string of the entity content.
6320 * str is updated to the current value of the index
6321 */
6322xmlEntityPtr
6323xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6324 const xmlChar *ptr;
6325 xmlChar cur;
6326 xmlChar *name;
6327 xmlEntityPtr entity = NULL;
6328
6329 if ((str == NULL) || (*str == NULL)) return(NULL);
6330 ptr = *str;
6331 cur = *ptr;
6332 if (cur == '%') {
6333 ptr++;
6334 cur = *ptr;
6335 name = xmlParseStringName(ctxt, &ptr);
6336 if (name == NULL) {
6337 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6338 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6339 ctxt->sax->error(ctxt->userData,
6340 "xmlParseStringPEReference: no name\n");
6341 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006342 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006343 } else {
6344 cur = *ptr;
6345 if (cur == ';') {
6346 ptr++;
6347 cur = *ptr;
6348 if ((ctxt->sax != NULL) &&
6349 (ctxt->sax->getParameterEntity != NULL))
6350 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6351 name);
6352 if (entity == NULL) {
6353 /*
6354 * [ WFC: Entity Declared ]
6355 * In a document without any DTD, a document with only an
6356 * internal DTD subset which contains no parameter entity
6357 * references, or a document with "standalone='yes'", ...
6358 * ... The declaration of a parameter entity must precede
6359 * any reference to it...
6360 */
6361 if ((ctxt->standalone == 1) ||
6362 ((ctxt->hasExternalSubset == 0) &&
6363 (ctxt->hasPErefs == 0))) {
6364 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6365 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6366 ctxt->sax->error(ctxt->userData,
6367 "PEReference: %%%s; not found\n", name);
6368 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006369 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006370 } else {
6371 /*
6372 * [ VC: Entity Declared ]
6373 * In a document with an external subset or external
6374 * parameter entities with "standalone='no'", ...
6375 * ... The declaration of a parameter entity must
6376 * precede any reference to it...
6377 */
6378 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6379 ctxt->sax->warning(ctxt->userData,
6380 "PEReference: %%%s; not found\n", name);
6381 ctxt->valid = 0;
6382 }
6383 } else {
6384 /*
6385 * Internal checking in case the entity quest barfed
6386 */
6387 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6388 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6389 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6390 ctxt->sax->warning(ctxt->userData,
6391 "Internal: %%%s; is not a parameter entity\n", name);
6392 }
6393 }
6394 ctxt->hasPErefs = 1;
6395 } else {
6396 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6397 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6398 ctxt->sax->error(ctxt->userData,
6399 "xmlParseStringPEReference: expecting ';'\n");
6400 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006401 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006402 }
6403 xmlFree(name);
6404 }
6405 }
6406 *str = ptr;
6407 return(entity);
6408}
6409
6410/**
6411 * xmlParseDocTypeDecl:
6412 * @ctxt: an XML parser context
6413 *
6414 * parse a DOCTYPE declaration
6415 *
6416 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6417 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6418 *
6419 * [ VC: Root Element Type ]
6420 * The Name in the document type declaration must match the element
6421 * type of the root element.
6422 */
6423
6424void
6425xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6426 xmlChar *name = NULL;
6427 xmlChar *ExternalID = NULL;
6428 xmlChar *URI = NULL;
6429
6430 /*
6431 * We know that '<!DOCTYPE' has been detected.
6432 */
6433 SKIP(9);
6434
6435 SKIP_BLANKS;
6436
6437 /*
6438 * Parse the DOCTYPE name.
6439 */
6440 name = xmlParseName(ctxt);
6441 if (name == NULL) {
6442 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6443 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6444 ctxt->sax->error(ctxt->userData,
6445 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6446 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006447 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006448 }
6449 ctxt->intSubName = name;
6450
6451 SKIP_BLANKS;
6452
6453 /*
6454 * Check for SystemID and ExternalID
6455 */
6456 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6457
6458 if ((URI != NULL) || (ExternalID != NULL)) {
6459 ctxt->hasExternalSubset = 1;
6460 }
6461 ctxt->extSubURI = URI;
6462 ctxt->extSubSystem = ExternalID;
6463
6464 SKIP_BLANKS;
6465
6466 /*
6467 * Create and update the internal subset.
6468 */
6469 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6470 (!ctxt->disableSAX))
6471 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6472
6473 /*
6474 * Is there any internal subset declarations ?
6475 * they are handled separately in xmlParseInternalSubset()
6476 */
6477 if (RAW == '[')
6478 return;
6479
6480 /*
6481 * We should be at the end of the DOCTYPE declaration.
6482 */
6483 if (RAW != '>') {
6484 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6485 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006486 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006487 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006488 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006489 }
6490 NEXT;
6491}
6492
6493/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006494 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006495 * @ctxt: an XML parser context
6496 *
6497 * parse the internal subset declaration
6498 *
6499 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6500 */
6501
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006502static void
Owen Taylor3473f882001-02-23 17:55:21 +00006503xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6504 /*
6505 * Is there any DTD definition ?
6506 */
6507 if (RAW == '[') {
6508 ctxt->instate = XML_PARSER_DTD;
6509 NEXT;
6510 /*
6511 * Parse the succession of Markup declarations and
6512 * PEReferences.
6513 * Subsequence (markupdecl | PEReference | S)*
6514 */
6515 while (RAW != ']') {
6516 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006517 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006518
6519 SKIP_BLANKS;
6520 xmlParseMarkupDecl(ctxt);
6521 xmlParsePEReference(ctxt);
6522
6523 /*
6524 * Pop-up of finished entities.
6525 */
6526 while ((RAW == 0) && (ctxt->inputNr > 1))
6527 xmlPopInput(ctxt);
6528
6529 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6530 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6531 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6532 ctxt->sax->error(ctxt->userData,
6533 "xmlParseInternalSubset: error detected in Markup declaration\n");
6534 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006535 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006536 break;
6537 }
6538 }
6539 if (RAW == ']') {
6540 NEXT;
6541 SKIP_BLANKS;
6542 }
6543 }
6544
6545 /*
6546 * We should be at the end of the DOCTYPE declaration.
6547 */
6548 if (RAW != '>') {
6549 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6550 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006551 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006552 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006553 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006554 }
6555 NEXT;
6556}
6557
6558/**
6559 * xmlParseAttribute:
6560 * @ctxt: an XML parser context
6561 * @value: a xmlChar ** used to store the value of the attribute
6562 *
6563 * parse an attribute
6564 *
6565 * [41] Attribute ::= Name Eq AttValue
6566 *
6567 * [ WFC: No External Entity References ]
6568 * Attribute values cannot contain direct or indirect entity references
6569 * to external entities.
6570 *
6571 * [ WFC: No < in Attribute Values ]
6572 * The replacement text of any entity referred to directly or indirectly in
6573 * an attribute value (other than "&lt;") must not contain a <.
6574 *
6575 * [ VC: Attribute Value Type ]
6576 * The attribute must have been declared; the value must be of the type
6577 * declared for it.
6578 *
6579 * [25] Eq ::= S? '=' S?
6580 *
6581 * With namespace:
6582 *
6583 * [NS 11] Attribute ::= QName Eq AttValue
6584 *
6585 * Also the case QName == xmlns:??? is handled independently as a namespace
6586 * definition.
6587 *
6588 * Returns the attribute name, and the value in *value.
6589 */
6590
6591xmlChar *
6592xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6593 xmlChar *name, *val;
6594
6595 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006596 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006597 name = xmlParseName(ctxt);
6598 if (name == NULL) {
6599 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6600 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6601 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6602 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006603 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006604 return(NULL);
6605 }
6606
6607 /*
6608 * read the value
6609 */
6610 SKIP_BLANKS;
6611 if (RAW == '=') {
6612 NEXT;
6613 SKIP_BLANKS;
6614 val = xmlParseAttValue(ctxt);
6615 ctxt->instate = XML_PARSER_CONTENT;
6616 } else {
6617 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6618 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6619 ctxt->sax->error(ctxt->userData,
6620 "Specification mandate value for attribute %s\n", name);
6621 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006622 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006623 xmlFree(name);
6624 return(NULL);
6625 }
6626
6627 /*
6628 * Check that xml:lang conforms to the specification
6629 * No more registered as an error, just generate a warning now
6630 * since this was deprecated in XML second edition
6631 */
6632 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6633 if (!xmlCheckLanguageID(val)) {
6634 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6635 ctxt->sax->warning(ctxt->userData,
6636 "Malformed value for xml:lang : %s\n", val);
6637 }
6638 }
6639
6640 /*
6641 * Check that xml:space conforms to the specification
6642 */
6643 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6644 if (xmlStrEqual(val, BAD_CAST "default"))
6645 *(ctxt->space) = 0;
6646 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6647 *(ctxt->space) = 1;
6648 else {
6649 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6650 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6651 ctxt->sax->error(ctxt->userData,
Daniel Veillard642104e2003-03-26 16:32:05 +00006652"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Owen Taylor3473f882001-02-23 17:55:21 +00006653 val);
6654 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006655 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006656 }
6657 }
6658
6659 *value = val;
6660 return(name);
6661}
6662
6663/**
6664 * xmlParseStartTag:
6665 * @ctxt: an XML parser context
6666 *
6667 * parse a start of tag either for rule element or
6668 * EmptyElement. In both case we don't parse the tag closing chars.
6669 *
6670 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6671 *
6672 * [ WFC: Unique Att Spec ]
6673 * No attribute name may appear more than once in the same start-tag or
6674 * empty-element tag.
6675 *
6676 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6677 *
6678 * [ WFC: Unique Att Spec ]
6679 * No attribute name may appear more than once in the same start-tag or
6680 * empty-element tag.
6681 *
6682 * With namespace:
6683 *
6684 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6685 *
6686 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6687 *
6688 * Returns the element name parsed
6689 */
6690
6691xmlChar *
6692xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6693 xmlChar *name;
6694 xmlChar *attname;
6695 xmlChar *attvalue;
6696 const xmlChar **atts = NULL;
6697 int nbatts = 0;
6698 int maxatts = 0;
6699 int i;
6700
6701 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006702 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006703
6704 name = xmlParseName(ctxt);
6705 if (name == NULL) {
6706 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6707 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6708 ctxt->sax->error(ctxt->userData,
6709 "xmlParseStartTag: invalid element name\n");
6710 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006711 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006712 return(NULL);
6713 }
6714
6715 /*
6716 * Now parse the attributes, it ends up with the ending
6717 *
6718 * (S Attribute)* S?
6719 */
6720 SKIP_BLANKS;
6721 GROW;
6722
Daniel Veillard21a0f912001-02-25 19:54:14 +00006723 while ((RAW != '>') &&
6724 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard34ba3872003-07-15 13:34:05 +00006725 (IS_CHAR((unsigned int) RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006726 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006727 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006728
6729 attname = xmlParseAttribute(ctxt, &attvalue);
6730 if ((attname != NULL) && (attvalue != NULL)) {
6731 /*
6732 * [ WFC: Unique Att Spec ]
6733 * No attribute name may appear more than once in the same
6734 * start-tag or empty-element tag.
6735 */
6736 for (i = 0; i < nbatts;i += 2) {
6737 if (xmlStrEqual(atts[i], attname)) {
6738 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6739 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6740 ctxt->sax->error(ctxt->userData,
6741 "Attribute %s redefined\n",
6742 attname);
6743 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006744 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006745 xmlFree(attname);
6746 xmlFree(attvalue);
6747 goto failed;
6748 }
6749 }
6750
6751 /*
6752 * Add the pair to atts
6753 */
6754 if (atts == NULL) {
6755 maxatts = 10;
6756 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6757 if (atts == NULL) {
6758 xmlGenericError(xmlGenericErrorContext,
6759 "malloc of %ld byte failed\n",
6760 maxatts * (long)sizeof(xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006761 if (attname != NULL)
6762 xmlFree(attname);
6763 if (attvalue != NULL)
6764 xmlFree(attvalue);
6765 ctxt->errNo = XML_ERR_NO_MEMORY;
6766 ctxt->instate = XML_PARSER_EOF;
6767 ctxt->disableSAX = 1;
6768 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006769 }
6770 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006771 const xmlChar **n;
6772
Owen Taylor3473f882001-02-23 17:55:21 +00006773 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006774 n = (const xmlChar **) xmlRealloc((void *) atts,
Owen Taylor3473f882001-02-23 17:55:21 +00006775 maxatts * sizeof(xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006776 if (n == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00006777 xmlGenericError(xmlGenericErrorContext,
6778 "realloc of %ld byte failed\n",
6779 maxatts * (long)sizeof(xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006780 if (attname != NULL)
6781 xmlFree(attname);
6782 if (attvalue != NULL)
6783 xmlFree(attvalue);
6784 ctxt->errNo = XML_ERR_NO_MEMORY;
6785 ctxt->instate = XML_PARSER_EOF;
6786 ctxt->disableSAX = 1;
6787 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006788 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006789 atts = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006790 }
6791 atts[nbatts++] = attname;
6792 atts[nbatts++] = attvalue;
6793 atts[nbatts] = NULL;
6794 atts[nbatts + 1] = NULL;
6795 } else {
6796 if (attname != NULL)
6797 xmlFree(attname);
6798 if (attvalue != NULL)
6799 xmlFree(attvalue);
6800 }
6801
6802failed:
6803
Daniel Veillard3772de32002-12-17 10:31:45 +00006804 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006805 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6806 break;
6807 if (!IS_BLANK(RAW)) {
6808 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6809 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6810 ctxt->sax->error(ctxt->userData,
6811 "attributes construct error\n");
6812 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006813 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006814 }
6815 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006816 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6817 (attname == NULL) && (attvalue == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006818 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6819 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6820 ctxt->sax->error(ctxt->userData,
6821 "xmlParseStartTag: problem parsing attributes\n");
6822 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006823 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006824 break;
6825 }
6826 GROW;
6827 }
6828
6829 /*
6830 * SAX: Start of Element !
6831 */
6832 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6833 (!ctxt->disableSAX))
6834 ctxt->sax->startElement(ctxt->userData, name, atts);
6835
6836 if (atts != NULL) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006837 for (i = 0;i < nbatts;i++)
6838 if (atts[i] != NULL)
6839 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00006840 xmlFree((void *) atts);
6841 }
6842 return(name);
6843}
6844
6845/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006846 * xmlParseEndTagInternal:
Owen Taylor3473f882001-02-23 17:55:21 +00006847 * @ctxt: an XML parser context
6848 *
6849 * parse an end of tag
6850 *
6851 * [42] ETag ::= '</' Name S? '>'
6852 *
6853 * With namespace
6854 *
6855 * [NS 9] ETag ::= '</' QName S? '>'
6856 */
6857
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006858static void
6859xmlParseEndTagInternal(xmlParserCtxtPtr ctxt, int line) {
Owen Taylor3473f882001-02-23 17:55:21 +00006860 xmlChar *name;
6861 xmlChar *oldname;
6862
6863 GROW;
6864 if ((RAW != '<') || (NXT(1) != '/')) {
6865 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6866 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6867 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6868 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006869 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006870 return;
6871 }
6872 SKIP(2);
6873
Daniel Veillard46de64e2002-05-29 08:21:33 +00006874 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006875
6876 /*
6877 * We should definitely be at the ending "S? '>'" part
6878 */
6879 GROW;
6880 SKIP_BLANKS;
Daniel Veillard34ba3872003-07-15 13:34:05 +00006881 if ((!IS_CHAR((unsigned int) RAW)) || (RAW != '>')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006882 ctxt->errNo = XML_ERR_GT_REQUIRED;
6883 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6884 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6885 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006886 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006887 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006888 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006889
6890 /*
6891 * [ WFC: Element Type Match ]
6892 * The Name in an element's end-tag must match the element type in the
6893 * start-tag.
6894 *
6895 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006896 if (name != (xmlChar*)1) {
Owen Taylor3473f882001-02-23 17:55:21 +00006897 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6898 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00006899 if (name != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00006900 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006901 "Opening and ending tag mismatch: %s line %d and %s\n",
6902 ctxt->name, line, name);
Daniel Veillard46de64e2002-05-29 08:21:33 +00006903 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006904 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006905 "Ending tag error for: %s line %d\n", ctxt->name, line);
Owen Taylor3473f882001-02-23 17:55:21 +00006906 }
6907
6908 }
6909 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006910 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006911 if (name != NULL)
6912 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00006913 }
6914
6915 /*
6916 * SAX: End of Tag
6917 */
6918 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6919 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006920 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006921
Owen Taylor3473f882001-02-23 17:55:21 +00006922 oldname = namePop(ctxt);
6923 spacePop(ctxt);
6924 if (oldname != NULL) {
6925#ifdef DEBUG_STACK
6926 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6927#endif
6928 xmlFree(oldname);
6929 }
6930 return;
6931}
6932
6933/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006934 * xmlParseEndTag:
6935 * @ctxt: an XML parser context
6936 *
6937 * parse an end of tag
6938 *
6939 * [42] ETag ::= '</' Name S? '>'
6940 *
6941 * With namespace
6942 *
6943 * [NS 9] ETag ::= '</' QName S? '>'
6944 */
6945
6946void
6947xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6948 xmlParseEndTagInternal(ctxt, 0);
6949}
6950
6951/**
Owen Taylor3473f882001-02-23 17:55:21 +00006952 * xmlParseCDSect:
6953 * @ctxt: an XML parser context
6954 *
6955 * Parse escaped pure raw content.
6956 *
6957 * [18] CDSect ::= CDStart CData CDEnd
6958 *
6959 * [19] CDStart ::= '<![CDATA['
6960 *
6961 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6962 *
6963 * [21] CDEnd ::= ']]>'
6964 */
6965void
6966xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6967 xmlChar *buf = NULL;
6968 int len = 0;
6969 int size = XML_PARSER_BUFFER_SIZE;
6970 int r, rl;
6971 int s, sl;
6972 int cur, l;
6973 int count = 0;
6974
6975 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6976 (NXT(2) == '[') && (NXT(3) == 'C') &&
6977 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6978 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6979 (NXT(8) == '[')) {
6980 SKIP(9);
6981 } else
6982 return;
6983
6984 ctxt->instate = XML_PARSER_CDATA_SECTION;
6985 r = CUR_CHAR(rl);
6986 if (!IS_CHAR(r)) {
6987 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6988 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6989 ctxt->sax->error(ctxt->userData,
6990 "CData section not finished\n");
6991 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006992 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006993 ctxt->instate = XML_PARSER_CONTENT;
6994 return;
6995 }
6996 NEXTL(rl);
6997 s = CUR_CHAR(sl);
6998 if (!IS_CHAR(s)) {
6999 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
7000 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7001 ctxt->sax->error(ctxt->userData,
7002 "CData section not finished\n");
7003 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007004 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007005 ctxt->instate = XML_PARSER_CONTENT;
7006 return;
7007 }
7008 NEXTL(sl);
7009 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007010 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007011 if (buf == NULL) {
7012 xmlGenericError(xmlGenericErrorContext,
7013 "malloc of %d byte failed\n", size);
7014 return;
7015 }
7016 while (IS_CHAR(cur) &&
7017 ((r != ']') || (s != ']') || (cur != '>'))) {
7018 if (len + 5 >= size) {
7019 size *= 2;
7020 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7021 if (buf == NULL) {
7022 xmlGenericError(xmlGenericErrorContext,
7023 "realloc of %d byte failed\n", size);
7024 return;
7025 }
7026 }
7027 COPY_BUF(rl,buf,len,r);
7028 r = s;
7029 rl = sl;
7030 s = cur;
7031 sl = l;
7032 count++;
7033 if (count > 50) {
7034 GROW;
7035 count = 0;
7036 }
7037 NEXTL(l);
7038 cur = CUR_CHAR(l);
7039 }
7040 buf[len] = 0;
7041 ctxt->instate = XML_PARSER_CONTENT;
7042 if (cur != '>') {
7043 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
7044 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7045 ctxt->sax->error(ctxt->userData,
7046 "CData section not finished\n%.50s\n", buf);
7047 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007048 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007049 xmlFree(buf);
7050 return;
7051 }
7052 NEXTL(l);
7053
7054 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007055 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00007056 */
7057 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
7058 if (ctxt->sax->cdataBlock != NULL)
7059 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00007060 else if (ctxt->sax->characters != NULL)
7061 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00007062 }
7063 xmlFree(buf);
7064}
7065
7066/**
7067 * xmlParseContent:
7068 * @ctxt: an XML parser context
7069 *
7070 * Parse a content:
7071 *
7072 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
7073 */
7074
7075void
7076xmlParseContent(xmlParserCtxtPtr ctxt) {
7077 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00007078 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00007079 ((RAW != '<') || (NXT(1) != '/'))) {
7080 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007081 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00007082 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00007083
7084 /*
Owen Taylor3473f882001-02-23 17:55:21 +00007085 * First case : a Processing Instruction.
7086 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00007087 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007088 xmlParsePI(ctxt);
7089 }
7090
7091 /*
7092 * Second case : a CDSection
7093 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007094 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00007095 (NXT(2) == '[') && (NXT(3) == 'C') &&
7096 (NXT(4) == 'D') && (NXT(5) == 'A') &&
7097 (NXT(6) == 'T') && (NXT(7) == 'A') &&
7098 (NXT(8) == '[')) {
7099 xmlParseCDSect(ctxt);
7100 }
7101
7102 /*
7103 * Third case : a comment
7104 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007105 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00007106 (NXT(2) == '-') && (NXT(3) == '-')) {
7107 xmlParseComment(ctxt);
7108 ctxt->instate = XML_PARSER_CONTENT;
7109 }
7110
7111 /*
7112 * Fourth case : a sub-element.
7113 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007114 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00007115 xmlParseElement(ctxt);
7116 }
7117
7118 /*
7119 * Fifth case : a reference. If if has not been resolved,
7120 * parsing returns it's Name, create the node
7121 */
7122
Daniel Veillard21a0f912001-02-25 19:54:14 +00007123 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00007124 xmlParseReference(ctxt);
7125 }
7126
7127 /*
7128 * Last case, text. Note that References are handled directly.
7129 */
7130 else {
7131 xmlParseCharData(ctxt, 0);
7132 }
7133
7134 GROW;
7135 /*
7136 * Pop-up of finished entities.
7137 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00007138 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00007139 xmlPopInput(ctxt);
7140 SHRINK;
7141
Daniel Veillardfdc91562002-07-01 21:52:03 +00007142 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007143 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
7144 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7145 ctxt->sax->error(ctxt->userData,
7146 "detected an error in element content\n");
7147 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007148 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007149 ctxt->instate = XML_PARSER_EOF;
7150 break;
7151 }
7152 }
7153}
7154
7155/**
7156 * xmlParseElement:
7157 * @ctxt: an XML parser context
7158 *
7159 * parse an XML element, this is highly recursive
7160 *
7161 * [39] element ::= EmptyElemTag | STag content ETag
7162 *
7163 * [ WFC: Element Type Match ]
7164 * The Name in an element's end-tag must match the element type in the
7165 * start-tag.
7166 *
7167 * [ VC: Element Valid ]
7168 * An element is valid if there is a declaration matching elementdecl
7169 * where the Name matches the element type and one of the following holds:
7170 * - The declaration matches EMPTY and the element has no content.
7171 * - The declaration matches children and the sequence of child elements
7172 * belongs to the language generated by the regular expression in the
7173 * content model, with optional white space (characters matching the
7174 * nonterminal S) between each pair of child elements.
7175 * - The declaration matches Mixed and the content consists of character
7176 * data and child elements whose types match names in the content model.
7177 * - The declaration matches ANY, and the types of any child elements have
7178 * been declared.
7179 */
7180
7181void
7182xmlParseElement(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00007183 xmlChar *name;
7184 xmlChar *oldname;
7185 xmlParserNodeInfo node_info;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007186 int line;
Owen Taylor3473f882001-02-23 17:55:21 +00007187 xmlNodePtr ret;
7188
7189 /* Capture start position */
7190 if (ctxt->record_info) {
7191 node_info.begin_pos = ctxt->input->consumed +
7192 (CUR_PTR - ctxt->input->base);
7193 node_info.begin_line = ctxt->input->line;
7194 }
7195
7196 if (ctxt->spaceNr == 0)
7197 spacePush(ctxt, -1);
7198 else
7199 spacePush(ctxt, *ctxt->space);
7200
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007201 line = ctxt->input->line;
Owen Taylor3473f882001-02-23 17:55:21 +00007202 name = xmlParseStartTag(ctxt);
7203 if (name == NULL) {
7204 spacePop(ctxt);
7205 return;
7206 }
7207 namePush(ctxt, name);
7208 ret = ctxt->node;
7209
7210 /*
7211 * [ VC: Root Element Type ]
7212 * The Name in the document type declaration must match the element
7213 * type of the root element.
7214 */
7215 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7216 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7217 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7218
7219 /*
7220 * Check for an Empty Element.
7221 */
7222 if ((RAW == '/') && (NXT(1) == '>')) {
7223 SKIP(2);
7224 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7225 (!ctxt->disableSAX))
7226 ctxt->sax->endElement(ctxt->userData, name);
7227 oldname = namePop(ctxt);
7228 spacePop(ctxt);
7229 if (oldname != NULL) {
7230#ifdef DEBUG_STACK
7231 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7232#endif
7233 xmlFree(oldname);
7234 }
7235 if ( ret != NULL && ctxt->record_info ) {
7236 node_info.end_pos = ctxt->input->consumed +
7237 (CUR_PTR - ctxt->input->base);
7238 node_info.end_line = ctxt->input->line;
7239 node_info.node = ret;
7240 xmlParserAddNodeInfo(ctxt, &node_info);
7241 }
7242 return;
7243 }
7244 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00007245 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007246 } else {
7247 ctxt->errNo = XML_ERR_GT_REQUIRED;
7248 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7249 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007250 "Couldn't find end of Start Tag %s line %d\n",
7251 name, line);
Owen Taylor3473f882001-02-23 17:55:21 +00007252 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007253 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007254
7255 /*
7256 * end of parsing of this node.
7257 */
7258 nodePop(ctxt);
7259 oldname = namePop(ctxt);
7260 spacePop(ctxt);
7261 if (oldname != NULL) {
7262#ifdef DEBUG_STACK
7263 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7264#endif
7265 xmlFree(oldname);
7266 }
7267
7268 /*
7269 * Capture end position and add node
7270 */
7271 if ( ret != NULL && ctxt->record_info ) {
7272 node_info.end_pos = ctxt->input->consumed +
7273 (CUR_PTR - ctxt->input->base);
7274 node_info.end_line = ctxt->input->line;
7275 node_info.node = ret;
7276 xmlParserAddNodeInfo(ctxt, &node_info);
7277 }
7278 return;
7279 }
7280
7281 /*
7282 * Parse the content of the element:
7283 */
7284 xmlParseContent(ctxt);
Daniel Veillard34ba3872003-07-15 13:34:05 +00007285 if (!IS_CHAR((unsigned int) RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00007286 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00007287 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7288 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007289 "Premature end of data in tag %s line %d\n", name, line);
Owen Taylor3473f882001-02-23 17:55:21 +00007290 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007291 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007292
7293 /*
7294 * end of parsing of this node.
7295 */
7296 nodePop(ctxt);
7297 oldname = namePop(ctxt);
7298 spacePop(ctxt);
7299 if (oldname != NULL) {
7300#ifdef DEBUG_STACK
7301 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7302#endif
7303 xmlFree(oldname);
7304 }
7305 return;
7306 }
7307
7308 /*
7309 * parse the end of tag: '</' should be here.
7310 */
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007311 xmlParseEndTagInternal(ctxt, line);
Owen Taylor3473f882001-02-23 17:55:21 +00007312
7313 /*
7314 * Capture end position and add node
7315 */
7316 if ( ret != NULL && ctxt->record_info ) {
7317 node_info.end_pos = ctxt->input->consumed +
7318 (CUR_PTR - ctxt->input->base);
7319 node_info.end_line = ctxt->input->line;
7320 node_info.node = ret;
7321 xmlParserAddNodeInfo(ctxt, &node_info);
7322 }
7323}
7324
7325/**
7326 * xmlParseVersionNum:
7327 * @ctxt: an XML parser context
7328 *
7329 * parse the XML version value.
7330 *
7331 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7332 *
7333 * Returns the string giving the XML version number, or NULL
7334 */
7335xmlChar *
7336xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7337 xmlChar *buf = NULL;
7338 int len = 0;
7339 int size = 10;
7340 xmlChar cur;
7341
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007342 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007343 if (buf == NULL) {
7344 xmlGenericError(xmlGenericErrorContext,
7345 "malloc of %d byte failed\n", size);
7346 return(NULL);
7347 }
7348 cur = CUR;
7349 while (((cur >= 'a') && (cur <= 'z')) ||
7350 ((cur >= 'A') && (cur <= 'Z')) ||
7351 ((cur >= '0') && (cur <= '9')) ||
7352 (cur == '_') || (cur == '.') ||
7353 (cur == ':') || (cur == '-')) {
7354 if (len + 1 >= size) {
7355 size *= 2;
7356 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7357 if (buf == NULL) {
7358 xmlGenericError(xmlGenericErrorContext,
7359 "realloc of %d byte failed\n", size);
7360 return(NULL);
7361 }
7362 }
7363 buf[len++] = cur;
7364 NEXT;
7365 cur=CUR;
7366 }
7367 buf[len] = 0;
7368 return(buf);
7369}
7370
7371/**
7372 * xmlParseVersionInfo:
7373 * @ctxt: an XML parser context
7374 *
7375 * parse the XML version.
7376 *
7377 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7378 *
7379 * [25] Eq ::= S? '=' S?
7380 *
7381 * Returns the version string, e.g. "1.0"
7382 */
7383
7384xmlChar *
7385xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7386 xmlChar *version = NULL;
7387 const xmlChar *q;
7388
7389 if ((RAW == 'v') && (NXT(1) == 'e') &&
7390 (NXT(2) == 'r') && (NXT(3) == 's') &&
7391 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7392 (NXT(6) == 'n')) {
7393 SKIP(7);
7394 SKIP_BLANKS;
7395 if (RAW != '=') {
7396 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7397 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7398 ctxt->sax->error(ctxt->userData,
7399 "xmlParseVersionInfo : expected '='\n");
7400 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007401 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007402 return(NULL);
7403 }
7404 NEXT;
7405 SKIP_BLANKS;
7406 if (RAW == '"') {
7407 NEXT;
7408 q = CUR_PTR;
7409 version = xmlParseVersionNum(ctxt);
7410 if (RAW != '"') {
7411 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7412 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7413 ctxt->sax->error(ctxt->userData,
7414 "String not closed\n%.50s\n", q);
7415 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007416 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007417 } else
7418 NEXT;
7419 } else if (RAW == '\''){
7420 NEXT;
7421 q = CUR_PTR;
7422 version = xmlParseVersionNum(ctxt);
7423 if (RAW != '\'') {
7424 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7425 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7426 ctxt->sax->error(ctxt->userData,
7427 "String not closed\n%.50s\n", q);
7428 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007429 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007430 } else
7431 NEXT;
7432 } else {
7433 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7434 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7435 ctxt->sax->error(ctxt->userData,
7436 "xmlParseVersionInfo : expected ' or \"\n");
7437 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007438 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007439 }
7440 }
7441 return(version);
7442}
7443
7444/**
7445 * xmlParseEncName:
7446 * @ctxt: an XML parser context
7447 *
7448 * parse the XML encoding name
7449 *
7450 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7451 *
7452 * Returns the encoding name value or NULL
7453 */
7454xmlChar *
7455xmlParseEncName(xmlParserCtxtPtr ctxt) {
7456 xmlChar *buf = NULL;
7457 int len = 0;
7458 int size = 10;
7459 xmlChar cur;
7460
7461 cur = CUR;
7462 if (((cur >= 'a') && (cur <= 'z')) ||
7463 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007464 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007465 if (buf == NULL) {
7466 xmlGenericError(xmlGenericErrorContext,
7467 "malloc of %d byte failed\n", size);
7468 return(NULL);
7469 }
7470
7471 buf[len++] = cur;
7472 NEXT;
7473 cur = CUR;
7474 while (((cur >= 'a') && (cur <= 'z')) ||
7475 ((cur >= 'A') && (cur <= 'Z')) ||
7476 ((cur >= '0') && (cur <= '9')) ||
7477 (cur == '.') || (cur == '_') ||
7478 (cur == '-')) {
7479 if (len + 1 >= size) {
7480 size *= 2;
7481 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7482 if (buf == NULL) {
7483 xmlGenericError(xmlGenericErrorContext,
7484 "realloc of %d byte failed\n", size);
7485 return(NULL);
7486 }
7487 }
7488 buf[len++] = cur;
7489 NEXT;
7490 cur = CUR;
7491 if (cur == 0) {
7492 SHRINK;
7493 GROW;
7494 cur = CUR;
7495 }
7496 }
7497 buf[len] = 0;
7498 } else {
7499 ctxt->errNo = XML_ERR_ENCODING_NAME;
7500 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7501 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7502 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007503 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007504 }
7505 return(buf);
7506}
7507
7508/**
7509 * xmlParseEncodingDecl:
7510 * @ctxt: an XML parser context
7511 *
7512 * parse the XML encoding declaration
7513 *
7514 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7515 *
7516 * this setups the conversion filters.
7517 *
7518 * Returns the encoding value or NULL
7519 */
7520
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00007521const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007522xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7523 xmlChar *encoding = NULL;
7524 const xmlChar *q;
7525
7526 SKIP_BLANKS;
7527 if ((RAW == 'e') && (NXT(1) == 'n') &&
7528 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7529 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7530 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7531 SKIP(8);
7532 SKIP_BLANKS;
7533 if (RAW != '=') {
7534 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7535 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7536 ctxt->sax->error(ctxt->userData,
7537 "xmlParseEncodingDecl : expected '='\n");
7538 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007539 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007540 return(NULL);
7541 }
7542 NEXT;
7543 SKIP_BLANKS;
7544 if (RAW == '"') {
7545 NEXT;
7546 q = CUR_PTR;
7547 encoding = xmlParseEncName(ctxt);
7548 if (RAW != '"') {
7549 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7550 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7551 ctxt->sax->error(ctxt->userData,
7552 "String not closed\n%.50s\n", q);
7553 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007554 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007555 } else
7556 NEXT;
7557 } else if (RAW == '\''){
7558 NEXT;
7559 q = CUR_PTR;
7560 encoding = xmlParseEncName(ctxt);
7561 if (RAW != '\'') {
7562 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7563 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7564 ctxt->sax->error(ctxt->userData,
7565 "String not closed\n%.50s\n", q);
7566 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007567 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007568 } else
7569 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00007570 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007571 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7572 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7573 ctxt->sax->error(ctxt->userData,
7574 "xmlParseEncodingDecl : expected ' or \"\n");
7575 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007576 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007577 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00007578 /*
7579 * UTF-16 encoding stwich has already taken place at this stage,
7580 * more over the little-endian/big-endian selection is already done
7581 */
7582 if ((encoding != NULL) &&
7583 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
7584 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00007585 if (ctxt->encoding != NULL)
7586 xmlFree((xmlChar *) ctxt->encoding);
7587 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00007588 }
7589 /*
7590 * UTF-8 encoding is handled natively
7591 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00007592 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00007593 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
7594 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00007595 if (ctxt->encoding != NULL)
7596 xmlFree((xmlChar *) ctxt->encoding);
7597 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00007598 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00007599 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00007600 xmlCharEncodingHandlerPtr handler;
7601
7602 if (ctxt->input->encoding != NULL)
7603 xmlFree((xmlChar *) ctxt->input->encoding);
7604 ctxt->input->encoding = encoding;
7605
Daniel Veillarda6874ca2003-07-29 16:47:24 +00007606 handler = xmlFindCharEncodingHandler((const char *) encoding);
7607 if (handler != NULL) {
7608 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00007609 } else {
Daniel Veillarda6874ca2003-07-29 16:47:24 +00007610 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7611 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7612 ctxt->sax->error(ctxt->userData,
7613 "Unsupported encoding %s\n", encoding);
7614 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007615 }
7616 }
7617 }
7618 return(encoding);
7619}
7620
7621/**
7622 * xmlParseSDDecl:
7623 * @ctxt: an XML parser context
7624 *
7625 * parse the XML standalone declaration
7626 *
7627 * [32] SDDecl ::= S 'standalone' Eq
7628 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7629 *
7630 * [ VC: Standalone Document Declaration ]
7631 * TODO The standalone document declaration must have the value "no"
7632 * if any external markup declarations contain declarations of:
7633 * - attributes with default values, if elements to which these
7634 * attributes apply appear in the document without specifications
7635 * of values for these attributes, or
7636 * - entities (other than amp, lt, gt, apos, quot), if references
7637 * to those entities appear in the document, or
7638 * - attributes with values subject to normalization, where the
7639 * attribute appears in the document with a value which will change
7640 * as a result of normalization, or
7641 * - element types with element content, if white space occurs directly
7642 * within any instance of those types.
7643 *
7644 * Returns 1 if standalone, 0 otherwise
7645 */
7646
7647int
7648xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7649 int standalone = -1;
7650
7651 SKIP_BLANKS;
7652 if ((RAW == 's') && (NXT(1) == 't') &&
7653 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7654 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7655 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7656 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7657 SKIP(10);
7658 SKIP_BLANKS;
7659 if (RAW != '=') {
7660 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7661 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7662 ctxt->sax->error(ctxt->userData,
7663 "XML standalone declaration : expected '='\n");
7664 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007665 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007666 return(standalone);
7667 }
7668 NEXT;
7669 SKIP_BLANKS;
7670 if (RAW == '\''){
7671 NEXT;
7672 if ((RAW == 'n') && (NXT(1) == 'o')) {
7673 standalone = 0;
7674 SKIP(2);
7675 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7676 (NXT(2) == 's')) {
7677 standalone = 1;
7678 SKIP(3);
7679 } else {
7680 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7681 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7682 ctxt->sax->error(ctxt->userData,
7683 "standalone accepts only 'yes' or 'no'\n");
7684 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007685 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007686 }
7687 if (RAW != '\'') {
7688 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7689 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7690 ctxt->sax->error(ctxt->userData, "String not closed\n");
7691 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007692 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007693 } else
7694 NEXT;
7695 } else if (RAW == '"'){
7696 NEXT;
7697 if ((RAW == 'n') && (NXT(1) == 'o')) {
7698 standalone = 0;
7699 SKIP(2);
7700 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7701 (NXT(2) == 's')) {
7702 standalone = 1;
7703 SKIP(3);
7704 } else {
7705 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7706 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7707 ctxt->sax->error(ctxt->userData,
7708 "standalone accepts only 'yes' or 'no'\n");
7709 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007710 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007711 }
7712 if (RAW != '"') {
7713 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7715 ctxt->sax->error(ctxt->userData, "String not closed\n");
7716 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007717 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007718 } else
7719 NEXT;
7720 } else {
7721 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7722 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7723 ctxt->sax->error(ctxt->userData,
7724 "Standalone value not found\n");
7725 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007726 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007727 }
7728 }
7729 return(standalone);
7730}
7731
7732/**
7733 * xmlParseXMLDecl:
7734 * @ctxt: an XML parser context
7735 *
7736 * parse an XML declaration header
7737 *
7738 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7739 */
7740
7741void
7742xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7743 xmlChar *version;
7744
7745 /*
7746 * We know that '<?xml' is here.
7747 */
7748 SKIP(5);
7749
7750 if (!IS_BLANK(RAW)) {
7751 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7752 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7753 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7754 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007755 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007756 }
7757 SKIP_BLANKS;
7758
7759 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007760 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007761 */
7762 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007763 if (version == NULL) {
7764 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7765 ctxt->sax->error(ctxt->userData,
7766 "Malformed declaration expecting version\n");
7767 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007768 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard19840942001-11-29 16:11:38 +00007769 } else {
7770 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7771 /*
7772 * TODO: Blueberry should be detected here
7773 */
7774 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7775 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7776 version);
7777 }
7778 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007779 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007780 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007781 }
Owen Taylor3473f882001-02-23 17:55:21 +00007782
7783 /*
7784 * We may have the encoding declaration
7785 */
7786 if (!IS_BLANK(RAW)) {
7787 if ((RAW == '?') && (NXT(1) == '>')) {
7788 SKIP(2);
7789 return;
7790 }
7791 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7792 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7793 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7794 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007795 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007796 }
7797 xmlParseEncodingDecl(ctxt);
7798 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7799 /*
7800 * The XML REC instructs us to stop parsing right here
7801 */
7802 return;
7803 }
7804
7805 /*
7806 * We may have the standalone status.
7807 */
7808 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7809 if ((RAW == '?') && (NXT(1) == '>')) {
7810 SKIP(2);
7811 return;
7812 }
7813 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7814 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7815 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7816 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007817 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007818 }
7819 SKIP_BLANKS;
7820 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7821
7822 SKIP_BLANKS;
7823 if ((RAW == '?') && (NXT(1) == '>')) {
7824 SKIP(2);
7825 } else if (RAW == '>') {
7826 /* Deprecated old WD ... */
7827 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7828 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7829 ctxt->sax->error(ctxt->userData,
7830 "XML declaration must end-up with '?>'\n");
7831 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007832 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007833 NEXT;
7834 } else {
7835 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7836 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7837 ctxt->sax->error(ctxt->userData,
7838 "parsing XML declaration: '?>' expected\n");
7839 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007840 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007841 MOVETO_ENDTAG(CUR_PTR);
7842 NEXT;
7843 }
7844}
7845
7846/**
7847 * xmlParseMisc:
7848 * @ctxt: an XML parser context
7849 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007850 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007851 *
7852 * [27] Misc ::= Comment | PI | S
7853 */
7854
7855void
7856xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00007857 while (((RAW == '<') && (NXT(1) == '?')) ||
7858 ((RAW == '<') && (NXT(1) == '!') &&
7859 (NXT(2) == '-') && (NXT(3) == '-')) ||
7860 IS_BLANK(CUR)) {
7861 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007862 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00007863 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007864 NEXT;
7865 } else
7866 xmlParseComment(ctxt);
7867 }
7868}
7869
7870/**
7871 * xmlParseDocument:
7872 * @ctxt: an XML parser context
7873 *
7874 * parse an XML document (and build a tree if using the standard SAX
7875 * interface).
7876 *
7877 * [1] document ::= prolog element Misc*
7878 *
7879 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7880 *
7881 * Returns 0, -1 in case of error. the parser context is augmented
7882 * as a result of the parsing.
7883 */
7884
7885int
7886xmlParseDocument(xmlParserCtxtPtr ctxt) {
7887 xmlChar start[4];
7888 xmlCharEncoding enc;
7889
7890 xmlInitParser();
7891
7892 GROW;
7893
7894 /*
7895 * SAX: beginning of the document processing.
7896 */
7897 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7898 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7899
Daniel Veillard50f34372001-08-03 12:06:36 +00007900 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007901 /*
7902 * Get the 4 first bytes and decode the charset
7903 * if enc != XML_CHAR_ENCODING_NONE
7904 * plug some encoding conversion routines.
7905 */
7906 start[0] = RAW;
7907 start[1] = NXT(1);
7908 start[2] = NXT(2);
7909 start[3] = NXT(3);
7910 enc = xmlDetectCharEncoding(start, 4);
7911 if (enc != XML_CHAR_ENCODING_NONE) {
7912 xmlSwitchEncoding(ctxt, enc);
7913 }
Owen Taylor3473f882001-02-23 17:55:21 +00007914 }
7915
7916
7917 if (CUR == 0) {
7918 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7919 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7920 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7921 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007922 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007923 }
7924
7925 /*
7926 * Check for the XMLDecl in the Prolog.
7927 */
7928 GROW;
7929 if ((RAW == '<') && (NXT(1) == '?') &&
7930 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7931 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7932
7933 /*
7934 * Note that we will switch encoding on the fly.
7935 */
7936 xmlParseXMLDecl(ctxt);
7937 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7938 /*
7939 * The XML REC instructs us to stop parsing right here
7940 */
7941 return(-1);
7942 }
7943 ctxt->standalone = ctxt->input->standalone;
7944 SKIP_BLANKS;
7945 } else {
7946 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7947 }
7948 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7949 ctxt->sax->startDocument(ctxt->userData);
7950
7951 /*
7952 * The Misc part of the Prolog
7953 */
7954 GROW;
7955 xmlParseMisc(ctxt);
7956
7957 /*
7958 * Then possibly doc type declaration(s) and more Misc
7959 * (doctypedecl Misc*)?
7960 */
7961 GROW;
7962 if ((RAW == '<') && (NXT(1) == '!') &&
7963 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7964 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7965 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7966 (NXT(8) == 'E')) {
7967
7968 ctxt->inSubset = 1;
7969 xmlParseDocTypeDecl(ctxt);
7970 if (RAW == '[') {
7971 ctxt->instate = XML_PARSER_DTD;
7972 xmlParseInternalSubset(ctxt);
7973 }
7974
7975 /*
7976 * Create and update the external subset.
7977 */
7978 ctxt->inSubset = 2;
7979 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7980 (!ctxt->disableSAX))
7981 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7982 ctxt->extSubSystem, ctxt->extSubURI);
7983 ctxt->inSubset = 0;
7984
7985
7986 ctxt->instate = XML_PARSER_PROLOG;
7987 xmlParseMisc(ctxt);
7988 }
7989
7990 /*
7991 * Time to start parsing the tree itself
7992 */
7993 GROW;
7994 if (RAW != '<') {
7995 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7996 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7997 ctxt->sax->error(ctxt->userData,
7998 "Start tag expected, '<' not found\n");
7999 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008000 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008001 ctxt->instate = XML_PARSER_EOF;
8002 } else {
8003 ctxt->instate = XML_PARSER_CONTENT;
8004 xmlParseElement(ctxt);
8005 ctxt->instate = XML_PARSER_EPILOG;
8006
8007
8008 /*
8009 * The Misc part at the end
8010 */
8011 xmlParseMisc(ctxt);
8012
Daniel Veillard561b7f82002-03-20 21:55:57 +00008013 if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00008014 ctxt->errNo = XML_ERR_DOCUMENT_END;
8015 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8016 ctxt->sax->error(ctxt->userData,
8017 "Extra content at the end of the document\n");
8018 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008019 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008020 }
8021 ctxt->instate = XML_PARSER_EOF;
8022 }
8023
8024 /*
8025 * SAX: end of the document processing.
8026 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008027 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008028 ctxt->sax->endDocument(ctxt->userData);
8029
Daniel Veillard5997aca2002-03-18 18:36:20 +00008030 /*
8031 * Remove locally kept entity definitions if the tree was not built
8032 */
8033 if ((ctxt->myDoc != NULL) &&
8034 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
8035 xmlFreeDoc(ctxt->myDoc);
8036 ctxt->myDoc = NULL;
8037 }
8038
Daniel Veillardc7612992002-02-17 22:47:37 +00008039 if (! ctxt->wellFormed) {
8040 ctxt->valid = 0;
8041 return(-1);
8042 }
Owen Taylor3473f882001-02-23 17:55:21 +00008043 return(0);
8044}
8045
8046/**
8047 * xmlParseExtParsedEnt:
8048 * @ctxt: an XML parser context
8049 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008050 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00008051 * An external general parsed entity is well-formed if it matches the
8052 * production labeled extParsedEnt.
8053 *
8054 * [78] extParsedEnt ::= TextDecl? content
8055 *
8056 * Returns 0, -1 in case of error. the parser context is augmented
8057 * as a result of the parsing.
8058 */
8059
8060int
8061xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
8062 xmlChar start[4];
8063 xmlCharEncoding enc;
8064
8065 xmlDefaultSAXHandlerInit();
8066
8067 GROW;
8068
8069 /*
8070 * SAX: beginning of the document processing.
8071 */
8072 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8073 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8074
8075 /*
8076 * Get the 4 first bytes and decode the charset
8077 * if enc != XML_CHAR_ENCODING_NONE
8078 * plug some encoding conversion routines.
8079 */
8080 start[0] = RAW;
8081 start[1] = NXT(1);
8082 start[2] = NXT(2);
8083 start[3] = NXT(3);
8084 enc = xmlDetectCharEncoding(start, 4);
8085 if (enc != XML_CHAR_ENCODING_NONE) {
8086 xmlSwitchEncoding(ctxt, enc);
8087 }
8088
8089
8090 if (CUR == 0) {
8091 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8092 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8093 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8094 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008095 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008096 }
8097
8098 /*
8099 * Check for the XMLDecl in the Prolog.
8100 */
8101 GROW;
8102 if ((RAW == '<') && (NXT(1) == '?') &&
8103 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8104 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8105
8106 /*
8107 * Note that we will switch encoding on the fly.
8108 */
8109 xmlParseXMLDecl(ctxt);
8110 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8111 /*
8112 * The XML REC instructs us to stop parsing right here
8113 */
8114 return(-1);
8115 }
8116 SKIP_BLANKS;
8117 } else {
8118 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8119 }
8120 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8121 ctxt->sax->startDocument(ctxt->userData);
8122
8123 /*
8124 * Doing validity checking on chunk doesn't make sense
8125 */
8126 ctxt->instate = XML_PARSER_CONTENT;
8127 ctxt->validate = 0;
8128 ctxt->loadsubset = 0;
8129 ctxt->depth = 0;
8130
8131 xmlParseContent(ctxt);
8132
8133 if ((RAW == '<') && (NXT(1) == '/')) {
8134 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8135 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8136 ctxt->sax->error(ctxt->userData,
8137 "chunk is not well balanced\n");
8138 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008139 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008140 } else if (RAW != 0) {
8141 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8142 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8143 ctxt->sax->error(ctxt->userData,
8144 "extra content at the end of well balanced chunk\n");
8145 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008146 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008147 }
8148
8149 /*
8150 * SAX: end of the document processing.
8151 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008152 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008153 ctxt->sax->endDocument(ctxt->userData);
8154
8155 if (! ctxt->wellFormed) return(-1);
8156 return(0);
8157}
8158
8159/************************************************************************
8160 * *
8161 * Progressive parsing interfaces *
8162 * *
8163 ************************************************************************/
8164
8165/**
8166 * xmlParseLookupSequence:
8167 * @ctxt: an XML parser context
8168 * @first: the first char to lookup
8169 * @next: the next char to lookup or zero
8170 * @third: the next char to lookup or zero
8171 *
8172 * Try to find if a sequence (first, next, third) or just (first next) or
8173 * (first) is available in the input stream.
8174 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8175 * to avoid rescanning sequences of bytes, it DOES change the state of the
8176 * parser, do not use liberally.
8177 *
8178 * Returns the index to the current parsing point if the full sequence
8179 * is available, -1 otherwise.
8180 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008181static int
Owen Taylor3473f882001-02-23 17:55:21 +00008182xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8183 xmlChar next, xmlChar third) {
8184 int base, len;
8185 xmlParserInputPtr in;
8186 const xmlChar *buf;
8187
8188 in = ctxt->input;
8189 if (in == NULL) return(-1);
8190 base = in->cur - in->base;
8191 if (base < 0) return(-1);
8192 if (ctxt->checkIndex > base)
8193 base = ctxt->checkIndex;
8194 if (in->buf == NULL) {
8195 buf = in->base;
8196 len = in->length;
8197 } else {
8198 buf = in->buf->buffer->content;
8199 len = in->buf->buffer->use;
8200 }
8201 /* take into account the sequence length */
8202 if (third) len -= 2;
8203 else if (next) len --;
8204 for (;base < len;base++) {
8205 if (buf[base] == first) {
8206 if (third != 0) {
8207 if ((buf[base + 1] != next) ||
8208 (buf[base + 2] != third)) continue;
8209 } else if (next != 0) {
8210 if (buf[base + 1] != next) continue;
8211 }
8212 ctxt->checkIndex = 0;
8213#ifdef DEBUG_PUSH
8214 if (next == 0)
8215 xmlGenericError(xmlGenericErrorContext,
8216 "PP: lookup '%c' found at %d\n",
8217 first, base);
8218 else if (third == 0)
8219 xmlGenericError(xmlGenericErrorContext,
8220 "PP: lookup '%c%c' found at %d\n",
8221 first, next, base);
8222 else
8223 xmlGenericError(xmlGenericErrorContext,
8224 "PP: lookup '%c%c%c' found at %d\n",
8225 first, next, third, base);
8226#endif
8227 return(base - (in->cur - in->base));
8228 }
8229 }
8230 ctxt->checkIndex = base;
8231#ifdef DEBUG_PUSH
8232 if (next == 0)
8233 xmlGenericError(xmlGenericErrorContext,
8234 "PP: lookup '%c' failed\n", first);
8235 else if (third == 0)
8236 xmlGenericError(xmlGenericErrorContext,
8237 "PP: lookup '%c%c' failed\n", first, next);
8238 else
8239 xmlGenericError(xmlGenericErrorContext,
8240 "PP: lookup '%c%c%c' failed\n", first, next, third);
8241#endif
8242 return(-1);
8243}
8244
8245/**
Daniel Veillarda880b122003-04-21 21:36:41 +00008246 * xmlParseGetLasts:
8247 * @ctxt: an XML parser context
8248 * @lastlt: pointer to store the last '<' from the input
8249 * @lastgt: pointer to store the last '>' from the input
8250 *
8251 * Lookup the last < and > in the current chunk
8252 */
8253static void
8254xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
8255 const xmlChar **lastgt) {
8256 const xmlChar *tmp;
8257
8258 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
8259 xmlGenericError(xmlGenericErrorContext,
8260 "Internal error: xmlParseGetLasts\n");
8261 return;
8262 }
8263 if ((ctxt->progressive == 1) && (ctxt->inputNr == 1)) {
8264 tmp = ctxt->input->end;
8265 tmp--;
8266 while ((tmp >= ctxt->input->base) && (*tmp != '<') &&
8267 (*tmp != '>')) tmp--;
8268 if (tmp < ctxt->input->base) {
8269 *lastlt = NULL;
8270 *lastgt = NULL;
8271 } else if (*tmp == '<') {
8272 *lastlt = tmp;
8273 tmp--;
8274 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
8275 if (tmp < ctxt->input->base)
8276 *lastgt = NULL;
8277 else
8278 *lastgt = tmp;
8279 } else {
8280 *lastgt = tmp;
8281 tmp--;
8282 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
8283 if (tmp < ctxt->input->base)
8284 *lastlt = NULL;
8285 else
8286 *lastlt = tmp;
8287 }
8288
8289 } else {
8290 *lastlt = NULL;
8291 *lastgt = NULL;
8292 }
8293}
8294/**
Owen Taylor3473f882001-02-23 17:55:21 +00008295 * xmlParseTryOrFinish:
8296 * @ctxt: an XML parser context
8297 * @terminate: last chunk indicator
8298 *
8299 * Try to progress on parsing
8300 *
8301 * Returns zero if no parsing was possible
8302 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008303static int
Owen Taylor3473f882001-02-23 17:55:21 +00008304xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8305 int ret = 0;
8306 int avail;
8307 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00008308 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00008309
8310#ifdef DEBUG_PUSH
8311 switch (ctxt->instate) {
8312 case XML_PARSER_EOF:
8313 xmlGenericError(xmlGenericErrorContext,
8314 "PP: try EOF\n"); break;
8315 case XML_PARSER_START:
8316 xmlGenericError(xmlGenericErrorContext,
8317 "PP: try START\n"); break;
8318 case XML_PARSER_MISC:
8319 xmlGenericError(xmlGenericErrorContext,
8320 "PP: try MISC\n");break;
8321 case XML_PARSER_COMMENT:
8322 xmlGenericError(xmlGenericErrorContext,
8323 "PP: try COMMENT\n");break;
8324 case XML_PARSER_PROLOG:
8325 xmlGenericError(xmlGenericErrorContext,
8326 "PP: try PROLOG\n");break;
8327 case XML_PARSER_START_TAG:
8328 xmlGenericError(xmlGenericErrorContext,
8329 "PP: try START_TAG\n");break;
8330 case XML_PARSER_CONTENT:
8331 xmlGenericError(xmlGenericErrorContext,
8332 "PP: try CONTENT\n");break;
8333 case XML_PARSER_CDATA_SECTION:
8334 xmlGenericError(xmlGenericErrorContext,
8335 "PP: try CDATA_SECTION\n");break;
8336 case XML_PARSER_END_TAG:
8337 xmlGenericError(xmlGenericErrorContext,
8338 "PP: try END_TAG\n");break;
8339 case XML_PARSER_ENTITY_DECL:
8340 xmlGenericError(xmlGenericErrorContext,
8341 "PP: try ENTITY_DECL\n");break;
8342 case XML_PARSER_ENTITY_VALUE:
8343 xmlGenericError(xmlGenericErrorContext,
8344 "PP: try ENTITY_VALUE\n");break;
8345 case XML_PARSER_ATTRIBUTE_VALUE:
8346 xmlGenericError(xmlGenericErrorContext,
8347 "PP: try ATTRIBUTE_VALUE\n");break;
8348 case XML_PARSER_DTD:
8349 xmlGenericError(xmlGenericErrorContext,
8350 "PP: try DTD\n");break;
8351 case XML_PARSER_EPILOG:
8352 xmlGenericError(xmlGenericErrorContext,
8353 "PP: try EPILOG\n");break;
8354 case XML_PARSER_PI:
8355 xmlGenericError(xmlGenericErrorContext,
8356 "PP: try PI\n");break;
8357 case XML_PARSER_IGNORE:
8358 xmlGenericError(xmlGenericErrorContext,
8359 "PP: try IGNORE\n");break;
8360 }
8361#endif
8362
Daniel Veillarda880b122003-04-21 21:36:41 +00008363 if (ctxt->input->cur - ctxt->input->base > 4096) {
8364 xmlSHRINK(ctxt);
8365 ctxt->checkIndex = 0;
8366 }
8367 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00008368
Daniel Veillarda880b122003-04-21 21:36:41 +00008369 while (1) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008370 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
8371 return(0);
8372
8373
Owen Taylor3473f882001-02-23 17:55:21 +00008374 /*
8375 * Pop-up of finished entities.
8376 */
8377 while ((RAW == 0) && (ctxt->inputNr > 1))
8378 xmlPopInput(ctxt);
8379
8380 if (ctxt->input ==NULL) break;
8381 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00008382 avail = ctxt->input->length -
8383 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00008384 else {
8385 /*
8386 * If we are operating on converted input, try to flush
8387 * remainng chars to avoid them stalling in the non-converted
8388 * buffer.
8389 */
8390 if ((ctxt->input->buf->raw != NULL) &&
8391 (ctxt->input->buf->raw->use > 0)) {
8392 int base = ctxt->input->base -
8393 ctxt->input->buf->buffer->content;
8394 int current = ctxt->input->cur - ctxt->input->base;
8395
8396 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8397 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8398 ctxt->input->cur = ctxt->input->base + current;
8399 ctxt->input->end =
8400 &ctxt->input->buf->buffer->content[
8401 ctxt->input->buf->buffer->use];
8402 }
8403 avail = ctxt->input->buf->buffer->use -
8404 (ctxt->input->cur - ctxt->input->base);
8405 }
Owen Taylor3473f882001-02-23 17:55:21 +00008406 if (avail < 1)
8407 goto done;
8408 switch (ctxt->instate) {
8409 case XML_PARSER_EOF:
8410 /*
8411 * Document parsing is done !
8412 */
8413 goto done;
8414 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008415 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8416 xmlChar start[4];
8417 xmlCharEncoding enc;
8418
8419 /*
8420 * Very first chars read from the document flow.
8421 */
8422 if (avail < 4)
8423 goto done;
8424
8425 /*
8426 * Get the 4 first bytes and decode the charset
8427 * if enc != XML_CHAR_ENCODING_NONE
8428 * plug some encoding conversion routines.
8429 */
8430 start[0] = RAW;
8431 start[1] = NXT(1);
8432 start[2] = NXT(2);
8433 start[3] = NXT(3);
8434 enc = xmlDetectCharEncoding(start, 4);
8435 if (enc != XML_CHAR_ENCODING_NONE) {
8436 xmlSwitchEncoding(ctxt, enc);
8437 }
8438 break;
8439 }
Owen Taylor3473f882001-02-23 17:55:21 +00008440
8441 cur = ctxt->input->cur[0];
8442 next = ctxt->input->cur[1];
8443 if (cur == 0) {
8444 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8445 ctxt->sax->setDocumentLocator(ctxt->userData,
8446 &xmlDefaultSAXLocator);
8447 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8448 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8449 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8450 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008451 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008452 ctxt->instate = XML_PARSER_EOF;
8453#ifdef DEBUG_PUSH
8454 xmlGenericError(xmlGenericErrorContext,
8455 "PP: entering EOF\n");
8456#endif
8457 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8458 ctxt->sax->endDocument(ctxt->userData);
8459 goto done;
8460 }
8461 if ((cur == '<') && (next == '?')) {
8462 /* PI or XML decl */
8463 if (avail < 5) return(ret);
8464 if ((!terminate) &&
8465 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8466 return(ret);
8467 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8468 ctxt->sax->setDocumentLocator(ctxt->userData,
8469 &xmlDefaultSAXLocator);
8470 if ((ctxt->input->cur[2] == 'x') &&
8471 (ctxt->input->cur[3] == 'm') &&
8472 (ctxt->input->cur[4] == 'l') &&
8473 (IS_BLANK(ctxt->input->cur[5]))) {
8474 ret += 5;
8475#ifdef DEBUG_PUSH
8476 xmlGenericError(xmlGenericErrorContext,
8477 "PP: Parsing XML Decl\n");
8478#endif
8479 xmlParseXMLDecl(ctxt);
8480 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8481 /*
8482 * The XML REC instructs us to stop parsing right
8483 * here
8484 */
8485 ctxt->instate = XML_PARSER_EOF;
8486 return(0);
8487 }
8488 ctxt->standalone = ctxt->input->standalone;
8489 if ((ctxt->encoding == NULL) &&
8490 (ctxt->input->encoding != NULL))
8491 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8492 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8493 (!ctxt->disableSAX))
8494 ctxt->sax->startDocument(ctxt->userData);
8495 ctxt->instate = XML_PARSER_MISC;
8496#ifdef DEBUG_PUSH
8497 xmlGenericError(xmlGenericErrorContext,
8498 "PP: entering MISC\n");
8499#endif
8500 } else {
8501 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8502 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8503 (!ctxt->disableSAX))
8504 ctxt->sax->startDocument(ctxt->userData);
8505 ctxt->instate = XML_PARSER_MISC;
8506#ifdef DEBUG_PUSH
8507 xmlGenericError(xmlGenericErrorContext,
8508 "PP: entering MISC\n");
8509#endif
8510 }
8511 } else {
8512 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8513 ctxt->sax->setDocumentLocator(ctxt->userData,
8514 &xmlDefaultSAXLocator);
8515 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8516 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8517 (!ctxt->disableSAX))
8518 ctxt->sax->startDocument(ctxt->userData);
8519 ctxt->instate = XML_PARSER_MISC;
8520#ifdef DEBUG_PUSH
8521 xmlGenericError(xmlGenericErrorContext,
8522 "PP: entering MISC\n");
8523#endif
8524 }
8525 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00008526 case XML_PARSER_START_TAG: {
8527 xmlChar *name, *oldname;
8528
8529 if ((avail < 2) && (ctxt->inputNr == 1))
8530 goto done;
8531 cur = ctxt->input->cur[0];
8532 if (cur != '<') {
8533 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8534 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8535 ctxt->sax->error(ctxt->userData,
8536 "Start tag expect, '<' not found\n");
8537 ctxt->wellFormed = 0;
8538 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8539 ctxt->instate = XML_PARSER_EOF;
8540#ifdef DEBUG_PUSH
8541 xmlGenericError(xmlGenericErrorContext,
8542 "PP: entering EOF\n");
8543#endif
8544 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8545 ctxt->sax->endDocument(ctxt->userData);
8546 goto done;
8547 }
8548 if (!terminate) {
8549 if (ctxt->progressive) {
8550 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
8551 goto done;
8552 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
8553 goto done;
8554 }
8555 }
8556 if (ctxt->spaceNr == 0)
8557 spacePush(ctxt, -1);
8558 else
8559 spacePush(ctxt, *ctxt->space);
8560 name = xmlParseStartTag(ctxt);
8561 if (name == NULL) {
8562 spacePop(ctxt);
8563 ctxt->instate = XML_PARSER_EOF;
8564#ifdef DEBUG_PUSH
8565 xmlGenericError(xmlGenericErrorContext,
8566 "PP: entering EOF\n");
8567#endif
8568 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8569 ctxt->sax->endDocument(ctxt->userData);
8570 goto done;
8571 }
8572 namePush(ctxt, name);
8573
8574 /*
8575 * [ VC: Root Element Type ]
8576 * The Name in the document type declaration must match
8577 * the element type of the root element.
8578 */
8579 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8580 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8581 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8582
8583 /*
8584 * Check for an Empty Element.
8585 */
8586 if ((RAW == '/') && (NXT(1) == '>')) {
8587 SKIP(2);
8588 if ((ctxt->sax != NULL) &&
8589 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8590 ctxt->sax->endElement(ctxt->userData, name);
8591 oldname = namePop(ctxt);
8592 spacePop(ctxt);
8593 if (oldname != NULL) {
8594#ifdef DEBUG_STACK
8595 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8596#endif
8597 xmlFree(oldname);
8598 }
8599 if (ctxt->name == NULL) {
8600 ctxt->instate = XML_PARSER_EPILOG;
8601#ifdef DEBUG_PUSH
8602 xmlGenericError(xmlGenericErrorContext,
8603 "PP: entering EPILOG\n");
8604#endif
8605 } else {
8606 ctxt->instate = XML_PARSER_CONTENT;
8607#ifdef DEBUG_PUSH
8608 xmlGenericError(xmlGenericErrorContext,
8609 "PP: entering CONTENT\n");
8610#endif
8611 }
8612 break;
8613 }
8614 if (RAW == '>') {
8615 NEXT;
8616 } else {
8617 ctxt->errNo = XML_ERR_GT_REQUIRED;
8618 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8619 ctxt->sax->error(ctxt->userData,
8620 "Couldn't find end of Start Tag %s\n",
8621 name);
8622 ctxt->wellFormed = 0;
8623 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8624
8625 /*
8626 * end of parsing of this node.
8627 */
8628 nodePop(ctxt);
8629 oldname = namePop(ctxt);
8630 spacePop(ctxt);
8631 if (oldname != NULL) {
8632#ifdef DEBUG_STACK
8633 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8634#endif
8635 xmlFree(oldname);
8636 }
8637 }
8638 ctxt->instate = XML_PARSER_CONTENT;
8639#ifdef DEBUG_PUSH
8640 xmlGenericError(xmlGenericErrorContext,
8641 "PP: entering CONTENT\n");
8642#endif
8643 break;
8644 }
8645 case XML_PARSER_CONTENT: {
8646 const xmlChar *test;
8647 unsigned int cons;
8648 if ((avail < 2) && (ctxt->inputNr == 1))
8649 goto done;
8650 cur = ctxt->input->cur[0];
8651 next = ctxt->input->cur[1];
8652
8653 test = CUR_PTR;
8654 cons = ctxt->input->consumed;
8655 if ((cur == '<') && (next == '/')) {
8656 ctxt->instate = XML_PARSER_END_TAG;
8657#ifdef DEBUG_PUSH
8658 xmlGenericError(xmlGenericErrorContext,
8659 "PP: entering END_TAG\n");
8660#endif
8661 break;
8662 } else if ((cur == '<') && (next == '?')) {
8663 if ((!terminate) &&
8664 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8665 goto done;
8666#ifdef DEBUG_PUSH
8667 xmlGenericError(xmlGenericErrorContext,
8668 "PP: Parsing PI\n");
8669#endif
8670 xmlParsePI(ctxt);
8671 } else if ((cur == '<') && (next != '!')) {
8672 ctxt->instate = XML_PARSER_START_TAG;
8673#ifdef DEBUG_PUSH
8674 xmlGenericError(xmlGenericErrorContext,
8675 "PP: entering START_TAG\n");
8676#endif
8677 break;
8678 } else if ((cur == '<') && (next == '!') &&
8679 (ctxt->input->cur[2] == '-') &&
8680 (ctxt->input->cur[3] == '-')) {
8681 if ((!terminate) &&
8682 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8683 goto done;
8684#ifdef DEBUG_PUSH
8685 xmlGenericError(xmlGenericErrorContext,
8686 "PP: Parsing Comment\n");
8687#endif
8688 xmlParseComment(ctxt);
8689 ctxt->instate = XML_PARSER_CONTENT;
8690 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8691 (ctxt->input->cur[2] == '[') &&
8692 (ctxt->input->cur[3] == 'C') &&
8693 (ctxt->input->cur[4] == 'D') &&
8694 (ctxt->input->cur[5] == 'A') &&
8695 (ctxt->input->cur[6] == 'T') &&
8696 (ctxt->input->cur[7] == 'A') &&
8697 (ctxt->input->cur[8] == '[')) {
8698 SKIP(9);
8699 ctxt->instate = XML_PARSER_CDATA_SECTION;
8700#ifdef DEBUG_PUSH
8701 xmlGenericError(xmlGenericErrorContext,
8702 "PP: entering CDATA_SECTION\n");
8703#endif
8704 break;
8705 } else if ((cur == '<') && (next == '!') &&
8706 (avail < 9)) {
8707 goto done;
8708 } else if (cur == '&') {
8709 if ((!terminate) &&
8710 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8711 goto done;
8712#ifdef DEBUG_PUSH
8713 xmlGenericError(xmlGenericErrorContext,
8714 "PP: Parsing Reference\n");
8715#endif
8716 xmlParseReference(ctxt);
8717 } else {
8718 /* TODO Avoid the extra copy, handle directly !!! */
8719 /*
8720 * Goal of the following test is:
8721 * - minimize calls to the SAX 'character' callback
8722 * when they are mergeable
8723 * - handle an problem for isBlank when we only parse
8724 * a sequence of blank chars and the next one is
8725 * not available to check against '<' presence.
8726 * - tries to homogenize the differences in SAX
8727 * callbacks between the push and pull versions
8728 * of the parser.
8729 */
8730 if ((ctxt->inputNr == 1) &&
8731 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8732 if (!terminate) {
8733 if (ctxt->progressive) {
8734 if ((lastlt == NULL) ||
8735 (ctxt->input->cur > lastlt))
8736 goto done;
8737 } else if (xmlParseLookupSequence(ctxt,
8738 '<', 0, 0) < 0) {
8739 goto done;
8740 }
8741 }
8742 }
8743 ctxt->checkIndex = 0;
8744#ifdef DEBUG_PUSH
8745 xmlGenericError(xmlGenericErrorContext,
8746 "PP: Parsing char data\n");
8747#endif
8748 xmlParseCharData(ctxt, 0);
8749 }
8750 /*
8751 * Pop-up of finished entities.
8752 */
8753 while ((RAW == 0) && (ctxt->inputNr > 1))
8754 xmlPopInput(ctxt);
8755 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
8756 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8757 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8758 ctxt->sax->error(ctxt->userData,
8759 "detected an error in element content\n");
8760 ctxt->wellFormed = 0;
8761 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8762 ctxt->instate = XML_PARSER_EOF;
8763 break;
8764 }
8765 break;
8766 }
8767 case XML_PARSER_END_TAG:
8768 if (avail < 2)
8769 goto done;
8770 if (!terminate) {
8771 if (ctxt->progressive) {
8772 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
8773 goto done;
8774 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
8775 goto done;
8776 }
8777 }
8778 xmlParseEndTag(ctxt);
8779 if (ctxt->name == NULL) {
8780 ctxt->instate = XML_PARSER_EPILOG;
8781#ifdef DEBUG_PUSH
8782 xmlGenericError(xmlGenericErrorContext,
8783 "PP: entering EPILOG\n");
8784#endif
8785 } else {
8786 ctxt->instate = XML_PARSER_CONTENT;
8787#ifdef DEBUG_PUSH
8788 xmlGenericError(xmlGenericErrorContext,
8789 "PP: entering CONTENT\n");
8790#endif
8791 }
8792 break;
8793 case XML_PARSER_CDATA_SECTION: {
8794 /*
8795 * The Push mode need to have the SAX callback for
8796 * cdataBlock merge back contiguous callbacks.
8797 */
8798 int base;
8799
8800 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8801 if (base < 0) {
8802 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8803 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8804 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00008805 ctxt->sax->cdataBlock(ctxt->userData,
8806 ctxt->input->cur,
8807 XML_PARSER_BIG_BUFFER_SIZE);
8808 else if (ctxt->sax->characters != NULL)
8809 ctxt->sax->characters(ctxt->userData,
8810 ctxt->input->cur,
Daniel Veillarda880b122003-04-21 21:36:41 +00008811 XML_PARSER_BIG_BUFFER_SIZE);
8812 }
8813 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8814 ctxt->checkIndex = 0;
8815 }
8816 goto done;
8817 } else {
8818 if ((ctxt->sax != NULL) && (base > 0) &&
8819 (!ctxt->disableSAX)) {
8820 if (ctxt->sax->cdataBlock != NULL)
8821 ctxt->sax->cdataBlock(ctxt->userData,
8822 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00008823 else if (ctxt->sax->characters != NULL)
8824 ctxt->sax->characters(ctxt->userData,
8825 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00008826 }
8827 SKIP(base + 3);
8828 ctxt->checkIndex = 0;
8829 ctxt->instate = XML_PARSER_CONTENT;
8830#ifdef DEBUG_PUSH
8831 xmlGenericError(xmlGenericErrorContext,
8832 "PP: entering CONTENT\n");
8833#endif
8834 }
8835 break;
8836 }
Owen Taylor3473f882001-02-23 17:55:21 +00008837 case XML_PARSER_MISC:
8838 SKIP_BLANKS;
8839 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00008840 avail = ctxt->input->length -
8841 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00008842 else
Daniel Veillarda880b122003-04-21 21:36:41 +00008843 avail = ctxt->input->buf->buffer->use -
8844 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00008845 if (avail < 2)
8846 goto done;
8847 cur = ctxt->input->cur[0];
8848 next = ctxt->input->cur[1];
8849 if ((cur == '<') && (next == '?')) {
8850 if ((!terminate) &&
8851 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8852 goto done;
8853#ifdef DEBUG_PUSH
8854 xmlGenericError(xmlGenericErrorContext,
8855 "PP: Parsing PI\n");
8856#endif
8857 xmlParsePI(ctxt);
8858 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00008859 (ctxt->input->cur[2] == '-') &&
8860 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008861 if ((!terminate) &&
8862 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8863 goto done;
8864#ifdef DEBUG_PUSH
8865 xmlGenericError(xmlGenericErrorContext,
8866 "PP: Parsing Comment\n");
8867#endif
8868 xmlParseComment(ctxt);
8869 ctxt->instate = XML_PARSER_MISC;
8870 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00008871 (ctxt->input->cur[2] == 'D') &&
8872 (ctxt->input->cur[3] == 'O') &&
8873 (ctxt->input->cur[4] == 'C') &&
8874 (ctxt->input->cur[5] == 'T') &&
8875 (ctxt->input->cur[6] == 'Y') &&
8876 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008877 (ctxt->input->cur[8] == 'E')) {
8878 if ((!terminate) &&
8879 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8880 goto done;
8881#ifdef DEBUG_PUSH
8882 xmlGenericError(xmlGenericErrorContext,
8883 "PP: Parsing internal subset\n");
8884#endif
8885 ctxt->inSubset = 1;
8886 xmlParseDocTypeDecl(ctxt);
8887 if (RAW == '[') {
8888 ctxt->instate = XML_PARSER_DTD;
8889#ifdef DEBUG_PUSH
8890 xmlGenericError(xmlGenericErrorContext,
8891 "PP: entering DTD\n");
8892#endif
8893 } else {
8894 /*
8895 * Create and update the external subset.
8896 */
8897 ctxt->inSubset = 2;
8898 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8899 (ctxt->sax->externalSubset != NULL))
8900 ctxt->sax->externalSubset(ctxt->userData,
8901 ctxt->intSubName, ctxt->extSubSystem,
8902 ctxt->extSubURI);
8903 ctxt->inSubset = 0;
8904 ctxt->instate = XML_PARSER_PROLOG;
8905#ifdef DEBUG_PUSH
8906 xmlGenericError(xmlGenericErrorContext,
8907 "PP: entering PROLOG\n");
8908#endif
8909 }
8910 } else if ((cur == '<') && (next == '!') &&
8911 (avail < 9)) {
8912 goto done;
8913 } else {
8914 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00008915 ctxt->progressive = 1;
8916 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00008917#ifdef DEBUG_PUSH
8918 xmlGenericError(xmlGenericErrorContext,
8919 "PP: entering START_TAG\n");
8920#endif
8921 }
8922 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008923 case XML_PARSER_PROLOG:
8924 SKIP_BLANKS;
8925 if (ctxt->input->buf == NULL)
8926 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8927 else
8928 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8929 if (avail < 2)
8930 goto done;
8931 cur = ctxt->input->cur[0];
8932 next = ctxt->input->cur[1];
8933 if ((cur == '<') && (next == '?')) {
8934 if ((!terminate) &&
8935 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8936 goto done;
8937#ifdef DEBUG_PUSH
8938 xmlGenericError(xmlGenericErrorContext,
8939 "PP: Parsing PI\n");
8940#endif
8941 xmlParsePI(ctxt);
8942 } else if ((cur == '<') && (next == '!') &&
8943 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8944 if ((!terminate) &&
8945 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8946 goto done;
8947#ifdef DEBUG_PUSH
8948 xmlGenericError(xmlGenericErrorContext,
8949 "PP: Parsing Comment\n");
8950#endif
8951 xmlParseComment(ctxt);
8952 ctxt->instate = XML_PARSER_PROLOG;
8953 } else if ((cur == '<') && (next == '!') &&
8954 (avail < 4)) {
8955 goto done;
8956 } else {
8957 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00008958 ctxt->progressive = 1;
8959 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00008960#ifdef DEBUG_PUSH
8961 xmlGenericError(xmlGenericErrorContext,
8962 "PP: entering START_TAG\n");
8963#endif
8964 }
8965 break;
8966 case XML_PARSER_EPILOG:
8967 SKIP_BLANKS;
8968 if (ctxt->input->buf == NULL)
8969 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8970 else
8971 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8972 if (avail < 2)
8973 goto done;
8974 cur = ctxt->input->cur[0];
8975 next = ctxt->input->cur[1];
8976 if ((cur == '<') && (next == '?')) {
8977 if ((!terminate) &&
8978 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8979 goto done;
8980#ifdef DEBUG_PUSH
8981 xmlGenericError(xmlGenericErrorContext,
8982 "PP: Parsing PI\n");
8983#endif
8984 xmlParsePI(ctxt);
8985 ctxt->instate = XML_PARSER_EPILOG;
8986 } else if ((cur == '<') && (next == '!') &&
8987 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8988 if ((!terminate) &&
8989 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8990 goto done;
8991#ifdef DEBUG_PUSH
8992 xmlGenericError(xmlGenericErrorContext,
8993 "PP: Parsing Comment\n");
8994#endif
8995 xmlParseComment(ctxt);
8996 ctxt->instate = XML_PARSER_EPILOG;
8997 } else if ((cur == '<') && (next == '!') &&
8998 (avail < 4)) {
8999 goto done;
9000 } else {
9001 ctxt->errNo = XML_ERR_DOCUMENT_END;
9002 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9003 ctxt->sax->error(ctxt->userData,
9004 "Extra content at the end of the document\n");
9005 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009006 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009007 ctxt->instate = XML_PARSER_EOF;
9008#ifdef DEBUG_PUSH
9009 xmlGenericError(xmlGenericErrorContext,
9010 "PP: entering EOF\n");
9011#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009012 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009013 ctxt->sax->endDocument(ctxt->userData);
9014 goto done;
9015 }
9016 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009017 case XML_PARSER_DTD: {
9018 /*
9019 * Sorry but progressive parsing of the internal subset
9020 * is not expected to be supported. We first check that
9021 * the full content of the internal subset is available and
9022 * the parsing is launched only at that point.
9023 * Internal subset ends up with "']' S? '>'" in an unescaped
9024 * section and not in a ']]>' sequence which are conditional
9025 * sections (whoever argued to keep that crap in XML deserve
9026 * a place in hell !).
9027 */
9028 int base, i;
9029 xmlChar *buf;
9030 xmlChar quote = 0;
9031
9032 base = ctxt->input->cur - ctxt->input->base;
9033 if (base < 0) return(0);
9034 if (ctxt->checkIndex > base)
9035 base = ctxt->checkIndex;
9036 buf = ctxt->input->buf->buffer->content;
9037 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9038 base++) {
9039 if (quote != 0) {
9040 if (buf[base] == quote)
9041 quote = 0;
9042 continue;
9043 }
9044 if (buf[base] == '"') {
9045 quote = '"';
9046 continue;
9047 }
9048 if (buf[base] == '\'') {
9049 quote = '\'';
9050 continue;
9051 }
9052 if (buf[base] == ']') {
9053 if ((unsigned int) base +1 >=
9054 ctxt->input->buf->buffer->use)
9055 break;
9056 if (buf[base + 1] == ']') {
9057 /* conditional crap, skip both ']' ! */
9058 base++;
9059 continue;
9060 }
9061 for (i = 0;
9062 (unsigned int) base + i < ctxt->input->buf->buffer->use;
9063 i++) {
9064 if (buf[base + i] == '>')
9065 goto found_end_int_subset;
9066 }
9067 break;
9068 }
9069 }
9070 /*
9071 * We didn't found the end of the Internal subset
9072 */
9073 if (quote == 0)
9074 ctxt->checkIndex = base;
9075#ifdef DEBUG_PUSH
9076 if (next == 0)
9077 xmlGenericError(xmlGenericErrorContext,
9078 "PP: lookup of int subset end filed\n");
9079#endif
9080 goto done;
9081
9082found_end_int_subset:
9083 xmlParseInternalSubset(ctxt);
9084 ctxt->inSubset = 2;
9085 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9086 (ctxt->sax->externalSubset != NULL))
9087 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9088 ctxt->extSubSystem, ctxt->extSubURI);
9089 ctxt->inSubset = 0;
9090 ctxt->instate = XML_PARSER_PROLOG;
9091 ctxt->checkIndex = 0;
9092#ifdef DEBUG_PUSH
9093 xmlGenericError(xmlGenericErrorContext,
9094 "PP: entering PROLOG\n");
9095#endif
9096 break;
9097 }
9098 case XML_PARSER_COMMENT:
9099 xmlGenericError(xmlGenericErrorContext,
9100 "PP: internal error, state == COMMENT\n");
9101 ctxt->instate = XML_PARSER_CONTENT;
9102#ifdef DEBUG_PUSH
9103 xmlGenericError(xmlGenericErrorContext,
9104 "PP: entering CONTENT\n");
9105#endif
9106 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009107 case XML_PARSER_IGNORE:
9108 xmlGenericError(xmlGenericErrorContext,
9109 "PP: internal error, state == IGNORE");
9110 ctxt->instate = XML_PARSER_DTD;
9111#ifdef DEBUG_PUSH
9112 xmlGenericError(xmlGenericErrorContext,
9113 "PP: entering DTD\n");
9114#endif
9115 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009116 case XML_PARSER_PI:
9117 xmlGenericError(xmlGenericErrorContext,
9118 "PP: internal error, state == PI\n");
9119 ctxt->instate = XML_PARSER_CONTENT;
9120#ifdef DEBUG_PUSH
9121 xmlGenericError(xmlGenericErrorContext,
9122 "PP: entering CONTENT\n");
9123#endif
9124 break;
9125 case XML_PARSER_ENTITY_DECL:
9126 xmlGenericError(xmlGenericErrorContext,
9127 "PP: internal error, state == ENTITY_DECL\n");
9128 ctxt->instate = XML_PARSER_DTD;
9129#ifdef DEBUG_PUSH
9130 xmlGenericError(xmlGenericErrorContext,
9131 "PP: entering DTD\n");
9132#endif
9133 break;
9134 case XML_PARSER_ENTITY_VALUE:
9135 xmlGenericError(xmlGenericErrorContext,
9136 "PP: internal error, state == ENTITY_VALUE\n");
9137 ctxt->instate = XML_PARSER_CONTENT;
9138#ifdef DEBUG_PUSH
9139 xmlGenericError(xmlGenericErrorContext,
9140 "PP: entering DTD\n");
9141#endif
9142 break;
9143 case XML_PARSER_ATTRIBUTE_VALUE:
9144 xmlGenericError(xmlGenericErrorContext,
9145 "PP: internal error, state == ATTRIBUTE_VALUE\n");
9146 ctxt->instate = XML_PARSER_START_TAG;
9147#ifdef DEBUG_PUSH
9148 xmlGenericError(xmlGenericErrorContext,
9149 "PP: entering START_TAG\n");
9150#endif
9151 break;
9152 case XML_PARSER_SYSTEM_LITERAL:
9153 xmlGenericError(xmlGenericErrorContext,
9154 "PP: internal error, state == SYSTEM_LITERAL\n");
9155 ctxt->instate = XML_PARSER_START_TAG;
9156#ifdef DEBUG_PUSH
9157 xmlGenericError(xmlGenericErrorContext,
9158 "PP: entering START_TAG\n");
9159#endif
9160 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00009161 case XML_PARSER_PUBLIC_LITERAL:
9162 xmlGenericError(xmlGenericErrorContext,
9163 "PP: internal error, state == PUBLIC_LITERAL\n");
9164 ctxt->instate = XML_PARSER_START_TAG;
9165#ifdef DEBUG_PUSH
9166 xmlGenericError(xmlGenericErrorContext,
9167 "PP: entering START_TAG\n");
9168#endif
9169 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009170 }
9171 }
9172done:
9173#ifdef DEBUG_PUSH
9174 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
9175#endif
9176 return(ret);
9177}
9178
9179/**
Owen Taylor3473f882001-02-23 17:55:21 +00009180 * xmlParseChunk:
9181 * @ctxt: an XML parser context
9182 * @chunk: an char array
9183 * @size: the size in byte of the chunk
9184 * @terminate: last chunk indicator
9185 *
9186 * Parse a Chunk of memory
9187 *
9188 * Returns zero if no error, the xmlParserErrors otherwise.
9189 */
9190int
9191xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9192 int terminate) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009193 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9194 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +00009195 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9196 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9197 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9198 int cur = ctxt->input->cur - ctxt->input->base;
9199
9200 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9201 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9202 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009203 ctxt->input->end =
9204 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009205#ifdef DEBUG_PUSH
9206 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9207#endif
9208
Daniel Veillarda880b122003-04-21 21:36:41 +00009209#if 0
Owen Taylor3473f882001-02-23 17:55:21 +00009210 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
9211 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda880b122003-04-21 21:36:41 +00009212#endif
Owen Taylor3473f882001-02-23 17:55:21 +00009213 } else if (ctxt->instate != XML_PARSER_EOF) {
9214 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
9215 xmlParserInputBufferPtr in = ctxt->input->buf;
9216 if ((in->encoder != NULL) && (in->buffer != NULL) &&
9217 (in->raw != NULL)) {
9218 int nbchars;
9219
9220 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
9221 if (nbchars < 0) {
9222 xmlGenericError(xmlGenericErrorContext,
9223 "xmlParseChunk: encoder error\n");
9224 return(XML_ERR_INVALID_ENCODING);
9225 }
9226 }
9227 }
9228 }
9229 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009230 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9231 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +00009232 if (terminate) {
9233 /*
9234 * Check for termination
9235 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009236 int avail = 0;
9237 if (ctxt->input->buf == NULL)
9238 avail = ctxt->input->length -
9239 (ctxt->input->cur - ctxt->input->base);
9240 else
9241 avail = ctxt->input->buf->buffer->use -
9242 (ctxt->input->cur - ctxt->input->base);
9243
Owen Taylor3473f882001-02-23 17:55:21 +00009244 if ((ctxt->instate != XML_PARSER_EOF) &&
9245 (ctxt->instate != XML_PARSER_EPILOG)) {
9246 ctxt->errNo = XML_ERR_DOCUMENT_END;
9247 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9248 ctxt->sax->error(ctxt->userData,
9249 "Extra content at the end of the document\n");
9250 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009251 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009252 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009253 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
9254 ctxt->errNo = XML_ERR_DOCUMENT_END;
9255 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9256 ctxt->sax->error(ctxt->userData,
9257 "Extra content at the end of the document\n");
9258 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009259 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009260
9261 }
Owen Taylor3473f882001-02-23 17:55:21 +00009262 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009263 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009264 ctxt->sax->endDocument(ctxt->userData);
9265 }
9266 ctxt->instate = XML_PARSER_EOF;
9267 }
9268 return((xmlParserErrors) ctxt->errNo);
9269}
9270
9271/************************************************************************
9272 * *
9273 * I/O front end functions to the parser *
9274 * *
9275 ************************************************************************/
9276
9277/**
9278 * xmlStopParser:
9279 * @ctxt: an XML parser context
9280 *
9281 * Blocks further parser processing
9282 */
9283void
9284xmlStopParser(xmlParserCtxtPtr ctxt) {
9285 ctxt->instate = XML_PARSER_EOF;
9286 if (ctxt->input != NULL)
9287 ctxt->input->cur = BAD_CAST"";
9288}
9289
9290/**
9291 * xmlCreatePushParserCtxt:
9292 * @sax: a SAX handler
9293 * @user_data: The user data returned on SAX callbacks
9294 * @chunk: a pointer to an array of chars
9295 * @size: number of chars in the array
9296 * @filename: an optional file name or URI
9297 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00009298 * Create a parser context for using the XML parser in push mode.
9299 * If @buffer and @size are non-NULL, the data is used to detect
9300 * the encoding. The remaining characters will be parsed so they
9301 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00009302 * To allow content encoding detection, @size should be >= 4
9303 * The value of @filename is used for fetching external entities
9304 * and error/warning reports.
9305 *
9306 * Returns the new parser context or NULL
9307 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00009308
Owen Taylor3473f882001-02-23 17:55:21 +00009309xmlParserCtxtPtr
9310xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9311 const char *chunk, int size, const char *filename) {
9312 xmlParserCtxtPtr ctxt;
9313 xmlParserInputPtr inputStream;
9314 xmlParserInputBufferPtr buf;
9315 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9316
9317 /*
9318 * plug some encoding conversion routines
9319 */
9320 if ((chunk != NULL) && (size >= 4))
9321 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9322
9323 buf = xmlAllocParserInputBuffer(enc);
9324 if (buf == NULL) return(NULL);
9325
9326 ctxt = xmlNewParserCtxt();
9327 if (ctxt == NULL) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009328 xmlGenericError(xmlGenericErrorContext,
9329 "xml parser: out of memory\n");
9330 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009331 return(NULL);
9332 }
9333 if (sax != NULL) {
9334 if (ctxt->sax != &xmlDefaultSAXHandler)
9335 xmlFree(ctxt->sax);
9336 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9337 if (ctxt->sax == NULL) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009338 xmlGenericError(xmlGenericErrorContext,
9339 "xml parser: out of memory\n");
9340 xmlFreeParserInputBuffer(buf);
9341 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009342 return(NULL);
9343 }
9344 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9345 if (user_data != NULL)
9346 ctxt->userData = user_data;
9347 }
9348 if (filename == NULL) {
9349 ctxt->directory = NULL;
9350 } else {
9351 ctxt->directory = xmlParserGetDirectory(filename);
9352 }
9353
9354 inputStream = xmlNewInputStream(ctxt);
9355 if (inputStream == NULL) {
9356 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009357 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009358 return(NULL);
9359 }
9360
9361 if (filename == NULL)
9362 inputStream->filename = NULL;
9363 else
Daniel Veillardf4862f02002-09-10 11:13:43 +00009364 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +00009365 xmlCanonicPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +00009366 inputStream->buf = buf;
9367 inputStream->base = inputStream->buf->buffer->content;
9368 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009369 inputStream->end =
9370 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009371
9372 inputPush(ctxt, inputStream);
9373
9374 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9375 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009376 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9377 int cur = ctxt->input->cur - ctxt->input->base;
9378
Owen Taylor3473f882001-02-23 17:55:21 +00009379 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009380
9381 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9382 ctxt->input->cur = ctxt->input->base + cur;
9383 ctxt->input->end =
9384 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009385#ifdef DEBUG_PUSH
9386 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9387#endif
9388 }
9389
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009390 if (enc != XML_CHAR_ENCODING_NONE) {
9391 xmlSwitchEncoding(ctxt, enc);
9392 }
9393
Owen Taylor3473f882001-02-23 17:55:21 +00009394 return(ctxt);
9395}
9396
9397/**
9398 * xmlCreateIOParserCtxt:
9399 * @sax: a SAX handler
9400 * @user_data: The user data returned on SAX callbacks
9401 * @ioread: an I/O read function
9402 * @ioclose: an I/O close function
9403 * @ioctx: an I/O handler
9404 * @enc: the charset encoding if known
9405 *
9406 * Create a parser context for using the XML parser with an existing
9407 * I/O stream
9408 *
9409 * Returns the new parser context or NULL
9410 */
9411xmlParserCtxtPtr
9412xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9413 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9414 void *ioctx, xmlCharEncoding enc) {
9415 xmlParserCtxtPtr ctxt;
9416 xmlParserInputPtr inputStream;
9417 xmlParserInputBufferPtr buf;
9418
9419 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9420 if (buf == NULL) return(NULL);
9421
9422 ctxt = xmlNewParserCtxt();
9423 if (ctxt == NULL) {
9424 xmlFree(buf);
9425 return(NULL);
9426 }
9427 if (sax != NULL) {
9428 if (ctxt->sax != &xmlDefaultSAXHandler)
9429 xmlFree(ctxt->sax);
9430 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9431 if (ctxt->sax == NULL) {
9432 xmlFree(buf);
9433 xmlFree(ctxt);
9434 return(NULL);
9435 }
9436 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9437 if (user_data != NULL)
9438 ctxt->userData = user_data;
9439 }
9440
9441 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9442 if (inputStream == NULL) {
9443 xmlFreeParserCtxt(ctxt);
9444 return(NULL);
9445 }
9446 inputPush(ctxt, inputStream);
9447
9448 return(ctxt);
9449}
9450
9451/************************************************************************
9452 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009453 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00009454 * *
9455 ************************************************************************/
9456
9457/**
9458 * xmlIOParseDTD:
9459 * @sax: the SAX handler block or NULL
9460 * @input: an Input Buffer
9461 * @enc: the charset encoding if known
9462 *
9463 * Load and parse a DTD
9464 *
9465 * Returns the resulting xmlDtdPtr or NULL in case of error.
9466 * @input will be freed at parsing end.
9467 */
9468
9469xmlDtdPtr
9470xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9471 xmlCharEncoding enc) {
9472 xmlDtdPtr ret = NULL;
9473 xmlParserCtxtPtr ctxt;
9474 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009475 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00009476
9477 if (input == NULL)
9478 return(NULL);
9479
9480 ctxt = xmlNewParserCtxt();
9481 if (ctxt == NULL) {
9482 return(NULL);
9483 }
9484
9485 /*
9486 * Set-up the SAX context
9487 */
9488 if (sax != NULL) {
9489 if (ctxt->sax != NULL)
9490 xmlFree(ctxt->sax);
9491 ctxt->sax = sax;
9492 ctxt->userData = NULL;
9493 }
9494
9495 /*
9496 * generate a parser input from the I/O handler
9497 */
9498
9499 pinput = xmlNewIOInputStream(ctxt, input, enc);
9500 if (pinput == NULL) {
9501 if (sax != NULL) ctxt->sax = NULL;
9502 xmlFreeParserCtxt(ctxt);
9503 return(NULL);
9504 }
9505
9506 /*
9507 * plug some encoding conversion routines here.
9508 */
9509 xmlPushInput(ctxt, pinput);
9510
9511 pinput->filename = NULL;
9512 pinput->line = 1;
9513 pinput->col = 1;
9514 pinput->base = ctxt->input->cur;
9515 pinput->cur = ctxt->input->cur;
9516 pinput->free = NULL;
9517
9518 /*
9519 * let's parse that entity knowing it's an external subset.
9520 */
9521 ctxt->inSubset = 2;
9522 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9523 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9524 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009525
9526 if (enc == XML_CHAR_ENCODING_NONE) {
9527 /*
9528 * Get the 4 first bytes and decode the charset
9529 * if enc != XML_CHAR_ENCODING_NONE
9530 * plug some encoding conversion routines.
9531 */
9532 start[0] = RAW;
9533 start[1] = NXT(1);
9534 start[2] = NXT(2);
9535 start[3] = NXT(3);
9536 enc = xmlDetectCharEncoding(start, 4);
9537 if (enc != XML_CHAR_ENCODING_NONE) {
9538 xmlSwitchEncoding(ctxt, enc);
9539 }
9540 }
9541
Owen Taylor3473f882001-02-23 17:55:21 +00009542 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9543
9544 if (ctxt->myDoc != NULL) {
9545 if (ctxt->wellFormed) {
9546 ret = ctxt->myDoc->extSubset;
9547 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +00009548 if (ret != NULL) {
9549 xmlNodePtr tmp;
9550
9551 ret->doc = NULL;
9552 tmp = ret->children;
9553 while (tmp != NULL) {
9554 tmp->doc = NULL;
9555 tmp = tmp->next;
9556 }
9557 }
Owen Taylor3473f882001-02-23 17:55:21 +00009558 } else {
9559 ret = NULL;
9560 }
9561 xmlFreeDoc(ctxt->myDoc);
9562 ctxt->myDoc = NULL;
9563 }
9564 if (sax != NULL) ctxt->sax = NULL;
9565 xmlFreeParserCtxt(ctxt);
9566
9567 return(ret);
9568}
9569
9570/**
9571 * xmlSAXParseDTD:
9572 * @sax: the SAX handler block
9573 * @ExternalID: a NAME* containing the External ID of the DTD
9574 * @SystemID: a NAME* containing the URL to the DTD
9575 *
9576 * Load and parse an external subset.
9577 *
9578 * Returns the resulting xmlDtdPtr or NULL in case of error.
9579 */
9580
9581xmlDtdPtr
9582xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9583 const xmlChar *SystemID) {
9584 xmlDtdPtr ret = NULL;
9585 xmlParserCtxtPtr ctxt;
9586 xmlParserInputPtr input = NULL;
9587 xmlCharEncoding enc;
9588
9589 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9590
9591 ctxt = xmlNewParserCtxt();
9592 if (ctxt == NULL) {
9593 return(NULL);
9594 }
9595
9596 /*
9597 * Set-up the SAX context
9598 */
9599 if (sax != NULL) {
9600 if (ctxt->sax != NULL)
9601 xmlFree(ctxt->sax);
9602 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +00009603 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +00009604 }
9605
9606 /*
9607 * Ask the Entity resolver to load the damn thing
9608 */
9609
9610 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillardc6abc3d2003-04-26 13:27:30 +00009611 input = ctxt->sax->resolveEntity(ctxt, ExternalID, SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +00009612 if (input == NULL) {
9613 if (sax != NULL) ctxt->sax = NULL;
9614 xmlFreeParserCtxt(ctxt);
9615 return(NULL);
9616 }
9617
9618 /*
9619 * plug some encoding conversion routines here.
9620 */
9621 xmlPushInput(ctxt, input);
9622 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9623 xmlSwitchEncoding(ctxt, enc);
9624
9625 if (input->filename == NULL)
Daniel Veillard85095e22003-04-23 13:56:44 +00009626 input->filename = (char *) xmlCanonicPath(SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +00009627 input->line = 1;
9628 input->col = 1;
9629 input->base = ctxt->input->cur;
9630 input->cur = ctxt->input->cur;
9631 input->free = NULL;
9632
9633 /*
9634 * let's parse that entity knowing it's an external subset.
9635 */
9636 ctxt->inSubset = 2;
9637 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9638 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9639 ExternalID, SystemID);
9640 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9641
9642 if (ctxt->myDoc != NULL) {
9643 if (ctxt->wellFormed) {
9644 ret = ctxt->myDoc->extSubset;
9645 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +00009646 if (ret != NULL) {
9647 xmlNodePtr tmp;
9648
9649 ret->doc = NULL;
9650 tmp = ret->children;
9651 while (tmp != NULL) {
9652 tmp->doc = NULL;
9653 tmp = tmp->next;
9654 }
9655 }
Owen Taylor3473f882001-02-23 17:55:21 +00009656 } else {
9657 ret = NULL;
9658 }
9659 xmlFreeDoc(ctxt->myDoc);
9660 ctxt->myDoc = NULL;
9661 }
9662 if (sax != NULL) ctxt->sax = NULL;
9663 xmlFreeParserCtxt(ctxt);
9664
9665 return(ret);
9666}
9667
9668/**
9669 * xmlParseDTD:
9670 * @ExternalID: a NAME* containing the External ID of the DTD
9671 * @SystemID: a NAME* containing the URL to the DTD
9672 *
9673 * Load and parse an external subset.
9674 *
9675 * Returns the resulting xmlDtdPtr or NULL in case of error.
9676 */
9677
9678xmlDtdPtr
9679xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9680 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9681}
9682
9683/************************************************************************
9684 * *
9685 * Front ends when parsing an Entity *
9686 * *
9687 ************************************************************************/
9688
9689/**
Owen Taylor3473f882001-02-23 17:55:21 +00009690 * xmlParseCtxtExternalEntity:
9691 * @ctx: the existing parsing context
9692 * @URL: the URL for the entity to load
9693 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009694 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009695 *
9696 * Parse an external general entity within an existing parsing context
9697 * An external general parsed entity is well-formed if it matches the
9698 * production labeled extParsedEnt.
9699 *
9700 * [78] extParsedEnt ::= TextDecl? content
9701 *
9702 * Returns 0 if the entity is well formed, -1 in case of args problem and
9703 * the parser error code otherwise
9704 */
9705
9706int
9707xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009708 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009709 xmlParserCtxtPtr ctxt;
9710 xmlDocPtr newDoc;
9711 xmlSAXHandlerPtr oldsax = NULL;
9712 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009713 xmlChar start[4];
9714 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009715
9716 if (ctx->depth > 40) {
9717 return(XML_ERR_ENTITY_LOOP);
9718 }
9719
Daniel Veillardcda96922001-08-21 10:56:31 +00009720 if (lst != NULL)
9721 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009722 if ((URL == NULL) && (ID == NULL))
9723 return(-1);
9724 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9725 return(-1);
9726
9727
9728 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9729 if (ctxt == NULL) return(-1);
9730 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +00009731 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +00009732 oldsax = ctxt->sax;
9733 ctxt->sax = ctx->sax;
9734 newDoc = xmlNewDoc(BAD_CAST "1.0");
9735 if (newDoc == NULL) {
9736 xmlFreeParserCtxt(ctxt);
9737 return(-1);
9738 }
9739 if (ctx->myDoc != NULL) {
9740 newDoc->intSubset = ctx->myDoc->intSubset;
9741 newDoc->extSubset = ctx->myDoc->extSubset;
9742 }
9743 if (ctx->myDoc->URL != NULL) {
9744 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9745 }
9746 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9747 if (newDoc->children == NULL) {
9748 ctxt->sax = oldsax;
9749 xmlFreeParserCtxt(ctxt);
9750 newDoc->intSubset = NULL;
9751 newDoc->extSubset = NULL;
9752 xmlFreeDoc(newDoc);
9753 return(-1);
9754 }
9755 nodePush(ctxt, newDoc->children);
9756 if (ctx->myDoc == NULL) {
9757 ctxt->myDoc = newDoc;
9758 } else {
9759 ctxt->myDoc = ctx->myDoc;
9760 newDoc->children->doc = ctx->myDoc;
9761 }
9762
Daniel Veillard87a764e2001-06-20 17:41:10 +00009763 /*
9764 * Get the 4 first bytes and decode the charset
9765 * if enc != XML_CHAR_ENCODING_NONE
9766 * plug some encoding conversion routines.
9767 */
9768 GROW
9769 start[0] = RAW;
9770 start[1] = NXT(1);
9771 start[2] = NXT(2);
9772 start[3] = NXT(3);
9773 enc = xmlDetectCharEncoding(start, 4);
9774 if (enc != XML_CHAR_ENCODING_NONE) {
9775 xmlSwitchEncoding(ctxt, enc);
9776 }
9777
Owen Taylor3473f882001-02-23 17:55:21 +00009778 /*
9779 * Parse a possible text declaration first
9780 */
Owen Taylor3473f882001-02-23 17:55:21 +00009781 if ((RAW == '<') && (NXT(1) == '?') &&
9782 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9783 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9784 xmlParseTextDecl(ctxt);
9785 }
9786
9787 /*
9788 * Doing validity checking on chunk doesn't make sense
9789 */
9790 ctxt->instate = XML_PARSER_CONTENT;
9791 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +00009792 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +00009793 ctxt->loadsubset = ctx->loadsubset;
9794 ctxt->depth = ctx->depth + 1;
9795 ctxt->replaceEntities = ctx->replaceEntities;
9796 if (ctxt->validate) {
9797 ctxt->vctxt.error = ctx->vctxt.error;
9798 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009799 } else {
9800 ctxt->vctxt.error = NULL;
9801 ctxt->vctxt.warning = NULL;
9802 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009803 ctxt->vctxt.nodeTab = NULL;
9804 ctxt->vctxt.nodeNr = 0;
9805 ctxt->vctxt.nodeMax = 0;
9806 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009807
9808 xmlParseContent(ctxt);
9809
Daniel Veillard5f8d1a32003-03-23 21:02:00 +00009810 ctx->validate = ctxt->validate;
9811 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +00009812 if ((RAW == '<') && (NXT(1) == '/')) {
9813 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9814 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9815 ctxt->sax->error(ctxt->userData,
9816 "chunk is not well balanced\n");
9817 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009818 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009819 } else if (RAW != 0) {
9820 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9821 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9822 ctxt->sax->error(ctxt->userData,
9823 "extra content at the end of well balanced chunk\n");
9824 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009825 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009826 }
9827 if (ctxt->node != newDoc->children) {
9828 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9829 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9830 ctxt->sax->error(ctxt->userData,
9831 "chunk is not well balanced\n");
9832 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009833 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009834 }
9835
9836 if (!ctxt->wellFormed) {
9837 if (ctxt->errNo == 0)
9838 ret = 1;
9839 else
9840 ret = ctxt->errNo;
9841 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009842 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009843 xmlNodePtr cur;
9844
9845 /*
9846 * Return the newly created nodeset after unlinking it from
9847 * they pseudo parent.
9848 */
9849 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009850 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009851 while (cur != NULL) {
9852 cur->parent = NULL;
9853 cur = cur->next;
9854 }
9855 newDoc->children->children = NULL;
9856 }
9857 ret = 0;
9858 }
9859 ctxt->sax = oldsax;
9860 xmlFreeParserCtxt(ctxt);
9861 newDoc->intSubset = NULL;
9862 newDoc->extSubset = NULL;
9863 xmlFreeDoc(newDoc);
9864
9865 return(ret);
9866}
9867
9868/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009869 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009870 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009871 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009872 * @sax: the SAX handler bloc (possibly NULL)
9873 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9874 * @depth: Used for loop detection, use 0
9875 * @URL: the URL for the entity to load
9876 * @ID: the System ID for the entity to load
9877 * @list: the return value for the set of parsed nodes
9878 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009879 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009880 *
9881 * Returns 0 if the entity is well formed, -1 in case of args problem and
9882 * the parser error code otherwise
9883 */
9884
Daniel Veillard257d9102001-05-08 10:41:44 +00009885static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009886xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9887 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009888 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009889 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009890 xmlParserCtxtPtr ctxt;
9891 xmlDocPtr newDoc;
9892 xmlSAXHandlerPtr oldsax = NULL;
9893 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009894 xmlChar start[4];
9895 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009896
9897 if (depth > 40) {
9898 return(XML_ERR_ENTITY_LOOP);
9899 }
9900
9901
9902
9903 if (list != NULL)
9904 *list = NULL;
9905 if ((URL == NULL) && (ID == NULL))
9906 return(-1);
9907 if (doc == NULL) /* @@ relax but check for dereferences */
9908 return(-1);
9909
9910
9911 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9912 if (ctxt == NULL) return(-1);
9913 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009914 if (oldctxt != NULL) {
9915 ctxt->_private = oldctxt->_private;
9916 ctxt->loadsubset = oldctxt->loadsubset;
9917 ctxt->validate = oldctxt->validate;
9918 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009919 ctxt->record_info = oldctxt->record_info;
9920 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
9921 ctxt->node_seq.length = oldctxt->node_seq.length;
9922 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009923 } else {
9924 /*
9925 * Doing validity checking on chunk without context
9926 * doesn't make sense
9927 */
9928 ctxt->_private = NULL;
9929 ctxt->validate = 0;
9930 ctxt->external = 2;
9931 ctxt->loadsubset = 0;
9932 }
Owen Taylor3473f882001-02-23 17:55:21 +00009933 if (sax != NULL) {
9934 oldsax = ctxt->sax;
9935 ctxt->sax = sax;
9936 if (user_data != NULL)
9937 ctxt->userData = user_data;
9938 }
9939 newDoc = xmlNewDoc(BAD_CAST "1.0");
9940 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009941 ctxt->node_seq.maximum = 0;
9942 ctxt->node_seq.length = 0;
9943 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009944 xmlFreeParserCtxt(ctxt);
9945 return(-1);
9946 }
9947 if (doc != NULL) {
9948 newDoc->intSubset = doc->intSubset;
9949 newDoc->extSubset = doc->extSubset;
9950 }
9951 if (doc->URL != NULL) {
9952 newDoc->URL = xmlStrdup(doc->URL);
9953 }
9954 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9955 if (newDoc->children == NULL) {
9956 if (sax != NULL)
9957 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009958 ctxt->node_seq.maximum = 0;
9959 ctxt->node_seq.length = 0;
9960 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009961 xmlFreeParserCtxt(ctxt);
9962 newDoc->intSubset = NULL;
9963 newDoc->extSubset = NULL;
9964 xmlFreeDoc(newDoc);
9965 return(-1);
9966 }
9967 nodePush(ctxt, newDoc->children);
9968 if (doc == NULL) {
9969 ctxt->myDoc = newDoc;
9970 } else {
9971 ctxt->myDoc = doc;
9972 newDoc->children->doc = doc;
9973 }
9974
Daniel Veillard87a764e2001-06-20 17:41:10 +00009975 /*
9976 * Get the 4 first bytes and decode the charset
9977 * if enc != XML_CHAR_ENCODING_NONE
9978 * plug some encoding conversion routines.
9979 */
9980 GROW;
9981 start[0] = RAW;
9982 start[1] = NXT(1);
9983 start[2] = NXT(2);
9984 start[3] = NXT(3);
9985 enc = xmlDetectCharEncoding(start, 4);
9986 if (enc != XML_CHAR_ENCODING_NONE) {
9987 xmlSwitchEncoding(ctxt, enc);
9988 }
9989
Owen Taylor3473f882001-02-23 17:55:21 +00009990 /*
9991 * Parse a possible text declaration first
9992 */
Owen Taylor3473f882001-02-23 17:55:21 +00009993 if ((RAW == '<') && (NXT(1) == '?') &&
9994 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9995 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9996 xmlParseTextDecl(ctxt);
9997 }
9998
Owen Taylor3473f882001-02-23 17:55:21 +00009999 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000010000 ctxt->depth = depth;
10001
10002 xmlParseContent(ctxt);
10003
Daniel Veillard561b7f82002-03-20 21:55:57 +000010004 if ((RAW == '<') && (NXT(1) == '/')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010005 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10007 ctxt->sax->error(ctxt->userData,
10008 "chunk is not well balanced\n");
10009 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010010 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +000010011 } else if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +000010012 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10013 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10014 ctxt->sax->error(ctxt->userData,
10015 "extra content at the end of well balanced chunk\n");
10016 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010017 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010018 }
10019 if (ctxt->node != newDoc->children) {
10020 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10021 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10022 ctxt->sax->error(ctxt->userData,
10023 "chunk is not well balanced\n");
10024 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010025 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010026 }
10027
10028 if (!ctxt->wellFormed) {
10029 if (ctxt->errNo == 0)
10030 ret = 1;
10031 else
10032 ret = ctxt->errNo;
10033 } else {
10034 if (list != NULL) {
10035 xmlNodePtr cur;
10036
10037 /*
10038 * Return the newly created nodeset after unlinking it from
10039 * they pseudo parent.
10040 */
10041 cur = newDoc->children->children;
10042 *list = cur;
10043 while (cur != NULL) {
10044 cur->parent = NULL;
10045 cur = cur->next;
10046 }
10047 newDoc->children->children = NULL;
10048 }
10049 ret = 0;
10050 }
10051 if (sax != NULL)
10052 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000010053 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
10054 oldctxt->node_seq.length = ctxt->node_seq.length;
10055 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010056 ctxt->node_seq.maximum = 0;
10057 ctxt->node_seq.length = 0;
10058 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010059 xmlFreeParserCtxt(ctxt);
10060 newDoc->intSubset = NULL;
10061 newDoc->extSubset = NULL;
10062 xmlFreeDoc(newDoc);
10063
10064 return(ret);
10065}
10066
10067/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010068 * xmlParseExternalEntity:
10069 * @doc: the document the chunk pertains to
10070 * @sax: the SAX handler bloc (possibly NULL)
10071 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10072 * @depth: Used for loop detection, use 0
10073 * @URL: the URL for the entity to load
10074 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010075 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000010076 *
10077 * Parse an external general entity
10078 * An external general parsed entity is well-formed if it matches the
10079 * production labeled extParsedEnt.
10080 *
10081 * [78] extParsedEnt ::= TextDecl? content
10082 *
10083 * Returns 0 if the entity is well formed, -1 in case of args problem and
10084 * the parser error code otherwise
10085 */
10086
10087int
10088xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000010089 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010090 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010091 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000010092}
10093
10094/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000010095 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000010096 * @doc: the document the chunk pertains to
10097 * @sax: the SAX handler bloc (possibly NULL)
10098 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10099 * @depth: Used for loop detection, use 0
10100 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000010101 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010102 *
10103 * Parse a well-balanced chunk of an XML document
10104 * called by the parser
10105 * The allowed sequence for the Well Balanced Chunk is the one defined by
10106 * the content production in the XML grammar:
10107 *
10108 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10109 *
10110 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10111 * the parser error code otherwise
10112 */
10113
10114int
10115xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000010116 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010117 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
10118 depth, string, lst, 0 );
10119}
10120
10121/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000010122 * xmlParseBalancedChunkMemoryInternal:
10123 * @oldctxt: the existing parsing context
10124 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10125 * @user_data: the user data field for the parser context
10126 * @lst: the return value for the set of parsed nodes
10127 *
10128 *
10129 * Parse a well-balanced chunk of an XML document
10130 * called by the parser
10131 * The allowed sequence for the Well Balanced Chunk is the one defined by
10132 * the content production in the XML grammar:
10133 *
10134 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10135 *
10136 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10137 * the parser error code otherwise
10138 *
10139 * In case recover is set to 1, the nodelist will not be empty even if
10140 * the parsed chunk is not well balanced.
10141 */
10142static int
10143xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
10144 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
10145 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010146 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010147 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010148 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010149 int size;
10150 int ret = 0;
10151
10152 if (oldctxt->depth > 40) {
10153 return(XML_ERR_ENTITY_LOOP);
10154 }
10155
10156
10157 if (lst != NULL)
10158 *lst = NULL;
10159 if (string == NULL)
10160 return(-1);
10161
10162 size = xmlStrlen(string);
10163
10164 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10165 if (ctxt == NULL) return(-1);
10166 if (user_data != NULL)
10167 ctxt->userData = user_data;
10168 else
10169 ctxt->userData = ctxt;
10170
10171 oldsax = ctxt->sax;
10172 ctxt->sax = oldctxt->sax;
Daniel Veillarde1ca5032002-12-09 14:13:43 +000010173 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010174 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010175 newDoc = xmlNewDoc(BAD_CAST "1.0");
10176 if (newDoc == NULL) {
10177 ctxt->sax = oldsax;
10178 xmlFreeParserCtxt(ctxt);
10179 return(-1);
10180 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010181 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010182 } else {
10183 ctxt->myDoc = oldctxt->myDoc;
10184 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010185 }
Daniel Veillard9bc53102002-11-25 13:20:04 +000010186 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +000010187 BAD_CAST "pseudoroot", NULL);
10188 if (ctxt->myDoc->children == NULL) {
10189 ctxt->sax = oldsax;
10190 xmlFreeParserCtxt(ctxt);
10191 if (newDoc != NULL)
10192 xmlFreeDoc(newDoc);
10193 return(-1);
10194 }
10195 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010196 ctxt->instate = XML_PARSER_CONTENT;
10197 ctxt->depth = oldctxt->depth + 1;
10198
Daniel Veillard328f48c2002-11-15 15:24:34 +000010199 ctxt->validate = 0;
10200 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000010201 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
10202 /*
10203 * ID/IDREF registration will be done in xmlValidateElement below
10204 */
10205 ctxt->loadsubset |= XML_SKIP_IDS;
10206 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010207
Daniel Veillard68e9e742002-11-16 15:35:11 +000010208 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010209 if ((RAW == '<') && (NXT(1) == '/')) {
10210 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10211 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10212 ctxt->sax->error(ctxt->userData,
10213 "chunk is not well balanced\n");
10214 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010215 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010216 } else if (RAW != 0) {
10217 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10218 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10219 ctxt->sax->error(ctxt->userData,
10220 "extra content at the end of well balanced chunk\n");
10221 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010222 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010223 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010224 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000010225 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10226 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10227 ctxt->sax->error(ctxt->userData,
10228 "chunk is not well balanced\n");
10229 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010230 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010231 }
10232
10233 if (!ctxt->wellFormed) {
10234 if (ctxt->errNo == 0)
10235 ret = 1;
10236 else
10237 ret = ctxt->errNo;
10238 } else {
10239 ret = 0;
10240 }
10241
10242 if ((lst != NULL) && (ret == 0)) {
10243 xmlNodePtr cur;
10244
10245 /*
10246 * Return the newly created nodeset after unlinking it from
10247 * they pseudo parent.
10248 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000010249 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010250 *lst = cur;
10251 while (cur != NULL) {
Daniel Veillard8d589042003-02-04 15:07:21 +000010252 if (oldctxt->validate && oldctxt->wellFormed &&
10253 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
10254 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
10255 oldctxt->myDoc, cur);
10256 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010257 cur->parent = NULL;
10258 cur = cur->next;
10259 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010260 ctxt->myDoc->children->children = NULL;
10261 }
10262 if (ctxt->myDoc != NULL) {
10263 xmlFreeNode(ctxt->myDoc->children);
10264 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010265 }
10266
10267 ctxt->sax = oldsax;
10268 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010269 if (newDoc != NULL)
10270 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010271
10272 return(ret);
10273}
10274
10275/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000010276 * xmlParseBalancedChunkMemoryRecover:
10277 * @doc: the document the chunk pertains to
10278 * @sax: the SAX handler bloc (possibly NULL)
10279 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10280 * @depth: Used for loop detection, use 0
10281 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10282 * @lst: the return value for the set of parsed nodes
10283 * @recover: return nodes even if the data is broken (use 0)
10284 *
10285 *
10286 * Parse a well-balanced chunk of an XML document
10287 * called by the parser
10288 * The allowed sequence for the Well Balanced Chunk is the one defined by
10289 * the content production in the XML grammar:
10290 *
10291 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10292 *
10293 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10294 * the parser error code otherwise
10295 *
10296 * In case recover is set to 1, the nodelist will not be empty even if
10297 * the parsed chunk is not well balanced.
10298 */
10299int
10300xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
10301 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
10302 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000010303 xmlParserCtxtPtr ctxt;
10304 xmlDocPtr newDoc;
10305 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000010306 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000010307 int size;
10308 int ret = 0;
10309
10310 if (depth > 40) {
10311 return(XML_ERR_ENTITY_LOOP);
10312 }
10313
10314
Daniel Veillardcda96922001-08-21 10:56:31 +000010315 if (lst != NULL)
10316 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010317 if (string == NULL)
10318 return(-1);
10319
10320 size = xmlStrlen(string);
10321
10322 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10323 if (ctxt == NULL) return(-1);
10324 ctxt->userData = ctxt;
10325 if (sax != NULL) {
10326 oldsax = ctxt->sax;
10327 ctxt->sax = sax;
10328 if (user_data != NULL)
10329 ctxt->userData = user_data;
10330 }
10331 newDoc = xmlNewDoc(BAD_CAST "1.0");
10332 if (newDoc == NULL) {
10333 xmlFreeParserCtxt(ctxt);
10334 return(-1);
10335 }
10336 if (doc != NULL) {
10337 newDoc->intSubset = doc->intSubset;
10338 newDoc->extSubset = doc->extSubset;
10339 }
10340 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10341 if (newDoc->children == NULL) {
10342 if (sax != NULL)
10343 ctxt->sax = oldsax;
10344 xmlFreeParserCtxt(ctxt);
10345 newDoc->intSubset = NULL;
10346 newDoc->extSubset = NULL;
10347 xmlFreeDoc(newDoc);
10348 return(-1);
10349 }
10350 nodePush(ctxt, newDoc->children);
10351 if (doc == NULL) {
10352 ctxt->myDoc = newDoc;
10353 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000010354 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000010355 newDoc->children->doc = doc;
10356 }
10357 ctxt->instate = XML_PARSER_CONTENT;
10358 ctxt->depth = depth;
10359
10360 /*
10361 * Doing validity checking on chunk doesn't make sense
10362 */
10363 ctxt->validate = 0;
10364 ctxt->loadsubset = 0;
10365
Daniel Veillardb39bc392002-10-26 19:29:51 +000010366 if ( doc != NULL ){
10367 content = doc->children;
10368 doc->children = NULL;
10369 xmlParseContent(ctxt);
10370 doc->children = content;
10371 }
10372 else {
10373 xmlParseContent(ctxt);
10374 }
Owen Taylor3473f882001-02-23 17:55:21 +000010375 if ((RAW == '<') && (NXT(1) == '/')) {
10376 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10377 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10378 ctxt->sax->error(ctxt->userData,
10379 "chunk is not well balanced\n");
10380 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010381 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010382 } else if (RAW != 0) {
10383 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10384 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10385 ctxt->sax->error(ctxt->userData,
10386 "extra content at the end of well balanced chunk\n");
10387 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010388 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010389 }
10390 if (ctxt->node != newDoc->children) {
10391 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10392 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10393 ctxt->sax->error(ctxt->userData,
10394 "chunk is not well balanced\n");
10395 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010396 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010397 }
10398
10399 if (!ctxt->wellFormed) {
10400 if (ctxt->errNo == 0)
10401 ret = 1;
10402 else
10403 ret = ctxt->errNo;
10404 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010405 ret = 0;
10406 }
10407
10408 if (lst != NULL && (ret == 0 || recover == 1)) {
10409 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010410
10411 /*
10412 * Return the newly created nodeset after unlinking it from
10413 * they pseudo parent.
10414 */
10415 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010416 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010417 while (cur != NULL) {
10418 cur->parent = NULL;
10419 cur = cur->next;
10420 }
10421 newDoc->children->children = NULL;
10422 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000010423
Owen Taylor3473f882001-02-23 17:55:21 +000010424 if (sax != NULL)
10425 ctxt->sax = oldsax;
10426 xmlFreeParserCtxt(ctxt);
10427 newDoc->intSubset = NULL;
10428 newDoc->extSubset = NULL;
10429 xmlFreeDoc(newDoc);
10430
10431 return(ret);
10432}
10433
10434/**
10435 * xmlSAXParseEntity:
10436 * @sax: the SAX handler block
10437 * @filename: the filename
10438 *
10439 * parse an XML external entity out of context and build a tree.
10440 * It use the given SAX function block to handle the parsing callback.
10441 * If sax is NULL, fallback to the default DOM tree building routines.
10442 *
10443 * [78] extParsedEnt ::= TextDecl? content
10444 *
10445 * This correspond to a "Well Balanced" chunk
10446 *
10447 * Returns the resulting document tree
10448 */
10449
10450xmlDocPtr
10451xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
10452 xmlDocPtr ret;
10453 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010454
10455 ctxt = xmlCreateFileParserCtxt(filename);
10456 if (ctxt == NULL) {
10457 return(NULL);
10458 }
10459 if (sax != NULL) {
10460 if (ctxt->sax != NULL)
10461 xmlFree(ctxt->sax);
10462 ctxt->sax = sax;
10463 ctxt->userData = NULL;
10464 }
10465
Owen Taylor3473f882001-02-23 17:55:21 +000010466 xmlParseExtParsedEnt(ctxt);
10467
10468 if (ctxt->wellFormed)
10469 ret = ctxt->myDoc;
10470 else {
10471 ret = NULL;
10472 xmlFreeDoc(ctxt->myDoc);
10473 ctxt->myDoc = NULL;
10474 }
10475 if (sax != NULL)
10476 ctxt->sax = NULL;
10477 xmlFreeParserCtxt(ctxt);
10478
10479 return(ret);
10480}
10481
10482/**
10483 * xmlParseEntity:
10484 * @filename: the filename
10485 *
10486 * parse an XML external entity out of context and build a tree.
10487 *
10488 * [78] extParsedEnt ::= TextDecl? content
10489 *
10490 * This correspond to a "Well Balanced" chunk
10491 *
10492 * Returns the resulting document tree
10493 */
10494
10495xmlDocPtr
10496xmlParseEntity(const char *filename) {
10497 return(xmlSAXParseEntity(NULL, filename));
10498}
10499
10500/**
10501 * xmlCreateEntityParserCtxt:
10502 * @URL: the entity URL
10503 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010504 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000010505 *
10506 * Create a parser context for an external entity
10507 * Automatic support for ZLIB/Compress compressed document is provided
10508 * by default if found at compile-time.
10509 *
10510 * Returns the new parser context or NULL
10511 */
10512xmlParserCtxtPtr
10513xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10514 const xmlChar *base) {
10515 xmlParserCtxtPtr ctxt;
10516 xmlParserInputPtr inputStream;
10517 char *directory = NULL;
10518 xmlChar *uri;
10519
10520 ctxt = xmlNewParserCtxt();
10521 if (ctxt == NULL) {
10522 return(NULL);
10523 }
10524
10525 uri = xmlBuildURI(URL, base);
10526
10527 if (uri == NULL) {
10528 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
10529 if (inputStream == NULL) {
10530 xmlFreeParserCtxt(ctxt);
10531 return(NULL);
10532 }
10533
10534 inputPush(ctxt, inputStream);
10535
10536 if ((ctxt->directory == NULL) && (directory == NULL))
10537 directory = xmlParserGetDirectory((char *)URL);
10538 if ((ctxt->directory == NULL) && (directory != NULL))
10539 ctxt->directory = directory;
10540 } else {
10541 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
10542 if (inputStream == NULL) {
10543 xmlFree(uri);
10544 xmlFreeParserCtxt(ctxt);
10545 return(NULL);
10546 }
10547
10548 inputPush(ctxt, inputStream);
10549
10550 if ((ctxt->directory == NULL) && (directory == NULL))
10551 directory = xmlParserGetDirectory((char *)uri);
10552 if ((ctxt->directory == NULL) && (directory != NULL))
10553 ctxt->directory = directory;
10554 xmlFree(uri);
10555 }
10556
10557 return(ctxt);
10558}
10559
10560/************************************************************************
10561 * *
10562 * Front ends when parsing from a file *
10563 * *
10564 ************************************************************************/
10565
10566/**
10567 * xmlCreateFileParserCtxt:
10568 * @filename: the filename
10569 *
10570 * Create a parser context for a file content.
10571 * Automatic support for ZLIB/Compress compressed document is provided
10572 * by default if found at compile-time.
10573 *
10574 * Returns the new parser context or NULL
10575 */
10576xmlParserCtxtPtr
10577xmlCreateFileParserCtxt(const char *filename)
10578{
10579 xmlParserCtxtPtr ctxt;
10580 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000010581 char *directory = NULL;
10582
Owen Taylor3473f882001-02-23 17:55:21 +000010583 ctxt = xmlNewParserCtxt();
10584 if (ctxt == NULL) {
10585 if (xmlDefaultSAXHandler.error != NULL) {
10586 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
10587 }
10588 return(NULL);
10589 }
10590
Igor Zlatkovicce076162003-02-23 13:39:39 +000010591
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000010592 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010593 if (inputStream == NULL) {
10594 xmlFreeParserCtxt(ctxt);
10595 return(NULL);
10596 }
10597
Owen Taylor3473f882001-02-23 17:55:21 +000010598 inputPush(ctxt, inputStream);
10599 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010600 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000010601 if ((ctxt->directory == NULL) && (directory != NULL))
10602 ctxt->directory = directory;
10603
10604 return(ctxt);
10605}
10606
10607/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010608 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000010609 * @sax: the SAX handler block
10610 * @filename: the filename
10611 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10612 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000010613 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000010614 *
10615 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10616 * compressed document is provided by default if found at compile-time.
10617 * It use the given SAX function block to handle the parsing callback.
10618 * If sax is NULL, fallback to the default DOM tree building routines.
10619 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000010620 * User data (void *) is stored within the parser context in the
10621 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000010622 *
Owen Taylor3473f882001-02-23 17:55:21 +000010623 * Returns the resulting document tree
10624 */
10625
10626xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000010627xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
10628 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000010629 xmlDocPtr ret;
10630 xmlParserCtxtPtr ctxt;
10631 char *directory = NULL;
10632
Daniel Veillard635ef722001-10-29 11:48:19 +000010633 xmlInitParser();
10634
Owen Taylor3473f882001-02-23 17:55:21 +000010635 ctxt = xmlCreateFileParserCtxt(filename);
10636 if (ctxt == NULL) {
10637 return(NULL);
10638 }
10639 if (sax != NULL) {
10640 if (ctxt->sax != NULL)
10641 xmlFree(ctxt->sax);
10642 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010643 }
Daniel Veillarda293c322001-10-02 13:54:14 +000010644 if (data!=NULL) {
10645 ctxt->_private=data;
10646 }
Owen Taylor3473f882001-02-23 17:55:21 +000010647
10648 if ((ctxt->directory == NULL) && (directory == NULL))
10649 directory = xmlParserGetDirectory(filename);
10650 if ((ctxt->directory == NULL) && (directory != NULL))
10651 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
10652
Daniel Veillarddad3f682002-11-17 16:47:27 +000010653 ctxt->recovery = recovery;
10654
Owen Taylor3473f882001-02-23 17:55:21 +000010655 xmlParseDocument(ctxt);
10656
10657 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10658 else {
10659 ret = NULL;
10660 xmlFreeDoc(ctxt->myDoc);
10661 ctxt->myDoc = NULL;
10662 }
10663 if (sax != NULL)
10664 ctxt->sax = NULL;
10665 xmlFreeParserCtxt(ctxt);
10666
10667 return(ret);
10668}
10669
10670/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010671 * xmlSAXParseFile:
10672 * @sax: the SAX handler block
10673 * @filename: the filename
10674 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10675 * documents
10676 *
10677 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10678 * compressed document is provided by default if found at compile-time.
10679 * It use the given SAX function block to handle the parsing callback.
10680 * If sax is NULL, fallback to the default DOM tree building routines.
10681 *
10682 * Returns the resulting document tree
10683 */
10684
10685xmlDocPtr
10686xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
10687 int recovery) {
10688 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
10689}
10690
10691/**
Owen Taylor3473f882001-02-23 17:55:21 +000010692 * xmlRecoverDoc:
10693 * @cur: a pointer to an array of xmlChar
10694 *
10695 * parse an XML in-memory document and build a tree.
10696 * In the case the document is not Well Formed, a tree is built anyway
10697 *
10698 * Returns the resulting document tree
10699 */
10700
10701xmlDocPtr
10702xmlRecoverDoc(xmlChar *cur) {
10703 return(xmlSAXParseDoc(NULL, cur, 1));
10704}
10705
10706/**
10707 * xmlParseFile:
10708 * @filename: the filename
10709 *
10710 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10711 * compressed document is provided by default if found at compile-time.
10712 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000010713 * Returns the resulting document tree if the file was wellformed,
10714 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000010715 */
10716
10717xmlDocPtr
10718xmlParseFile(const char *filename) {
10719 return(xmlSAXParseFile(NULL, filename, 0));
10720}
10721
10722/**
10723 * xmlRecoverFile:
10724 * @filename: the filename
10725 *
10726 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10727 * compressed document is provided by default if found at compile-time.
10728 * In the case the document is not Well Formed, a tree is built anyway
10729 *
10730 * Returns the resulting document tree
10731 */
10732
10733xmlDocPtr
10734xmlRecoverFile(const char *filename) {
10735 return(xmlSAXParseFile(NULL, filename, 1));
10736}
10737
10738
10739/**
10740 * xmlSetupParserForBuffer:
10741 * @ctxt: an XML parser context
10742 * @buffer: a xmlChar * buffer
10743 * @filename: a file name
10744 *
10745 * Setup the parser context to parse a new buffer; Clears any prior
10746 * contents from the parser context. The buffer parameter must not be
10747 * NULL, but the filename parameter can be
10748 */
10749void
10750xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
10751 const char* filename)
10752{
10753 xmlParserInputPtr input;
10754
10755 input = xmlNewInputStream(ctxt);
10756 if (input == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +000010757 xmlGenericError(xmlGenericErrorContext,
10758 "malloc");
Owen Taylor3473f882001-02-23 17:55:21 +000010759 xmlFree(ctxt);
10760 return;
10761 }
10762
10763 xmlClearParserCtxt(ctxt);
10764 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000010765 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000010766 input->base = buffer;
10767 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010768 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000010769 inputPush(ctxt, input);
10770}
10771
10772/**
10773 * xmlSAXUserParseFile:
10774 * @sax: a SAX handler
10775 * @user_data: The user data returned on SAX callbacks
10776 * @filename: a file name
10777 *
10778 * parse an XML file and call the given SAX handler routines.
10779 * Automatic support for ZLIB/Compress compressed document is provided
10780 *
10781 * Returns 0 in case of success or a error number otherwise
10782 */
10783int
10784xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10785 const char *filename) {
10786 int ret = 0;
10787 xmlParserCtxtPtr ctxt;
10788
10789 ctxt = xmlCreateFileParserCtxt(filename);
10790 if (ctxt == NULL) return -1;
10791 if (ctxt->sax != &xmlDefaultSAXHandler)
10792 xmlFree(ctxt->sax);
10793 ctxt->sax = sax;
10794 if (user_data != NULL)
10795 ctxt->userData = user_data;
10796
10797 xmlParseDocument(ctxt);
10798
10799 if (ctxt->wellFormed)
10800 ret = 0;
10801 else {
10802 if (ctxt->errNo != 0)
10803 ret = ctxt->errNo;
10804 else
10805 ret = -1;
10806 }
10807 if (sax != NULL)
10808 ctxt->sax = NULL;
10809 xmlFreeParserCtxt(ctxt);
10810
10811 return ret;
10812}
10813
10814/************************************************************************
10815 * *
10816 * Front ends when parsing from memory *
10817 * *
10818 ************************************************************************/
10819
10820/**
10821 * xmlCreateMemoryParserCtxt:
10822 * @buffer: a pointer to a char array
10823 * @size: the size of the array
10824 *
10825 * Create a parser context for an XML in-memory document.
10826 *
10827 * Returns the new parser context or NULL
10828 */
10829xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010830xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010831 xmlParserCtxtPtr ctxt;
10832 xmlParserInputPtr input;
10833 xmlParserInputBufferPtr buf;
10834
10835 if (buffer == NULL)
10836 return(NULL);
10837 if (size <= 0)
10838 return(NULL);
10839
10840 ctxt = xmlNewParserCtxt();
10841 if (ctxt == NULL)
10842 return(NULL);
10843
10844 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010845 if (buf == NULL) {
10846 xmlFreeParserCtxt(ctxt);
10847 return(NULL);
10848 }
Owen Taylor3473f882001-02-23 17:55:21 +000010849
10850 input = xmlNewInputStream(ctxt);
10851 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010852 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010853 xmlFreeParserCtxt(ctxt);
10854 return(NULL);
10855 }
10856
10857 input->filename = NULL;
10858 input->buf = buf;
10859 input->base = input->buf->buffer->content;
10860 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010861 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010862
10863 inputPush(ctxt, input);
10864 return(ctxt);
10865}
10866
10867/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010868 * xmlSAXParseMemoryWithData:
10869 * @sax: the SAX handler block
10870 * @buffer: an pointer to a char array
10871 * @size: the size of the array
10872 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10873 * documents
10874 * @data: the userdata
10875 *
10876 * parse an XML in-memory block and use the given SAX function block
10877 * to handle the parsing callback. If sax is NULL, fallback to the default
10878 * DOM tree building routines.
10879 *
10880 * User data (void *) is stored within the parser context in the
10881 * context's _private member, so it is available nearly everywhere in libxml
10882 *
10883 * Returns the resulting document tree
10884 */
10885
10886xmlDocPtr
10887xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
10888 int size, int recovery, void *data) {
10889 xmlDocPtr ret;
10890 xmlParserCtxtPtr ctxt;
10891
10892 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10893 if (ctxt == NULL) return(NULL);
10894 if (sax != NULL) {
10895 if (ctxt->sax != NULL)
10896 xmlFree(ctxt->sax);
10897 ctxt->sax = sax;
10898 }
10899 if (data!=NULL) {
10900 ctxt->_private=data;
10901 }
10902
Daniel Veillardadba5f12003-04-04 16:09:01 +000010903 ctxt->recovery = recovery;
10904
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010905 xmlParseDocument(ctxt);
10906
10907 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10908 else {
10909 ret = NULL;
10910 xmlFreeDoc(ctxt->myDoc);
10911 ctxt->myDoc = NULL;
10912 }
10913 if (sax != NULL)
10914 ctxt->sax = NULL;
10915 xmlFreeParserCtxt(ctxt);
10916
10917 return(ret);
10918}
10919
10920/**
Owen Taylor3473f882001-02-23 17:55:21 +000010921 * xmlSAXParseMemory:
10922 * @sax: the SAX handler block
10923 * @buffer: an pointer to a char array
10924 * @size: the size of the array
10925 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10926 * documents
10927 *
10928 * parse an XML in-memory block and use the given SAX function block
10929 * to handle the parsing callback. If sax is NULL, fallback to the default
10930 * DOM tree building routines.
10931 *
10932 * Returns the resulting document tree
10933 */
10934xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010935xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10936 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010937 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010938}
10939
10940/**
10941 * xmlParseMemory:
10942 * @buffer: an pointer to a char array
10943 * @size: the size of the array
10944 *
10945 * parse an XML in-memory block and build a tree.
10946 *
10947 * Returns the resulting document tree
10948 */
10949
Daniel Veillard50822cb2001-07-26 20:05:51 +000010950xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010951 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10952}
10953
10954/**
10955 * xmlRecoverMemory:
10956 * @buffer: an pointer to a char array
10957 * @size: the size of the array
10958 *
10959 * parse an XML in-memory block and build a tree.
10960 * In the case the document is not Well Formed, a tree is built anyway
10961 *
10962 * Returns the resulting document tree
10963 */
10964
Daniel Veillard50822cb2001-07-26 20:05:51 +000010965xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010966 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10967}
10968
10969/**
10970 * xmlSAXUserParseMemory:
10971 * @sax: a SAX handler
10972 * @user_data: The user data returned on SAX callbacks
10973 * @buffer: an in-memory XML document input
10974 * @size: the length of the XML document in bytes
10975 *
10976 * A better SAX parsing routine.
10977 * parse an XML in-memory buffer and call the given SAX handler routines.
10978 *
10979 * Returns 0 in case of success or a error number otherwise
10980 */
10981int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010982 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010983 int ret = 0;
10984 xmlParserCtxtPtr ctxt;
10985 xmlSAXHandlerPtr oldsax = NULL;
10986
Daniel Veillard9e923512002-08-14 08:48:52 +000010987 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000010988 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10989 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000010990 oldsax = ctxt->sax;
10991 ctxt->sax = sax;
Daniel Veillard30211a02001-04-26 09:33:18 +000010992 if (user_data != NULL)
10993 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010994
10995 xmlParseDocument(ctxt);
10996
10997 if (ctxt->wellFormed)
10998 ret = 0;
10999 else {
11000 if (ctxt->errNo != 0)
11001 ret = ctxt->errNo;
11002 else
11003 ret = -1;
11004 }
Daniel Veillard9e923512002-08-14 08:48:52 +000011005 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000011006 xmlFreeParserCtxt(ctxt);
11007
11008 return ret;
11009}
11010
11011/**
11012 * xmlCreateDocParserCtxt:
11013 * @cur: a pointer to an array of xmlChar
11014 *
11015 * Creates a parser context for an XML in-memory document.
11016 *
11017 * Returns the new parser context or NULL
11018 */
11019xmlParserCtxtPtr
11020xmlCreateDocParserCtxt(xmlChar *cur) {
11021 int len;
11022
11023 if (cur == NULL)
11024 return(NULL);
11025 len = xmlStrlen(cur);
11026 return(xmlCreateMemoryParserCtxt((char *)cur, len));
11027}
11028
11029/**
11030 * xmlSAXParseDoc:
11031 * @sax: the SAX handler block
11032 * @cur: a pointer to an array of xmlChar
11033 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11034 * documents
11035 *
11036 * parse an XML in-memory document and build a tree.
11037 * It use the given SAX function block to handle the parsing callback.
11038 * If sax is NULL, fallback to the default DOM tree building routines.
11039 *
11040 * Returns the resulting document tree
11041 */
11042
11043xmlDocPtr
11044xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
11045 xmlDocPtr ret;
11046 xmlParserCtxtPtr ctxt;
11047
11048 if (cur == NULL) return(NULL);
11049
11050
11051 ctxt = xmlCreateDocParserCtxt(cur);
11052 if (ctxt == NULL) return(NULL);
11053 if (sax != NULL) {
11054 ctxt->sax = sax;
11055 ctxt->userData = NULL;
11056 }
11057
11058 xmlParseDocument(ctxt);
11059 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11060 else {
11061 ret = NULL;
11062 xmlFreeDoc(ctxt->myDoc);
11063 ctxt->myDoc = NULL;
11064 }
11065 if (sax != NULL)
11066 ctxt->sax = NULL;
11067 xmlFreeParserCtxt(ctxt);
11068
11069 return(ret);
11070}
11071
11072/**
11073 * xmlParseDoc:
11074 * @cur: a pointer to an array of xmlChar
11075 *
11076 * parse an XML in-memory document and build a tree.
11077 *
11078 * Returns the resulting document tree
11079 */
11080
11081xmlDocPtr
11082xmlParseDoc(xmlChar *cur) {
11083 return(xmlSAXParseDoc(NULL, cur, 0));
11084}
11085
Daniel Veillard8107a222002-01-13 14:10:10 +000011086/************************************************************************
11087 * *
11088 * Specific function to keep track of entities references *
11089 * and used by the XSLT debugger *
11090 * *
11091 ************************************************************************/
11092
11093static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
11094
11095/**
11096 * xmlAddEntityReference:
11097 * @ent : A valid entity
11098 * @firstNode : A valid first node for children of entity
11099 * @lastNode : A valid last node of children entity
11100 *
11101 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
11102 */
11103static void
11104xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
11105 xmlNodePtr lastNode)
11106{
11107 if (xmlEntityRefFunc != NULL) {
11108 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
11109 }
11110}
11111
11112
11113/**
11114 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000011115 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000011116 *
11117 * Set the function to call call back when a xml reference has been made
11118 */
11119void
11120xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
11121{
11122 xmlEntityRefFunc = func;
11123}
Owen Taylor3473f882001-02-23 17:55:21 +000011124
11125/************************************************************************
11126 * *
11127 * Miscellaneous *
11128 * *
11129 ************************************************************************/
11130
11131#ifdef LIBXML_XPATH_ENABLED
11132#include <libxml/xpath.h>
11133#endif
11134
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011135extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000011136static int xmlParserInitialized = 0;
11137
11138/**
11139 * xmlInitParser:
11140 *
11141 * Initialization function for the XML parser.
11142 * This is not reentrant. Call once before processing in case of
11143 * use in multithreaded programs.
11144 */
11145
11146void
11147xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000011148 if (xmlParserInitialized != 0)
11149 return;
Owen Taylor3473f882001-02-23 17:55:21 +000011150
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011151 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
11152 (xmlGenericError == NULL))
11153 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011154 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000011155 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000011156 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000011157 xmlInitCharEncodingHandlers();
11158 xmlInitializePredefinedEntities();
11159 xmlDefaultSAXHandlerInit();
11160 xmlRegisterDefaultInputCallbacks();
11161 xmlRegisterDefaultOutputCallbacks();
11162#ifdef LIBXML_HTML_ENABLED
11163 htmlInitAutoClose();
11164 htmlDefaultSAXHandlerInit();
11165#endif
11166#ifdef LIBXML_XPATH_ENABLED
11167 xmlXPathInit();
11168#endif
11169 xmlParserInitialized = 1;
11170}
11171
11172/**
11173 * xmlCleanupParser:
11174 *
11175 * Cleanup function for the XML parser. It tries to reclaim all
11176 * parsing related global memory allocated for the parser processing.
11177 * It doesn't deallocate any document related memory. Calling this
11178 * function should not prevent reusing the parser.
Daniel Veillard7424eb62003-01-24 14:14:52 +000011179 * One should call xmlCleanupParser() only when the process has
11180 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000011181 */
11182
11183void
11184xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000011185 xmlCleanupCharEncodingHandlers();
11186 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000011187#ifdef LIBXML_CATALOG_ENABLED
11188 xmlCatalogCleanup();
11189#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000011190 xmlCleanupThreads();
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011191 xmlCleanupGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000011192 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011193}