blob: 8c75a3553198715ab2eafcc3d544fad2acd91d69 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their related
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
44#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000045#include <libxml/threads.h>
46#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000056#ifdef LIBXML_CATALOG_ENABLED
57#include <libxml/catalog.h>
58#endif
Owen Taylor3473f882001-02-23 17:55:21 +000059
60#ifdef HAVE_CTYPE_H
61#include <ctype.h>
62#endif
63#ifdef HAVE_STDLIB_H
64#include <stdlib.h>
65#endif
66#ifdef HAVE_SYS_STAT_H
67#include <sys/stat.h>
68#endif
69#ifdef HAVE_FCNTL_H
70#include <fcntl.h>
71#endif
72#ifdef HAVE_UNISTD_H
73#include <unistd.h>
74#endif
75#ifdef HAVE_ZLIB_H
76#include <zlib.h>
77#endif
78
Daniel Veillard3b2e4e12003-02-03 08:52:58 +000079/**
80 * MAX_DEPTH:
81 *
82 * arbitrary depth limit for the XML documents that we allow to
83 * process. This is not a limitation of the parser but a safety
84 * boundary feature.
85 */
86#define MAX_DEPTH 1024
Owen Taylor3473f882001-02-23 17:55:21 +000087
Daniel Veillard21a0f912001-02-25 19:54:14 +000088#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000089#define XML_PARSER_BUFFER_SIZE 100
90
Daniel Veillard5997aca2002-03-18 18:36:20 +000091#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
92
/*
 * NULL-terminated list of the processing-instruction targets starting
 * with "xml" that the W3C specifications allow.
 */
static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
101
102/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000103xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
104 const xmlChar **str);
105
Daniel Veillard257d9102001-05-08 10:41:44 +0000106static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000107xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
108 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000109 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000110 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000111
Daniel Veillard8107a222002-01-13 14:10:10 +0000112static void
113xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
114 xmlNodePtr lastNode);
115
Daniel Veillard328f48c2002-11-15 15:24:34 +0000116static int
117xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
118 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Owen Taylor3473f882001-02-23 17:55:21 +0000119/************************************************************************
120 * *
121 * Parser stacks related functions and macros *
122 * *
123 ************************************************************************/
124
125xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
126 const xmlChar ** str);
127
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000128/**
129 * inputPush:
130 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000131 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000132 *
133 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000134 *
135 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000136 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000137extern int
138inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
139{
140 if (ctxt->inputNr >= ctxt->inputMax) {
141 ctxt->inputMax *= 2;
142 ctxt->inputTab =
143 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
144 ctxt->inputMax *
145 sizeof(ctxt->inputTab[0]));
146 if (ctxt->inputTab == NULL) {
147 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
148 return (0);
149 }
150 }
151 ctxt->inputTab[ctxt->inputNr] = value;
152 ctxt->input = value;
153 return (ctxt->inputNr++);
154}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000155/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000156 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000157 * @ctxt: an XML parser context
158 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000159 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000160 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000161 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000162 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000163extern xmlParserInputPtr
164inputPop(xmlParserCtxtPtr ctxt)
165{
166 xmlParserInputPtr ret;
167
168 if (ctxt->inputNr <= 0)
169 return (0);
170 ctxt->inputNr--;
171 if (ctxt->inputNr > 0)
172 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
173 else
174 ctxt->input = NULL;
175 ret = ctxt->inputTab[ctxt->inputNr];
176 ctxt->inputTab[ctxt->inputNr] = 0;
177 return (ret);
178}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000179/**
180 * nodePush:
181 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000182 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000183 *
184 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000185 *
186 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000187 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000188extern int
189nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
190{
191 if (ctxt->nodeNr >= ctxt->nodeMax) {
192 ctxt->nodeMax *= 2;
193 ctxt->nodeTab =
194 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
195 ctxt->nodeMax *
196 sizeof(ctxt->nodeTab[0]));
197 if (ctxt->nodeTab == NULL) {
198 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
199 return (0);
200 }
201 }
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000202#ifdef MAX_DEPTH
203 if (ctxt->nodeNr > MAX_DEPTH) {
204 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
205 ctxt->sax->error(ctxt->userData,
206 "Excessive depth in document: change MAX_DEPTH = %d\n",
207 MAX_DEPTH);
208 ctxt->wellFormed = 0;
209 ctxt->instate = XML_PARSER_EOF;
210 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
211 return(0);
212 }
213#endif
Daniel Veillard1c732d22002-11-30 11:22:59 +0000214 ctxt->nodeTab[ctxt->nodeNr] = value;
215 ctxt->node = value;
216 return (ctxt->nodeNr++);
217}
218/**
219 * nodePop:
220 * @ctxt: an XML parser context
221 *
222 * Pops the top element node from the node stack
223 *
224 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +0000225 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000226extern xmlNodePtr
227nodePop(xmlParserCtxtPtr ctxt)
228{
229 xmlNodePtr ret;
230
231 if (ctxt->nodeNr <= 0)
232 return (0);
233 ctxt->nodeNr--;
234 if (ctxt->nodeNr > 0)
235 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
236 else
237 ctxt->node = NULL;
238 ret = ctxt->nodeTab[ctxt->nodeNr];
239 ctxt->nodeTab[ctxt->nodeNr] = 0;
240 return (ret);
241}
242/**
243 * namePush:
244 * @ctxt: an XML parser context
245 * @value: the element name
246 *
247 * Pushes a new element name on top of the name stack
248 *
249 * Returns 0 in case of error, the index in the stack otherwise
250 */
251extern int
Daniel Veillard2fdbd322003-08-18 12:15:38 +0000252namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +0000253{
254 if (ctxt->nameNr >= ctxt->nameMax) {
255 ctxt->nameMax *= 2;
Daniel Veillard2fdbd322003-08-18 12:15:38 +0000256 ctxt->nameTab = (const xmlChar * *)
Igor Zlatkovicd37c1392003-08-28 10:34:33 +0000257 xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +0000258 ctxt->nameMax *
259 sizeof(ctxt->nameTab[0]));
260 if (ctxt->nameTab == NULL) {
261 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
262 return (0);
263 }
264 }
265 ctxt->nameTab[ctxt->nameNr] = value;
266 ctxt->name = value;
267 return (ctxt->nameNr++);
268}
269/**
270 * namePop:
271 * @ctxt: an XML parser context
272 *
273 * Pops the top element name from the name stack
274 *
275 * Returns the name just removed
276 */
Daniel Veillard2fdbd322003-08-18 12:15:38 +0000277extern const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000278namePop(xmlParserCtxtPtr ctxt)
279{
Daniel Veillard2fdbd322003-08-18 12:15:38 +0000280 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +0000281
282 if (ctxt->nameNr <= 0)
283 return (0);
284 ctxt->nameNr--;
285 if (ctxt->nameNr > 0)
286 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
287 else
288 ctxt->name = NULL;
289 ret = ctxt->nameTab[ctxt->nameNr];
290 ctxt->nameTab[ctxt->nameNr] = 0;
291 return (ret);
292}
Owen Taylor3473f882001-02-23 17:55:21 +0000293
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000294static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000295 if (ctxt->spaceNr >= ctxt->spaceMax) {
296 ctxt->spaceMax *= 2;
297 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
298 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
299 if (ctxt->spaceTab == NULL) {
300 xmlGenericError(xmlGenericErrorContext,
301 "realloc failed !\n");
302 return(0);
303 }
304 }
305 ctxt->spaceTab[ctxt->spaceNr] = val;
306 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
307 return(ctxt->spaceNr++);
308}
309
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000310static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000311 int ret;
312 if (ctxt->spaceNr <= 0) return(0);
313 ctxt->spaceNr--;
314 if (ctxt->spaceNr > 0)
315 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
316 else
317 ctxt->space = NULL;
318 ret = ctxt->spaceTab[ctxt->spaceNr];
319 ctxt->spaceTab[ctxt->spaceNr] = -1;
320 return(ret);
321}
322
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
357
/* Byte at the current position of the active input. */
#define RAW (*ctxt->input->cur)
/* Same expansion as RAW. */
#define CUR (*ctxt->input->cur)
/* Byte located val positions after the current one. */
#define NXT(val) ctxt->input->cur[(val)]
/* Pointer to the current position of the active input. */
#define CUR_PTR ctxt->input->cur

/*
 * Advance the character counter, the cursor and the column by val.
 * Afterwards, a '%' under the cursor triggers
 * xmlParserHandlePEReference(), and a 0 byte that xmlParserInputGrow()
 * cannot get past (result <= 0) triggers xmlPopInput().
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),						\
    ctxt->input->cur += (val),						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
Daniel Veillarda880b122003-04-21 21:36:41 +0000371#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +0000372 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
373 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +0000374 xmlSHRINK (ctxt);
375
376static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
377 xmlParserInputShrink(ctxt->input);
378 if ((*ctxt->input->cur == 0) &&
379 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
380 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000381 }
Owen Taylor3473f882001-02-23 17:55:21 +0000382
Daniel Veillarda880b122003-04-21 21:36:41 +0000383#define GROW if ((ctxt->progressive == 0) && \
384 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +0000385 xmlGROW (ctxt);
386
387static void xmlGROW (xmlParserCtxtPtr ctxt) {
388 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
389 if ((*ctxt->input->cur == 0) &&
390 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
391 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +0000392}
Owen Taylor3473f882001-02-23 17:55:21 +0000393
/* Skip blanks through xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Advance by one character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * Advance the column, the cursor and the character counter by one; when
 * the cursor lands on a 0 byte, ask xmlParserInputGrow() for more data.
 * NOTE: the expansion is a brace-enclosed block, not a do/while(0).
 */
#define NEXT1 {								\
        ctxt->input->col++;						\
        ctxt->input->cur++;						\
        ctxt->nbChars++;						\
        if (*ctxt->input->cur == 0)					\
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * Account for the character under the cursor (new line: bump the line
 * counter and reset the column to 1; otherwise bump the column), move
 * the cursor forward by l, then hand a '%' under the cursor to
 * xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
        ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += l;						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* Current character of the context via xmlCurrentChar(); l receives its length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
/* Current character of string s via xmlStringCurrentChar(); l receives its length. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * Append character v to buffer b at index i: stored directly as one
 * xmlChar when l is 1, otherwise written by xmlCopyCharMultiByte(), whose
 * result is added to i.
 * NOTE: the expansion is a bare if/else without a terminating ';'.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +0000420
421/**
422 * xmlSkipBlankChars:
423 * @ctxt: the XML parser context
424 *
425 * skip all blanks character found at that point in the input streams.
426 * It pops up finished entities in the process if allowable at that point.
427 *
428 * Returns the number of space chars skipped
429 */
430
431int
432xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000433 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000434
435 /*
436 * It's Okay to use CUR/NEXT here since all the blanks are on
437 * the ASCII range.
438 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000439 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
440 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000441 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000442 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000443 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000444 cur = ctxt->input->cur;
445 while (IS_BLANK(*cur)) {
446 if (*cur == '\n') {
447 ctxt->input->line++; ctxt->input->col = 1;
448 }
449 cur++;
450 res++;
451 if (*cur == 0) {
452 ctxt->input->cur = cur;
453 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
454 cur = ctxt->input->cur;
455 }
456 }
457 ctxt->input->cur = cur;
458 } else {
459 int cur;
460 do {
461 cur = CUR;
462 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
463 NEXT;
464 cur = CUR;
465 res++;
466 }
467 while ((cur == 0) && (ctxt->inputNr > 1) &&
468 (ctxt->instate != XML_PARSER_COMMENT)) {
469 xmlPopInput(ctxt);
470 cur = CUR;
471 }
472 /*
473 * Need to handle support of entities branching here
474 */
475 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
476 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
477 }
Owen Taylor3473f882001-02-23 17:55:21 +0000478 return(res);
479}
480
481/************************************************************************
482 * *
483 * Commodity functions to handle entities *
484 * *
485 ************************************************************************/
486
487/**
488 * xmlPopInput:
489 * @ctxt: an XML parser context
490 *
491 * xmlPopInput: the current input pointed by ctxt->input came to an end
492 * pop it and return the next char.
493 *
494 * Returns the current xmlChar in the parser context
495 */
496xmlChar
497xmlPopInput(xmlParserCtxtPtr ctxt) {
498 if (ctxt->inputNr == 1) return(0); /* End of main Input */
499 if (xmlParserDebugEntities)
500 xmlGenericError(xmlGenericErrorContext,
501 "Popping input %d\n", ctxt->inputNr);
502 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +0000503 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000504 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
505 return(xmlPopInput(ctxt));
506 return(CUR);
507}
508
509/**
510 * xmlPushInput:
511 * @ctxt: an XML parser context
512 * @input: an XML parser input fragment (entity, XML fragment ...).
513 *
514 * xmlPushInput: switch to a new input stream which is stacked on top
515 * of the previous one(s).
516 */
517void
518xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
519 if (input == NULL) return;
520
521 if (xmlParserDebugEntities) {
522 if ((ctxt->input != NULL) && (ctxt->input->filename))
523 xmlGenericError(xmlGenericErrorContext,
524 "%s(%d): ", ctxt->input->filename,
525 ctxt->input->line);
526 xmlGenericError(xmlGenericErrorContext,
527 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
528 }
529 inputPush(ctxt, input);
530 GROW;
531}
532
533/**
534 * xmlParseCharRef:
535 * @ctxt: an XML parser context
536 *
537 * parse Reference declarations
538 *
539 * [66] CharRef ::= '&#' [0-9]+ ';' |
540 * '&#x' [0-9a-fA-F]+ ';'
541 *
542 * [ WFC: Legal Character ]
543 * Characters referred to using character references must match the
544 * production for Char.
545 *
546 * Returns the value parsed (as an int), 0 in case of error
547 */
548int
549xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000550 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000551 int count = 0;
552
Owen Taylor3473f882001-02-23 17:55:21 +0000553 /*
554 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
555 */
Daniel Veillard561b7f82002-03-20 21:55:57 +0000556 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +0000557 (NXT(2) == 'x')) {
558 SKIP(3);
559 GROW;
560 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000561 if (count++ > 20) {
562 count = 0;
563 GROW;
564 }
565 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000566 val = val * 16 + (CUR - '0');
567 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
568 val = val * 16 + (CUR - 'a') + 10;
569 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
570 val = val * 16 + (CUR - 'A') + 10;
571 else {
572 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
573 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
574 ctxt->sax->error(ctxt->userData,
575 "xmlParseCharRef: invalid hexadecimal value\n");
576 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000577 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000578 val = 0;
579 break;
580 }
581 NEXT;
582 count++;
583 }
584 if (RAW == ';') {
585 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +0000586 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +0000587 ctxt->nbChars ++;
588 ctxt->input->cur++;
589 }
Daniel Veillard561b7f82002-03-20 21:55:57 +0000590 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +0000591 SKIP(2);
592 GROW;
593 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000594 if (count++ > 20) {
595 count = 0;
596 GROW;
597 }
598 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000599 val = val * 10 + (CUR - '0');
600 else {
601 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
603 ctxt->sax->error(ctxt->userData,
604 "xmlParseCharRef: invalid decimal value\n");
605 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000606 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000607 val = 0;
608 break;
609 }
610 NEXT;
611 count++;
612 }
613 if (RAW == ';') {
614 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +0000615 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +0000616 ctxt->nbChars ++;
617 ctxt->input->cur++;
618 }
619 } else {
620 ctxt->errNo = XML_ERR_INVALID_CHARREF;
621 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
622 ctxt->sax->error(ctxt->userData,
623 "xmlParseCharRef: invalid value\n");
624 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000625 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000626 }
627
628 /*
629 * [ WFC: Legal Character ]
630 * Characters referred to using character references must match the
631 * production for Char.
632 */
633 if (IS_CHAR(val)) {
634 return(val);
635 } else {
636 ctxt->errNo = XML_ERR_INVALID_CHAR;
637 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000638 ctxt->sax->error(ctxt->userData,
639 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000640 val);
641 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000642 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000643 }
644 return(0);
645}
646
647/**
648 * xmlParseStringCharRef:
649 * @ctxt: an XML parser context
650 * @str: a pointer to an index in the string
651 *
652 * parse Reference declarations, variant parsing from a string rather
653 * than an an input flow.
654 *
655 * [66] CharRef ::= '&#' [0-9]+ ';' |
656 * '&#x' [0-9a-fA-F]+ ';'
657 *
658 * [ WFC: Legal Character ]
659 * Characters referred to using character references must match the
660 * production for Char.
661 *
662 * Returns the value parsed (as an int), 0 in case of error, str will be
663 * updated to the current value of the index
664 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000665static int
Owen Taylor3473f882001-02-23 17:55:21 +0000666xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
667 const xmlChar *ptr;
668 xmlChar cur;
669 int val = 0;
670
671 if ((str == NULL) || (*str == NULL)) return(0);
672 ptr = *str;
673 cur = *ptr;
674 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
675 ptr += 3;
676 cur = *ptr;
677 while (cur != ';') { /* Non input consuming loop */
678 if ((cur >= '0') && (cur <= '9'))
679 val = val * 16 + (cur - '0');
680 else if ((cur >= 'a') && (cur <= 'f'))
681 val = val * 16 + (cur - 'a') + 10;
682 else if ((cur >= 'A') && (cur <= 'F'))
683 val = val * 16 + (cur - 'A') + 10;
684 else {
685 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
686 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
687 ctxt->sax->error(ctxt->userData,
688 "xmlParseStringCharRef: invalid hexadecimal value\n");
689 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000690 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000691 val = 0;
692 break;
693 }
694 ptr++;
695 cur = *ptr;
696 }
697 if (cur == ';')
698 ptr++;
699 } else if ((cur == '&') && (ptr[1] == '#')){
700 ptr += 2;
701 cur = *ptr;
702 while (cur != ';') { /* Non input consuming loops */
703 if ((cur >= '0') && (cur <= '9'))
704 val = val * 10 + (cur - '0');
705 else {
706 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
707 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
708 ctxt->sax->error(ctxt->userData,
709 "xmlParseStringCharRef: invalid decimal value\n");
710 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000711 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000712 val = 0;
713 break;
714 }
715 ptr++;
716 cur = *ptr;
717 }
718 if (cur == ';')
719 ptr++;
720 } else {
721 ctxt->errNo = XML_ERR_INVALID_CHARREF;
722 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
723 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000724 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000725 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000726 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000727 return(0);
728 }
729 *str = ptr;
730
731 /*
732 * [ WFC: Legal Character ]
733 * Characters referred to using character references must match the
734 * production for Char.
735 */
736 if (IS_CHAR(val)) {
737 return(val);
738 } else {
739 ctxt->errNo = XML_ERR_INVALID_CHAR;
740 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
741 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000742 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000743 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000744 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000745 }
746 return(0);
747}
748
749/**
Daniel Veillardf5582f12002-06-11 10:08:16 +0000750 * xmlNewBlanksWrapperInputStream:
751 * @ctxt: an XML parser context
752 * @entity: an Entity pointer
753 *
754 * Create a new input stream for wrapping
755 * blanks around a PEReference
756 *
757 * Returns the new input stream or NULL
758 */
759
760static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
761
Daniel Veillardf4862f02002-09-10 11:13:43 +0000762static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +0000763xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
764 xmlParserInputPtr input;
765 xmlChar *buffer;
766 size_t length;
767 if (entity == NULL) {
768 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
769 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
770 ctxt->sax->error(ctxt->userData,
771 "internal: xmlNewBlanksWrapperInputStream entity = NULL\n");
772 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
773 return(NULL);
774 }
775 if (xmlParserDebugEntities)
776 xmlGenericError(xmlGenericErrorContext,
777 "new blanks wrapper for entity: %s\n", entity->name);
778 input = xmlNewInputStream(ctxt);
779 if (input == NULL) {
780 return(NULL);
781 }
782 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +0000783 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +0000784 if (buffer == NULL) {
785 return(NULL);
786 }
787 buffer [0] = ' ';
788 buffer [1] = '%';
789 buffer [length-3] = ';';
790 buffer [length-2] = ' ';
791 buffer [length-1] = 0;
792 memcpy(buffer + 2, entity->name, length - 5);
793 input->free = deallocblankswrapper;
794 input->base = buffer;
795 input->cur = buffer;
796 input->length = length;
797 input->end = &buffer[length];
798 return(input);
799}
800
801/**
Owen Taylor3473f882001-02-23 17:55:21 +0000802 * xmlParserHandlePEReference:
803 * @ctxt: the parser context
804 *
805 * [69] PEReference ::= '%' Name ';'
806 *
807 * [ WFC: No Recursion ]
808 * A parsed entity must not contain a recursive
809 * reference to itself, either directly or indirectly.
810 *
811 * [ WFC: Entity Declared ]
812 * In a document without any DTD, a document with only an internal DTD
813 * subset which contains no parameter entity references, or a document
814 * with "standalone='yes'", ... ... The declaration of a parameter
815 * entity must precede any reference to it...
816 *
817 * [ VC: Entity Declared ]
818 * In a document with an external subset or external parameter entities
819 * with "standalone='no'", ... ... The declaration of a parameter entity
820 * must precede any reference to it...
821 *
822 * [ WFC: In DTD ]
823 * Parameter-entity references may only appear in the DTD.
824 * NOTE: misleading but this is handled.
825 *
826 * A PEReference may have been detected in the current input stream
827 * the handling is done accordingly to
828 * http://www.w3.org/TR/REC-xml#entproc
829 * i.e.
830 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000831 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000832 */
833void
834xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +0000835 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +0000836 xmlEntityPtr entity = NULL;
837 xmlParserInputPtr input;
838
Owen Taylor3473f882001-02-23 17:55:21 +0000839 if (RAW != '%') return;
840 switch(ctxt->instate) {
841 case XML_PARSER_CDATA_SECTION:
842 return;
843 case XML_PARSER_COMMENT:
844 return;
845 case XML_PARSER_START_TAG:
846 return;
847 case XML_PARSER_END_TAG:
848 return;
849 case XML_PARSER_EOF:
850 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
851 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
852 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
853 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000854 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000855 return;
856 case XML_PARSER_PROLOG:
857 case XML_PARSER_START:
858 case XML_PARSER_MISC:
859 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
860 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
861 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
862 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000863 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000864 return;
865 case XML_PARSER_ENTITY_DECL:
866 case XML_PARSER_CONTENT:
867 case XML_PARSER_ATTRIBUTE_VALUE:
868 case XML_PARSER_PI:
869 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +0000870 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +0000871 /* we just ignore it there */
872 return;
873 case XML_PARSER_EPILOG:
874 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
875 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
876 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
877 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000878 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000879 return;
880 case XML_PARSER_ENTITY_VALUE:
881 /*
882 * NOTE: in the case of entity values, we don't do the
883 * substitution here since we need the literal
884 * entity value to be able to save the internal
885 * subset of the document.
886 * This will be handled by xmlStringDecodeEntities
887 */
888 return;
889 case XML_PARSER_DTD:
890 /*
891 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
892 * In the internal DTD subset, parameter-entity references
893 * can occur only where markup declarations can occur, not
894 * within markup declarations.
895 * In that case this is handled in xmlParseMarkupDecl
896 */
897 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
898 return;
Daniel Veillardf5582f12002-06-11 10:08:16 +0000899 if (IS_BLANK(NXT(1)) || NXT(1) == 0)
900 return;
Owen Taylor3473f882001-02-23 17:55:21 +0000901 break;
902 case XML_PARSER_IGNORE:
903 return;
904 }
905
906 NEXT;
907 name = xmlParseName(ctxt);
908 if (xmlParserDebugEntities)
909 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000910 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000911 if (name == NULL) {
912 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
913 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000914 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000915 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000916 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000917 } else {
918 if (RAW == ';') {
919 NEXT;
920 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
921 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
922 if (entity == NULL) {
923
924 /*
925 * [ WFC: Entity Declared ]
926 * In a document without any DTD, a document with only an
927 * internal DTD subset which contains no parameter entity
928 * references, or a document with "standalone='yes'", ...
929 * ... The declaration of a parameter entity must precede
930 * any reference to it...
931 */
932 if ((ctxt->standalone == 1) ||
933 ((ctxt->hasExternalSubset == 0) &&
934 (ctxt->hasPErefs == 0))) {
935 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
936 ctxt->sax->error(ctxt->userData,
937 "PEReference: %%%s; not found\n", name);
938 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000939 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000940 } else {
941 /*
942 * [ VC: Entity Declared ]
943 * In a document with an external subset or external
944 * parameter entities with "standalone='no'", ...
945 * ... The declaration of a parameter entity must precede
946 * any reference to it...
947 */
948 if ((!ctxt->disableSAX) &&
949 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
950 ctxt->vctxt.error(ctxt->vctxt.userData,
951 "PEReference: %%%s; not found\n", name);
952 } else if ((!ctxt->disableSAX) &&
953 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
954 ctxt->sax->warning(ctxt->userData,
955 "PEReference: %%%s; not found\n", name);
956 ctxt->valid = 0;
957 }
Daniel Veillardf5582f12002-06-11 10:08:16 +0000958 } else if (ctxt->input->free != deallocblankswrapper) {
959 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
960 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +0000961 } else {
962 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
963 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000964 xmlChar start[4];
965 xmlCharEncoding enc;
966
Owen Taylor3473f882001-02-23 17:55:21 +0000967 /*
968 * handle the extra spaces added before and after
969 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000970 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000971 */
972 input = xmlNewEntityInputStream(ctxt, entity);
973 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000974
975 /*
976 * Get the 4 first bytes and decode the charset
977 * if enc != XML_CHAR_ENCODING_NONE
978 * plug some encoding conversion routines.
979 */
980 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +0000981 if (entity->length >= 4) {
982 start[0] = RAW;
983 start[1] = NXT(1);
984 start[2] = NXT(2);
985 start[3] = NXT(3);
986 enc = xmlDetectCharEncoding(start, 4);
987 if (enc != XML_CHAR_ENCODING_NONE) {
988 xmlSwitchEncoding(ctxt, enc);
989 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000990 }
991
Owen Taylor3473f882001-02-23 17:55:21 +0000992 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
993 (RAW == '<') && (NXT(1) == '?') &&
994 (NXT(2) == 'x') && (NXT(3) == 'm') &&
995 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
996 xmlParseTextDecl(ctxt);
997 }
Owen Taylor3473f882001-02-23 17:55:21 +0000998 } else {
999 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1000 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001001 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +00001002 name);
1003 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001004 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001005 }
1006 }
1007 } else {
1008 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1009 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1010 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001011 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001012 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001013 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001014 }
Owen Taylor3473f882001-02-23 17:55:21 +00001015 }
1016}
1017
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates @buffer accordingly.  It must be
 * used inside a function returning a pointer: when the reallocation
 * fails, the previous allocation is released (it would otherwise be
 * lost, since the enclosing function returns immediately), an error is
 * reported and the enclosing function returns NULL.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *buffer##_new;                                              \
    buffer##_size *= 2;                                                 \
    buffer##_new = (xmlChar *)                                          \
        xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));            \
    if (buffer##_new == NULL) {                                         \
        xmlFree(buffer);                                                \
        xmlGenericError(xmlGenericErrorContext, "realloc failed");      \
        return(NULL);                                                   \
    }                                                                   \
    buffer = buffer##_new;                                              \
}
1030
1031/**
1032 * xmlStringDecodeEntities:
1033 * @ctxt: the parser context
1034 * @str: the input string
1035 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1036 * @end: an end marker xmlChar, 0 if none
1037 * @end2: an end marker xmlChar, 0 if none
1038 * @end3: an end marker xmlChar, 0 if none
1039 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001040 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001041 *
1042 * [67] Reference ::= EntityRef | CharRef
1043 *
1044 * [69] PEReference ::= '%' Name ';'
1045 *
1046 * Returns A newly allocated string with the substitution done. The caller
1047 * must deallocate it !
1048 */
1049xmlChar *
1050xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1051 xmlChar end, xmlChar end2, xmlChar end3) {
1052 xmlChar *buffer = NULL;
1053 int buffer_size = 0;
1054
1055 xmlChar *current = NULL;
1056 xmlEntityPtr ent;
1057 int c,l;
1058 int nbchars = 0;
1059
1060 if (str == NULL)
1061 return(NULL);
1062
1063 if (ctxt->depth > 40) {
1064 ctxt->errNo = XML_ERR_ENTITY_LOOP;
1065 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1066 ctxt->sax->error(ctxt->userData,
1067 "Detected entity reference loop\n");
1068 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001069 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001070 return(NULL);
1071 }
1072
1073 /*
1074 * allocate a translation buffer.
1075 */
1076 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001077 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001078 if (buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00001079 xmlGenericError(xmlGenericErrorContext,
1080 "xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00001081 return(NULL);
1082 }
1083
1084 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001085 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001086 * we are operating on already parsed values.
1087 */
1088 c = CUR_SCHAR(str, l);
1089 while ((c != 0) && (c != end) && /* non input consuming loop */
1090 (c != end2) && (c != end3)) {
1091
1092 if (c == 0) break;
1093 if ((c == '&') && (str[1] == '#')) {
1094 int val = xmlParseStringCharRef(ctxt, &str);
1095 if (val != 0) {
1096 COPY_BUF(0,buffer,nbchars,val);
1097 }
1098 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1099 if (xmlParserDebugEntities)
1100 xmlGenericError(xmlGenericErrorContext,
1101 "String decoding Entity Reference: %.30s\n",
1102 str);
1103 ent = xmlParseStringEntityRef(ctxt, &str);
1104 if ((ent != NULL) &&
1105 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1106 if (ent->content != NULL) {
1107 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1108 } else {
1109 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1110 ctxt->sax->error(ctxt->userData,
1111 "internal error entity has no content\n");
1112 }
1113 } else if ((ent != NULL) && (ent->content != NULL)) {
1114 xmlChar *rep;
1115
1116 ctxt->depth++;
1117 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1118 0, 0, 0);
1119 ctxt->depth--;
1120 if (rep != NULL) {
1121 current = rep;
1122 while (*current != 0) { /* non input consuming loop */
1123 buffer[nbchars++] = *current++;
1124 if (nbchars >
1125 buffer_size - XML_PARSER_BUFFER_SIZE) {
1126 growBuffer(buffer);
1127 }
1128 }
1129 xmlFree(rep);
1130 }
1131 } else if (ent != NULL) {
1132 int i = xmlStrlen(ent->name);
1133 const xmlChar *cur = ent->name;
1134
1135 buffer[nbchars++] = '&';
1136 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1137 growBuffer(buffer);
1138 }
1139 for (;i > 0;i--)
1140 buffer[nbchars++] = *cur++;
1141 buffer[nbchars++] = ';';
1142 }
1143 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1144 if (xmlParserDebugEntities)
1145 xmlGenericError(xmlGenericErrorContext,
1146 "String decoding PE Reference: %.30s\n", str);
1147 ent = xmlParseStringPEReference(ctxt, &str);
1148 if (ent != NULL) {
1149 xmlChar *rep;
1150
1151 ctxt->depth++;
1152 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1153 0, 0, 0);
1154 ctxt->depth--;
1155 if (rep != NULL) {
1156 current = rep;
1157 while (*current != 0) { /* non input consuming loop */
1158 buffer[nbchars++] = *current++;
1159 if (nbchars >
1160 buffer_size - XML_PARSER_BUFFER_SIZE) {
1161 growBuffer(buffer);
1162 }
1163 }
1164 xmlFree(rep);
1165 }
1166 }
1167 } else {
1168 COPY_BUF(l,buffer,nbchars,c);
1169 str += l;
1170 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1171 growBuffer(buffer);
1172 }
1173 }
1174 c = CUR_SCHAR(str, l);
1175 }
1176 buffer[nbchars++] = 0;
1177 return(buffer);
1178}
1179
1180
1181/************************************************************************
1182 * *
1183 * Commodity functions to handle xmlChars *
1184 * *
1185 ************************************************************************/
1186
1187/**
1188 * xmlStrndup:
1189 * @cur: the input xmlChar *
1190 * @len: the len of @cur
1191 *
1192 * a strndup for array of xmlChar's
1193 *
1194 * Returns a new xmlChar * or NULL
1195 */
1196xmlChar *
1197xmlStrndup(const xmlChar *cur, int len) {
1198 xmlChar *ret;
1199
1200 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001201 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001202 if (ret == NULL) {
1203 xmlGenericError(xmlGenericErrorContext,
1204 "malloc of %ld byte failed\n",
1205 (len + 1) * (long)sizeof(xmlChar));
1206 return(NULL);
1207 }
1208 memcpy(ret, cur, len * sizeof(xmlChar));
1209 ret[len] = 0;
1210 return(ret);
1211}
1212
1213/**
1214 * xmlStrdup:
1215 * @cur: the input xmlChar *
1216 *
1217 * a strdup for array of xmlChar's. Since they are supposed to be
1218 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1219 * a termination mark of '0'.
1220 *
1221 * Returns a new xmlChar * or NULL
1222 */
1223xmlChar *
1224xmlStrdup(const xmlChar *cur) {
1225 const xmlChar *p = cur;
1226
1227 if (cur == NULL) return(NULL);
1228 while (*p != 0) p++; /* non input consuming */
1229 return(xmlStrndup(cur, p - cur));
1230}
1231
1232/**
1233 * xmlCharStrndup:
1234 * @cur: the input char *
1235 * @len: the len of @cur
1236 *
1237 * a strndup for char's to xmlChar's
1238 *
1239 * Returns a new xmlChar * or NULL
1240 */
1241
1242xmlChar *
1243xmlCharStrndup(const char *cur, int len) {
1244 int i;
1245 xmlChar *ret;
1246
1247 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001248 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001249 if (ret == NULL) {
1250 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1251 (len + 1) * (long)sizeof(xmlChar));
1252 return(NULL);
1253 }
1254 for (i = 0;i < len;i++)
1255 ret[i] = (xmlChar) cur[i];
1256 ret[len] = 0;
1257 return(ret);
1258}
1259
1260/**
1261 * xmlCharStrdup:
1262 * @cur: the input char *
Owen Taylor3473f882001-02-23 17:55:21 +00001263 *
1264 * a strdup for char's to xmlChar's
1265 *
1266 * Returns a new xmlChar * or NULL
1267 */
1268
1269xmlChar *
1270xmlCharStrdup(const char *cur) {
1271 const char *p = cur;
1272
1273 if (cur == NULL) return(NULL);
1274 while (*p != '\0') p++; /* non input consuming */
1275 return(xmlCharStrndup(cur, p - cur));
1276}
1277
1278/**
1279 * xmlStrcmp:
1280 * @str1: the first xmlChar *
1281 * @str2: the second xmlChar *
1282 *
1283 * a strcmp for xmlChar's
1284 *
1285 * Returns the integer result of the comparison
1286 */
1287
1288int
1289xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1290 register int tmp;
1291
1292 if (str1 == str2) return(0);
1293 if (str1 == NULL) return(-1);
1294 if (str2 == NULL) return(1);
1295 do {
1296 tmp = *str1++ - *str2;
1297 if (tmp != 0) return(tmp);
1298 } while (*str2++ != 0);
1299 return 0;
1300}
1301
1302/**
1303 * xmlStrEqual:
1304 * @str1: the first xmlChar *
1305 * @str2: the second xmlChar *
1306 *
1307 * Check if both string are equal of have same content
1308 * Should be a bit more readable and faster than xmlStrEqual()
1309 *
1310 * Returns 1 if they are equal, 0 if they are different
1311 */
1312
1313int
1314xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1315 if (str1 == str2) return(1);
1316 if (str1 == NULL) return(0);
1317 if (str2 == NULL) return(0);
1318 do {
1319 if (*str1++ != *str2) return(0);
1320 } while (*str2++);
1321 return(1);
1322}
1323
1324/**
1325 * xmlStrncmp:
1326 * @str1: the first xmlChar *
1327 * @str2: the second xmlChar *
1328 * @len: the max comparison length
1329 *
1330 * a strncmp for xmlChar's
1331 *
1332 * Returns the integer result of the comparison
1333 */
1334
1335int
1336xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1337 register int tmp;
1338
1339 if (len <= 0) return(0);
1340 if (str1 == str2) return(0);
1341 if (str1 == NULL) return(-1);
1342 if (str2 == NULL) return(1);
1343 do {
1344 tmp = *str1++ - *str2;
1345 if (tmp != 0 || --len == 0) return(tmp);
1346 } while (*str2++ != 0);
1347 return 0;
1348}
1349
Daniel Veillardb44025c2001-10-11 22:55:55 +00001350static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001351 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1352 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1353 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1354 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1355 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1356 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1357 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1358 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1359 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1360 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1361 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1362 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1363 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1364 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1365 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1366 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1367 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1368 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1369 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1370 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1371 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1372 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1373 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1374 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1375 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1376 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1377 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1378 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1379 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1380 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1381 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1382 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1383};
1384
1385/**
1386 * xmlStrcasecmp:
1387 * @str1: the first xmlChar *
1388 * @str2: the second xmlChar *
1389 *
1390 * a strcasecmp for xmlChar's
1391 *
1392 * Returns the integer result of the comparison
1393 */
1394
1395int
1396xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1397 register int tmp;
1398
1399 if (str1 == str2) return(0);
1400 if (str1 == NULL) return(-1);
1401 if (str2 == NULL) return(1);
1402 do {
1403 tmp = casemap[*str1++] - casemap[*str2];
1404 if (tmp != 0) return(tmp);
1405 } while (*str2++ != 0);
1406 return 0;
1407}
1408
1409/**
1410 * xmlStrncasecmp:
1411 * @str1: the first xmlChar *
1412 * @str2: the second xmlChar *
1413 * @len: the max comparison length
1414 *
1415 * a strncasecmp for xmlChar's
1416 *
1417 * Returns the integer result of the comparison
1418 */
1419
1420int
1421xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1422 register int tmp;
1423
1424 if (len <= 0) return(0);
1425 if (str1 == str2) return(0);
1426 if (str1 == NULL) return(-1);
1427 if (str2 == NULL) return(1);
1428 do {
1429 tmp = casemap[*str1++] - casemap[*str2];
1430 if (tmp != 0 || --len == 0) return(tmp);
1431 } while (*str2++ != 0);
1432 return 0;
1433}
1434
1435/**
1436 * xmlStrchr:
1437 * @str: the xmlChar * array
1438 * @val: the xmlChar to search
1439 *
1440 * a strchr for xmlChar's
1441 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001442 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001443 */
1444
1445const xmlChar *
1446xmlStrchr(const xmlChar *str, xmlChar val) {
1447 if (str == NULL) return(NULL);
1448 while (*str != 0) { /* non input consuming */
1449 if (*str == val) return((xmlChar *) str);
1450 str++;
1451 }
1452 return(NULL);
1453}
1454
1455/**
1456 * xmlStrstr:
1457 * @str: the xmlChar * array (haystack)
1458 * @val: the xmlChar to search (needle)
1459 *
1460 * a strstr for xmlChar's
1461 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001462 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001463 */
1464
1465const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001466xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001467 int n;
1468
1469 if (str == NULL) return(NULL);
1470 if (val == NULL) return(NULL);
1471 n = xmlStrlen(val);
1472
1473 if (n == 0) return(str);
1474 while (*str != 0) { /* non input consuming */
1475 if (*str == *val) {
1476 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1477 }
1478 str++;
1479 }
1480 return(NULL);
1481}
1482
1483/**
1484 * xmlStrcasestr:
1485 * @str: the xmlChar * array (haystack)
1486 * @val: the xmlChar to search (needle)
1487 *
1488 * a case-ignoring strstr for xmlChar's
1489 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001490 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001491 */
1492
1493const xmlChar *
1494xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1495 int n;
1496
1497 if (str == NULL) return(NULL);
1498 if (val == NULL) return(NULL);
1499 n = xmlStrlen(val);
1500
1501 if (n == 0) return(str);
1502 while (*str != 0) { /* non input consuming */
1503 if (casemap[*str] == casemap[*val])
1504 if (!xmlStrncasecmp(str, val, n)) return(str);
1505 str++;
1506 }
1507 return(NULL);
1508}
1509
1510/**
1511 * xmlStrsub:
1512 * @str: the xmlChar * array (haystack)
1513 * @start: the index of the first char (zero based)
1514 * @len: the length of the substring
1515 *
1516 * Extract a substring of a given string
1517 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001518 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001519 */
1520
1521xmlChar *
1522xmlStrsub(const xmlChar *str, int start, int len) {
1523 int i;
1524
1525 if (str == NULL) return(NULL);
1526 if (start < 0) return(NULL);
1527 if (len < 0) return(NULL);
1528
1529 for (i = 0;i < start;i++) {
1530 if (*str == 0) return(NULL);
1531 str++;
1532 }
1533 if (*str == 0) return(NULL);
1534 return(xmlStrndup(str, len));
1535}
1536
1537/**
1538 * xmlStrlen:
1539 * @str: the xmlChar * array
1540 *
1541 * length of a xmlChar's string
1542 *
1543 * Returns the number of xmlChar contained in the ARRAY.
1544 */
1545
1546int
1547xmlStrlen(const xmlChar *str) {
1548 int len = 0;
1549
1550 if (str == NULL) return(0);
1551 while (*str != 0) { /* non input consuming */
1552 str++;
1553 len++;
1554 }
1555 return(len);
1556}
1557
1558/**
1559 * xmlStrncat:
1560 * @cur: the original xmlChar * array
1561 * @add: the xmlChar * array added
1562 * @len: the length of @add
1563 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001564 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001565 * first bytes of @add.
1566 *
1567 * Returns a new xmlChar *, the original @cur is reallocated if needed
1568 * and should not be freed
1569 */
1570
1571xmlChar *
1572xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1573 int size;
1574 xmlChar *ret;
1575
1576 if ((add == NULL) || (len == 0))
1577 return(cur);
1578 if (cur == NULL)
1579 return(xmlStrndup(add, len));
1580
1581 size = xmlStrlen(cur);
1582 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1583 if (ret == NULL) {
1584 xmlGenericError(xmlGenericErrorContext,
1585 "xmlStrncat: realloc of %ld byte failed\n",
1586 (size + len + 1) * (long)sizeof(xmlChar));
1587 return(cur);
1588 }
1589 memcpy(&ret[size], add, len * sizeof(xmlChar));
1590 ret[size + len] = 0;
1591 return(ret);
1592}
1593
1594/**
1595 * xmlStrcat:
1596 * @cur: the original xmlChar * array
1597 * @add: the xmlChar * array added
1598 *
1599 * a strcat for array of xmlChar's. Since they are supposed to be
1600 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1601 * a termination mark of '0'.
1602 *
1603 * Returns a new xmlChar * containing the concatenated string.
1604 */
1605xmlChar *
1606xmlStrcat(xmlChar *cur, const xmlChar *add) {
1607 const xmlChar *p = add;
1608
1609 if (add == NULL) return(cur);
1610 if (cur == NULL)
1611 return(xmlStrdup(add));
1612
1613 while (*p != 0) p++; /* non input consuming */
1614 return(xmlStrncat(cur, add, p - add));
1615}
1616
1617/************************************************************************
1618 * *
1619 * Commodity functions, cleanup needed ? *
1620 * *
1621 ************************************************************************/
1622
1623/**
1624 * areBlanks:
1625 * @ctxt: an XML parser context
1626 * @str: a xmlChar *
1627 * @len: the size of @str
1628 *
1629 * Is this a sequence of blank chars that one can ignore ?
1630 *
1631 * Returns 1 if ignorable 0 otherwise.
1632 */
1633
1634static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1635 int i, ret;
1636 xmlNodePtr lastChild;
1637
Daniel Veillard05c13a22001-09-09 08:38:09 +00001638 /*
1639 * Don't spend time trying to differentiate them, the same callback is
1640 * used !
1641 */
1642 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001643 return(0);
1644
Owen Taylor3473f882001-02-23 17:55:21 +00001645 /*
1646 * Check for xml:space value.
1647 */
1648 if (*(ctxt->space) == 1)
1649 return(0);
1650
1651 /*
1652 * Check that the string is made of blanks
1653 */
1654 for (i = 0;i < len;i++)
1655 if (!(IS_BLANK(str[i]))) return(0);
1656
1657 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001658 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001659 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001660 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001661 if (ctxt->myDoc != NULL) {
1662 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1663 if (ret == 0) return(1);
1664 if (ret == 1) return(0);
1665 }
1666
1667 /*
1668 * Otherwise, heuristic :-\
1669 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001670 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001671 if ((ctxt->node->children == NULL) &&
1672 (RAW == '<') && (NXT(1) == '/')) return(0);
1673
1674 lastChild = xmlGetLastChild(ctxt->node);
1675 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001676 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1677 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001678 } else if (xmlNodeIsText(lastChild))
1679 return(0);
1680 else if ((ctxt->node->children != NULL) &&
1681 (xmlNodeIsText(ctxt->node->children)))
1682 return(0);
1683 return(1);
1684}
1685
Owen Taylor3473f882001-02-23 17:55:21 +00001686/************************************************************************
1687 * *
1688 * Extra stuff for namespace support *
1689 * Relates to http://www.w3.org/TR/WD-xml-names *
1690 * *
1691 ************************************************************************/
1692
1693/**
1694 * xmlSplitQName:
1695 * @ctxt: an XML parser context
1696 * @name: an XML parser context
1697 * @prefix: a xmlChar **
1698 *
1699 * parse an UTF8 encoded XML qualified name string
1700 *
1701 * [NS 5] QName ::= (Prefix ':')? LocalPart
1702 *
1703 * [NS 6] Prefix ::= NCName
1704 *
1705 * [NS 7] LocalPart ::= NCName
1706 *
1707 * Returns the local part, and prefix is updated
1708 * to get the Prefix if any.
1709 */
1710
1711xmlChar *
1712xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1713 xmlChar buf[XML_MAX_NAMELEN + 5];
1714 xmlChar *buffer = NULL;
1715 int len = 0;
1716 int max = XML_MAX_NAMELEN;
1717 xmlChar *ret = NULL;
1718 const xmlChar *cur = name;
1719 int c;
1720
1721 *prefix = NULL;
1722
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00001723 if (cur == NULL) return(NULL);
1724
Owen Taylor3473f882001-02-23 17:55:21 +00001725#ifndef XML_XML_NAMESPACE
1726 /* xml: prefix is not really a namespace */
1727 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1728 (cur[2] == 'l') && (cur[3] == ':'))
1729 return(xmlStrdup(name));
1730#endif
1731
Daniel Veillard597bc482003-07-24 16:08:28 +00001732 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00001733 if (cur[0] == ':')
1734 return(xmlStrdup(name));
1735
1736 c = *cur++;
1737 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1738 buf[len++] = c;
1739 c = *cur++;
1740 }
1741 if (len >= max) {
1742 /*
1743 * Okay someone managed to make a huge name, so he's ready to pay
1744 * for the processing speed.
1745 */
1746 max = len * 2;
1747
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001748 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001749 if (buffer == NULL) {
1750 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1751 ctxt->sax->error(ctxt->userData,
1752 "xmlSplitQName: out of memory\n");
1753 return(NULL);
1754 }
1755 memcpy(buffer, buf, len);
1756 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1757 if (len + 10 > max) {
1758 max *= 2;
1759 buffer = (xmlChar *) xmlRealloc(buffer,
1760 max * sizeof(xmlChar));
1761 if (buffer == NULL) {
1762 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1763 ctxt->sax->error(ctxt->userData,
1764 "xmlSplitQName: out of memory\n");
1765 return(NULL);
1766 }
1767 }
1768 buffer[len++] = c;
1769 c = *cur++;
1770 }
1771 buffer[len] = 0;
1772 }
1773
Daniel Veillard597bc482003-07-24 16:08:28 +00001774 /* nasty but well=formed
1775 if ((c == ':') && (*cur == 0)) {
1776 return(xmlStrdup(name));
1777 } */
1778
Owen Taylor3473f882001-02-23 17:55:21 +00001779 if (buffer == NULL)
1780 ret = xmlStrndup(buf, len);
1781 else {
1782 ret = buffer;
1783 buffer = NULL;
1784 max = XML_MAX_NAMELEN;
1785 }
1786
1787
1788 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00001789 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001790 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00001791 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00001792 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00001793 }
Owen Taylor3473f882001-02-23 17:55:21 +00001794 len = 0;
1795
Daniel Veillardbb284f42002-10-16 18:02:47 +00001796 /*
1797 * Check that the first character is proper to start
1798 * a new name
1799 */
1800 if (!(((c >= 0x61) && (c <= 0x7A)) ||
1801 ((c >= 0x41) && (c <= 0x5A)) ||
1802 (c == '_') || (c == ':'))) {
1803 int l;
1804 int first = CUR_SCHAR(cur, l);
1805
1806 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001807 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1808 (ctxt->sax->error != NULL))
Daniel Veillardbb284f42002-10-16 18:02:47 +00001809 ctxt->sax->error(ctxt->userData,
1810 "Name %s is not XML Namespace compliant\n",
1811 name);
1812 }
1813 }
1814 cur++;
1815
Owen Taylor3473f882001-02-23 17:55:21 +00001816 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1817 buf[len++] = c;
1818 c = *cur++;
1819 }
1820 if (len >= max) {
1821 /*
1822 * Okay someone managed to make a huge name, so he's ready to pay
1823 * for the processing speed.
1824 */
1825 max = len * 2;
1826
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001827 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001828 if (buffer == NULL) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001829 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1830 (ctxt->sax->error != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00001831 ctxt->sax->error(ctxt->userData,
1832 "xmlSplitQName: out of memory\n");
1833 return(NULL);
1834 }
1835 memcpy(buffer, buf, len);
1836 while (c != 0) { /* tested bigname2.xml */
1837 if (len + 10 > max) {
1838 max *= 2;
1839 buffer = (xmlChar *) xmlRealloc(buffer,
1840 max * sizeof(xmlChar));
1841 if (buffer == NULL) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001842 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1843 (ctxt->sax->error != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00001844 ctxt->sax->error(ctxt->userData,
1845 "xmlSplitQName: out of memory\n");
1846 return(NULL);
1847 }
1848 }
1849 buffer[len++] = c;
1850 c = *cur++;
1851 }
1852 buffer[len] = 0;
1853 }
1854
1855 if (buffer == NULL)
1856 ret = xmlStrndup(buf, len);
1857 else {
1858 ret = buffer;
1859 }
1860 }
1861
1862 return(ret);
1863}
1864
1865/************************************************************************
1866 * *
1867 * The parser itself *
1868 * Relates to http://www.w3.org/TR/REC-xml *
1869 * *
1870 ************************************************************************/
1871
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001872static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001873/**
1874 * xmlParseName:
1875 * @ctxt: an XML parser context
1876 *
1877 * parse an XML name.
1878 *
1879 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1880 * CombiningChar | Extender
1881 *
1882 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1883 *
1884 * [6] Names ::= Name (S Name)*
1885 *
1886 * Returns the Name parsed or NULL
1887 */
1888
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001889const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001890xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001891 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001892 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001893 int count = 0;
1894
1895 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001896
1897 /*
1898 * Accelerator for simple ASCII names
1899 */
1900 in = ctxt->input->cur;
1901 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1902 ((*in >= 0x41) && (*in <= 0x5A)) ||
1903 (*in == '_') || (*in == ':')) {
1904 in++;
1905 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1906 ((*in >= 0x41) && (*in <= 0x5A)) ||
1907 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001908 (*in == '_') || (*in == '-') ||
1909 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001910 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001911 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001912 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001913 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001914 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00001915 ctxt->nbChars += count;
1916 ctxt->input->col += count;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00001917 if (ret == NULL) {
1918 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1919 ctxt->sax->error(ctxt->userData,
1920 "XML parser: out of memory\n");
1921 ctxt->errNo = XML_ERR_NO_MEMORY;
1922 ctxt->instate = XML_PARSER_EOF;
1923 ctxt->disableSAX = 1;
1924 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001925 return(ret);
1926 }
1927 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001928 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001929}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001930
Daniel Veillard46de64e2002-05-29 08:21:33 +00001931/**
1932 * xmlParseNameAndCompare:
1933 * @ctxt: an XML parser context
1934 *
1935 * parse an XML name and compares for match
1936 * (specialized for endtag parsing)
1937 *
1938 *
1939 * Returns NULL for an illegal name, (xmlChar*) 1 for success
1940 * and the name for mismatch
1941 */
1942
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001943static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00001944xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
1945 const xmlChar *cmp = other;
1946 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001947 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00001948
1949 GROW;
1950
1951 in = ctxt->input->cur;
1952 while (*in != 0 && *in == *cmp) {
1953 ++in;
1954 ++cmp;
1955 }
1956 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
1957 /* success */
1958 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001959 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00001960 }
1961 /* failure (or end of input buffer), check with full function */
1962 ret = xmlParseName (ctxt);
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001963 if ((ret != NULL) && (xmlStrEqual (ret, other))) {
1964 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00001965 }
1966 return ret;
1967}
1968
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001969static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001970xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00001971 int len = 0, l;
1972 int c;
1973 int count = 0;
1974
1975 /*
1976 * Handler for more complex cases
1977 */
1978 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001979 c = CUR_CHAR(l);
1980 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1981 (!IS_LETTER(c) && (c != '_') &&
1982 (c != ':'))) {
1983 return(NULL);
1984 }
1985
1986 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1987 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1988 (c == '.') || (c == '-') ||
1989 (c == '_') || (c == ':') ||
1990 (IS_COMBINING(c)) ||
1991 (IS_EXTENDER(c)))) {
1992 if (count++ > 100) {
1993 count = 0;
1994 GROW;
1995 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001996 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00001997 NEXTL(l);
1998 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00001999 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002000 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002001}
2002
2003/**
2004 * xmlParseStringName:
2005 * @ctxt: an XML parser context
2006 * @str: a pointer to the string pointer (IN/OUT)
2007 *
2008 * parse an XML name.
2009 *
2010 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2011 * CombiningChar | Extender
2012 *
2013 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2014 *
2015 * [6] Names ::= Name (S Name)*
2016 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002017 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002018 * is updated to the current location in the string.
2019 */
2020
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002021static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002022xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2023 xmlChar buf[XML_MAX_NAMELEN + 5];
2024 const xmlChar *cur = *str;
2025 int len = 0, l;
2026 int c;
2027
2028 c = CUR_SCHAR(cur, l);
2029 if (!IS_LETTER(c) && (c != '_') &&
2030 (c != ':')) {
2031 return(NULL);
2032 }
2033
2034 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2035 (c == '.') || (c == '-') ||
2036 (c == '_') || (c == ':') ||
2037 (IS_COMBINING(c)) ||
2038 (IS_EXTENDER(c))) {
2039 COPY_BUF(l,buf,len,c);
2040 cur += l;
2041 c = CUR_SCHAR(cur, l);
2042 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2043 /*
2044 * Okay someone managed to make a huge name, so he's ready to pay
2045 * for the processing speed.
2046 */
2047 xmlChar *buffer;
2048 int max = len * 2;
2049
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002050 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002051 if (buffer == NULL) {
2052 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2053 ctxt->sax->error(ctxt->userData,
2054 "xmlParseStringName: out of memory\n");
2055 return(NULL);
2056 }
2057 memcpy(buffer, buf, len);
2058 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2059 (c == '.') || (c == '-') ||
2060 (c == '_') || (c == ':') ||
2061 (IS_COMBINING(c)) ||
2062 (IS_EXTENDER(c))) {
2063 if (len + 10 > max) {
2064 max *= 2;
2065 buffer = (xmlChar *) xmlRealloc(buffer,
2066 max * sizeof(xmlChar));
2067 if (buffer == NULL) {
2068 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2069 ctxt->sax->error(ctxt->userData,
2070 "xmlParseStringName: out of memory\n");
2071 return(NULL);
2072 }
2073 }
2074 COPY_BUF(l,buffer,len,c);
2075 cur += l;
2076 c = CUR_SCHAR(cur, l);
2077 }
2078 buffer[len] = 0;
2079 *str = cur;
2080 return(buffer);
2081 }
2082 }
2083 *str = cur;
2084 return(xmlStrndup(buf, len));
2085}
2086
2087/**
2088 * xmlParseNmtoken:
2089 * @ctxt: an XML parser context
2090 *
2091 * parse an XML Nmtoken.
2092 *
2093 * [7] Nmtoken ::= (NameChar)+
2094 *
2095 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2096 *
2097 * Returns the Nmtoken parsed or NULL
2098 */
2099
2100xmlChar *
2101xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2102 xmlChar buf[XML_MAX_NAMELEN + 5];
2103 int len = 0, l;
2104 int c;
2105 int count = 0;
2106
2107 GROW;
2108 c = CUR_CHAR(l);
2109
2110 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2111 (c == '.') || (c == '-') ||
2112 (c == '_') || (c == ':') ||
2113 (IS_COMBINING(c)) ||
2114 (IS_EXTENDER(c))) {
2115 if (count++ > 100) {
2116 count = 0;
2117 GROW;
2118 }
2119 COPY_BUF(l,buf,len,c);
2120 NEXTL(l);
2121 c = CUR_CHAR(l);
2122 if (len >= XML_MAX_NAMELEN) {
2123 /*
2124 * Okay someone managed to make a huge token, so he's ready to pay
2125 * for the processing speed.
2126 */
2127 xmlChar *buffer;
2128 int max = len * 2;
2129
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002130 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002131 if (buffer == NULL) {
2132 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2133 ctxt->sax->error(ctxt->userData,
2134 "xmlParseNmtoken: out of memory\n");
2135 return(NULL);
2136 }
2137 memcpy(buffer, buf, len);
2138 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2139 (c == '.') || (c == '-') ||
2140 (c == '_') || (c == ':') ||
2141 (IS_COMBINING(c)) ||
2142 (IS_EXTENDER(c))) {
2143 if (count++ > 100) {
2144 count = 0;
2145 GROW;
2146 }
2147 if (len + 10 > max) {
2148 max *= 2;
2149 buffer = (xmlChar *) xmlRealloc(buffer,
2150 max * sizeof(xmlChar));
2151 if (buffer == NULL) {
2152 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2153 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002154 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002155 return(NULL);
2156 }
2157 }
2158 COPY_BUF(l,buffer,len,c);
2159 NEXTL(l);
2160 c = CUR_CHAR(l);
2161 }
2162 buffer[len] = 0;
2163 return(buffer);
2164 }
2165 }
2166 if (len == 0)
2167 return(NULL);
2168 return(xmlStrndup(buf, len));
2169}
2170
2171/**
2172 * xmlParseEntityValue:
2173 * @ctxt: an XML parser context
2174 * @orig: if non-NULL store a copy of the original entity value
2175 *
2176 * parse a value for ENTITY declarations
2177 *
2178 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2179 * "'" ([^%&'] | PEReference | Reference)* "'"
2180 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002181 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002182 */
2183
2184xmlChar *
2185xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2186 xmlChar *buf = NULL;
2187 int len = 0;
2188 int size = XML_PARSER_BUFFER_SIZE;
2189 int c, l;
2190 xmlChar stop;
2191 xmlChar *ret = NULL;
2192 const xmlChar *cur = NULL;
2193 xmlParserInputPtr input;
2194
2195 if (RAW == '"') stop = '"';
2196 else if (RAW == '\'') stop = '\'';
2197 else {
2198 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2199 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2200 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2201 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002202 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002203 return(NULL);
2204 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002205 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002206 if (buf == NULL) {
2207 xmlGenericError(xmlGenericErrorContext,
2208 "malloc of %d byte failed\n", size);
2209 return(NULL);
2210 }
2211
2212 /*
2213 * The content of the entity definition is copied in a buffer.
2214 */
2215
2216 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2217 input = ctxt->input;
2218 GROW;
2219 NEXT;
2220 c = CUR_CHAR(l);
2221 /*
2222 * NOTE: 4.4.5 Included in Literal
2223 * When a parameter entity reference appears in a literal entity
2224 * value, ... a single or double quote character in the replacement
2225 * text is always treated as a normal data character and will not
2226 * terminate the literal.
2227 * In practice it means we stop the loop only when back at parsing
2228 * the initial entity and the quote is found
2229 */
2230 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2231 (ctxt->input != input))) {
2232 if (len + 5 >= size) {
2233 size *= 2;
2234 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2235 if (buf == NULL) {
2236 xmlGenericError(xmlGenericErrorContext,
2237 "realloc of %d byte failed\n", size);
2238 return(NULL);
2239 }
2240 }
2241 COPY_BUF(l,buf,len,c);
2242 NEXTL(l);
2243 /*
2244 * Pop-up of finished entities.
2245 */
2246 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2247 xmlPopInput(ctxt);
2248
2249 GROW;
2250 c = CUR_CHAR(l);
2251 if (c == 0) {
2252 GROW;
2253 c = CUR_CHAR(l);
2254 }
2255 }
2256 buf[len] = 0;
2257
2258 /*
2259 * Raise problem w.r.t. '&' and '%' being used in non-entities
2260 * reference constructs. Note Charref will be handled in
2261 * xmlStringDecodeEntities()
2262 */
2263 cur = buf;
2264 while (*cur != 0) { /* non input consuming */
2265 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2266 xmlChar *name;
2267 xmlChar tmp = *cur;
2268
2269 cur++;
2270 name = xmlParseStringName(ctxt, &cur);
2271 if ((name == NULL) || (*cur != ';')) {
2272 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2273 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2274 ctxt->sax->error(ctxt->userData,
2275 "EntityValue: '%c' forbidden except for entities references\n",
2276 tmp);
2277 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002278 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002279 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002280 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2281 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002282 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2283 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2284 ctxt->sax->error(ctxt->userData,
2285 "EntityValue: PEReferences forbidden in internal subset\n",
2286 tmp);
2287 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002288 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002289 }
2290 if (name != NULL)
2291 xmlFree(name);
2292 }
2293 cur++;
2294 }
2295
2296 /*
2297 * Then PEReference entities are substituted.
2298 */
2299 if (c != stop) {
2300 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2301 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2302 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2303 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002304 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002305 xmlFree(buf);
2306 } else {
2307 NEXT;
2308 /*
2309 * NOTE: 4.4.7 Bypassed
2310 * When a general entity reference appears in the EntityValue in
2311 * an entity declaration, it is bypassed and left as is.
2312 * so XML_SUBSTITUTE_REF is not set here.
2313 */
2314 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2315 0, 0, 0);
2316 if (orig != NULL)
2317 *orig = buf;
2318 else
2319 xmlFree(buf);
2320 }
2321
2322 return(ret);
2323}
2324
2325/**
2326 * xmlParseAttValue:
2327 * @ctxt: an XML parser context
2328 *
2329 * parse a value for an attribute
2330 * Note: the parser won't do substitution of entities here, this
2331 * will be handled later in xmlStringGetNodeList
2332 *
2333 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2334 * "'" ([^<&'] | Reference)* "'"
2335 *
2336 * 3.3.3 Attribute-Value Normalization:
2337 * Before the value of an attribute is passed to the application or
2338 * checked for validity, the XML processor must normalize it as follows:
2339 * - a character reference is processed by appending the referenced
2340 * character to the attribute value
2341 * - an entity reference is processed by recursively processing the
2342 * replacement text of the entity
2343 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2344 * appending #x20 to the normalized value, except that only a single
2345 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2346 * parsed entity or the literal entity value of an internal parsed entity
2347 * - other characters are processed by appending them to the normalized value
2348 * If the declared value is not CDATA, then the XML processor must further
2349 * process the normalized attribute value by discarding any leading and
2350 * trailing space (#x20) characters, and by replacing sequences of space
2351 * (#x20) characters by a single space (#x20) character.
2352 * All attributes for which no declaration has been read should be treated
2353 * by a non-validating parser as if declared CDATA.
2354 *
2355 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2356 */
2357
2358xmlChar *
Daniel Veillarde72c7562002-05-31 09:47:30 +00002359xmlParseAttValueComplex(xmlParserCtxtPtr ctxt);
2360
2361xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002362xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2363 xmlChar limit = 0;
Daniel Veillardf4862f02002-09-10 11:13:43 +00002364 const xmlChar *in = NULL;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002365 xmlChar *ret = NULL;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00002366
Daniel Veillarde72c7562002-05-31 09:47:30 +00002367 SHRINK;
2368 GROW;
Daniel Veillarde645e8c2002-10-22 17:35:37 +00002369 in = (xmlChar *) CUR_PTR;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002370 if (*in != '"' && *in != '\'') {
2371 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2372 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2373 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2374 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002375 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002376 return(NULL);
2377 }
2378 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2379 limit = *in;
2380 ++in;
2381
2382 while (*in != limit && *in >= 0x20 && *in <= 0x7f &&
2383 *in != '&' && *in != '<'
2384 ) {
2385 ++in;
2386 }
2387 if (*in != limit) {
2388 return xmlParseAttValueComplex(ctxt);
2389 }
2390 ++in;
2391 ret = xmlStrndup (CUR_PTR + 1, in - CUR_PTR - 2);
2392 CUR_PTR = in;
2393 return ret;
2394}
2395
Daniel Veillard01c13b52002-12-10 15:19:08 +00002396/**
2397 * xmlParseAttValueComplex:
2398 * @ctxt: an XML parser context
2399 *
2400 * parse a value for an attribute, this is the fallback function
2401 * of xmlParseAttValue() when the attribute parsing requires handling
2402 * of non-ASCII characters.
2403 *
2404 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2405 */
Daniel Veillarde72c7562002-05-31 09:47:30 +00002406xmlChar *
2407xmlParseAttValueComplex(xmlParserCtxtPtr ctxt) {
2408 xmlChar limit = 0;
2409 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002410 int len = 0;
2411 int buf_size = 0;
2412 int c, l;
2413 xmlChar *current = NULL;
2414 xmlEntityPtr ent;
2415
Owen Taylor3473f882001-02-23 17:55:21 +00002416 if (NXT(0) == '"') {
2417 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2418 limit = '"';
2419 NEXT;
2420 } else if (NXT(0) == '\'') {
2421 limit = '\'';
2422 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2423 NEXT;
2424 } else {
2425 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2426 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2427 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2428 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002429 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002430 return(NULL);
2431 }
2432
2433 /*
2434 * allocate a translation buffer.
2435 */
2436 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002437 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002438 if (buf == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002439 xmlGenericError(xmlGenericErrorContext,
2440 "xmlParseAttValue: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002441 return(NULL);
2442 }
2443
2444 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002445 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002446 */
2447 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002448 while ((NXT(0) != limit) && /* checked */
2449 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002450 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002451 if (c == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00002452 if (NXT(1) == '#') {
2453 int val = xmlParseCharRef(ctxt);
2454 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002455 if (ctxt->replaceEntities) {
2456 if (len > buf_size - 10) {
2457 growBuffer(buf);
2458 }
2459 buf[len++] = '&';
2460 } else {
2461 /*
2462 * The reparsing will be done in xmlStringGetNodeList()
2463 * called by the attribute() function in SAX.c
2464 */
2465 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002466
Daniel Veillard319a7422001-09-11 09:27:09 +00002467 if (len > buf_size - 10) {
2468 growBuffer(buf);
2469 }
2470 current = &buffer[0];
2471 while (*current != 0) { /* non input consuming */
2472 buf[len++] = *current++;
2473 }
Owen Taylor3473f882001-02-23 17:55:21 +00002474 }
2475 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002476 if (len > buf_size - 10) {
2477 growBuffer(buf);
2478 }
Owen Taylor3473f882001-02-23 17:55:21 +00002479 len += xmlCopyChar(0, &buf[len], val);
2480 }
2481 } else {
2482 ent = xmlParseEntityRef(ctxt);
2483 if ((ent != NULL) &&
2484 (ctxt->replaceEntities != 0)) {
2485 xmlChar *rep;
2486
2487 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2488 rep = xmlStringDecodeEntities(ctxt, ent->content,
2489 XML_SUBSTITUTE_REF, 0, 0, 0);
2490 if (rep != NULL) {
2491 current = rep;
2492 while (*current != 0) { /* non input consuming */
2493 buf[len++] = *current++;
2494 if (len > buf_size - 10) {
2495 growBuffer(buf);
2496 }
2497 }
2498 xmlFree(rep);
2499 }
2500 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002501 if (len > buf_size - 10) {
2502 growBuffer(buf);
2503 }
Owen Taylor3473f882001-02-23 17:55:21 +00002504 if (ent->content != NULL)
2505 buf[len++] = ent->content[0];
2506 }
2507 } else if (ent != NULL) {
2508 int i = xmlStrlen(ent->name);
2509 const xmlChar *cur = ent->name;
2510
2511 /*
2512 * This may look absurd but is needed to detect
2513 * entities problems
2514 */
2515 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2516 (ent->content != NULL)) {
2517 xmlChar *rep;
2518 rep = xmlStringDecodeEntities(ctxt, ent->content,
2519 XML_SUBSTITUTE_REF, 0, 0, 0);
2520 if (rep != NULL)
2521 xmlFree(rep);
2522 }
2523
2524 /*
2525 * Just output the reference
2526 */
2527 buf[len++] = '&';
2528 if (len > buf_size - i - 10) {
2529 growBuffer(buf);
2530 }
2531 for (;i > 0;i--)
2532 buf[len++] = *cur++;
2533 buf[len++] = ';';
2534 }
2535 }
2536 } else {
2537 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2538 COPY_BUF(l,buf,len,0x20);
2539 if (len > buf_size - 10) {
2540 growBuffer(buf);
2541 }
2542 } else {
2543 COPY_BUF(l,buf,len,c);
2544 if (len > buf_size - 10) {
2545 growBuffer(buf);
2546 }
2547 }
2548 NEXTL(l);
2549 }
2550 GROW;
2551 c = CUR_CHAR(l);
2552 }
2553 buf[len++] = 0;
2554 if (RAW == '<') {
2555 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2556 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2557 ctxt->sax->error(ctxt->userData,
2558 "Unescaped '<' not allowed in attributes values\n");
2559 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002560 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002561 } else if (RAW != limit) {
2562 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2563 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2564 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2565 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002566 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002567 } else
2568 NEXT;
2569 return(buf);
2570}
2571
2572/**
2573 * xmlParseSystemLiteral:
2574 * @ctxt: an XML parser context
2575 *
2576 * parse an XML Literal
2577 *
2578 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2579 *
2580 * Returns the SystemLiteral parsed or NULL
2581 */
2582
2583xmlChar *
2584xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2585 xmlChar *buf = NULL;
2586 int len = 0;
2587 int size = XML_PARSER_BUFFER_SIZE;
2588 int cur, l;
2589 xmlChar stop;
2590 int state = ctxt->instate;
2591 int count = 0;
2592
2593 SHRINK;
2594 if (RAW == '"') {
2595 NEXT;
2596 stop = '"';
2597 } else if (RAW == '\'') {
2598 NEXT;
2599 stop = '\'';
2600 } else {
2601 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2603 ctxt->sax->error(ctxt->userData,
2604 "SystemLiteral \" or ' expected\n");
2605 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002606 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002607 return(NULL);
2608 }
2609
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002610 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002611 if (buf == NULL) {
2612 xmlGenericError(xmlGenericErrorContext,
2613 "malloc of %d byte failed\n", size);
2614 return(NULL);
2615 }
2616 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2617 cur = CUR_CHAR(l);
2618 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2619 if (len + 5 >= size) {
2620 size *= 2;
2621 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2622 if (buf == NULL) {
2623 xmlGenericError(xmlGenericErrorContext,
2624 "realloc of %d byte failed\n", size);
2625 ctxt->instate = (xmlParserInputState) state;
2626 return(NULL);
2627 }
2628 }
2629 count++;
2630 if (count > 50) {
2631 GROW;
2632 count = 0;
2633 }
2634 COPY_BUF(l,buf,len,cur);
2635 NEXTL(l);
2636 cur = CUR_CHAR(l);
2637 if (cur == 0) {
2638 GROW;
2639 SHRINK;
2640 cur = CUR_CHAR(l);
2641 }
2642 }
2643 buf[len] = 0;
2644 ctxt->instate = (xmlParserInputState) state;
2645 if (!IS_CHAR(cur)) {
2646 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2647 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2648 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2649 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002650 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002651 } else {
2652 NEXT;
2653 }
2654 return(buf);
2655}
2656
2657/**
2658 * xmlParsePubidLiteral:
2659 * @ctxt: an XML parser context
2660 *
2661 * parse an XML public literal
2662 *
2663 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2664 *
2665 * Returns the PubidLiteral parsed or NULL.
2666 */
2667
2668xmlChar *
2669xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2670 xmlChar *buf = NULL;
2671 int len = 0;
2672 int size = XML_PARSER_BUFFER_SIZE;
2673 xmlChar cur;
2674 xmlChar stop;
2675 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002676 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00002677
2678 SHRINK;
2679 if (RAW == '"') {
2680 NEXT;
2681 stop = '"';
2682 } else if (RAW == '\'') {
2683 NEXT;
2684 stop = '\'';
2685 } else {
2686 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2687 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2688 ctxt->sax->error(ctxt->userData,
2689 "SystemLiteral \" or ' expected\n");
2690 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002691 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002692 return(NULL);
2693 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002694 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002695 if (buf == NULL) {
2696 xmlGenericError(xmlGenericErrorContext,
2697 "malloc of %d byte failed\n", size);
2698 return(NULL);
2699 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002700 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00002701 cur = CUR;
2702 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2703 if (len + 1 >= size) {
2704 size *= 2;
2705 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2706 if (buf == NULL) {
2707 xmlGenericError(xmlGenericErrorContext,
2708 "realloc of %d byte failed\n", size);
2709 return(NULL);
2710 }
2711 }
2712 buf[len++] = cur;
2713 count++;
2714 if (count > 50) {
2715 GROW;
2716 count = 0;
2717 }
2718 NEXT;
2719 cur = CUR;
2720 if (cur == 0) {
2721 GROW;
2722 SHRINK;
2723 cur = CUR;
2724 }
2725 }
2726 buf[len] = 0;
2727 if (cur != stop) {
2728 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2729 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2730 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2731 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002732 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002733 } else {
2734 NEXT;
2735 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002736 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00002737 return(buf);
2738}
2739
Daniel Veillard48b2f892001-02-25 16:11:03 +00002740void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002741/**
2742 * xmlParseCharData:
2743 * @ctxt: an XML parser context
2744 * @cdata: int indicating whether we are within a CDATA section
2745 *
2746 * parse a CharData section.
2747 * if we are within a CDATA section ']]>' marks an end of section.
2748 *
2749 * The right angle bracket (>) may be represented using the string "&gt;",
2750 * and must, for compatibility, be escaped using "&gt;" or a character
2751 * reference when it appears in the string "]]>" in content, when that
2752 * string is not marking the end of a CDATA section.
2753 *
2754 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2755 */
2756
2757void
2758xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00002759 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002760 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002761 int line = ctxt->input->line;
2762 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002763
2764 SHRINK;
2765 GROW;
2766 /*
2767 * Accelerated common case where input don't need to be
2768 * modified before passing it to the handler.
2769 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00002770 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002771 in = ctxt->input->cur;
2772 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002773get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00002774 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
2775 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002776 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002777 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002778 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002779 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002780 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002781 ctxt->input->line++;
2782 in++;
2783 }
2784 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002785 }
2786 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002787 if ((in[1] == ']') && (in[2] == '>')) {
2788 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2789 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2790 ctxt->sax->error(ctxt->userData,
2791 "Sequence ']]>' not allowed in content\n");
2792 ctxt->input->cur = in;
2793 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002794 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002795 return;
2796 }
2797 in++;
2798 goto get_more;
2799 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002800 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002801 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002802 if (IS_BLANK(*ctxt->input->cur)) {
2803 const xmlChar *tmp = ctxt->input->cur;
2804 ctxt->input->cur = in;
2805 if (areBlanks(ctxt, tmp, nbchar)) {
2806 if (ctxt->sax->ignorableWhitespace != NULL)
2807 ctxt->sax->ignorableWhitespace(ctxt->userData,
2808 tmp, nbchar);
2809 } else {
2810 if (ctxt->sax->characters != NULL)
2811 ctxt->sax->characters(ctxt->userData,
2812 tmp, nbchar);
2813 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002814 line = ctxt->input->line;
2815 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002816 } else {
2817 if (ctxt->sax->characters != NULL)
2818 ctxt->sax->characters(ctxt->userData,
2819 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002820 line = ctxt->input->line;
2821 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002822 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002823 }
2824 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002825 if (*in == 0xD) {
2826 in++;
2827 if (*in == 0xA) {
2828 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002829 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002830 ctxt->input->line++;
2831 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002832 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002833 in--;
2834 }
2835 if (*in == '<') {
2836 return;
2837 }
2838 if (*in == '&') {
2839 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002840 }
2841 SHRINK;
2842 GROW;
2843 in = ctxt->input->cur;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002844 } while ((*in >= 0x20) && (*in <= 0x7F));
Daniel Veillard48b2f892001-02-25 16:11:03 +00002845 nbchar = 0;
2846 }
Daniel Veillard50582112001-03-26 22:52:16 +00002847 ctxt->input->line = line;
2848 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002849 xmlParseCharDataComplex(ctxt, cdata);
2850}
2851
Daniel Veillard01c13b52002-12-10 15:19:08 +00002852/**
2853 * xmlParseCharDataComplex:
2854 * @ctxt: an XML parser context
2855 * @cdata: int indicating whether we are within a CDATA section
2856 *
2857 * parse a CharData section.this is the fallback function
2858 * of xmlParseCharData() when the parsing requires handling
2859 * of non-ASCII characters.
2860 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002861void
2862xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002863 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2864 int nbchar = 0;
2865 int cur, l;
2866 int count = 0;
2867
2868 SHRINK;
2869 GROW;
2870 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002871 while ((cur != '<') && /* checked */
2872 (cur != '&') &&
2873 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00002874 if ((cur == ']') && (NXT(1) == ']') &&
2875 (NXT(2) == '>')) {
2876 if (cdata) break;
2877 else {
2878 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2879 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2880 ctxt->sax->error(ctxt->userData,
2881 "Sequence ']]>' not allowed in content\n");
2882 /* Should this be relaxed ??? I see a "must here */
2883 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002884 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002885 }
2886 }
2887 COPY_BUF(l,buf,nbchar,cur);
2888 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2889 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002890 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002891 */
2892 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2893 if (areBlanks(ctxt, buf, nbchar)) {
2894 if (ctxt->sax->ignorableWhitespace != NULL)
2895 ctxt->sax->ignorableWhitespace(ctxt->userData,
2896 buf, nbchar);
2897 } else {
2898 if (ctxt->sax->characters != NULL)
2899 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2900 }
2901 }
2902 nbchar = 0;
2903 }
2904 count++;
2905 if (count > 50) {
2906 GROW;
2907 count = 0;
2908 }
2909 NEXTL(l);
2910 cur = CUR_CHAR(l);
2911 }
2912 if (nbchar != 0) {
2913 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002914 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002915 */
2916 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2917 if (areBlanks(ctxt, buf, nbchar)) {
2918 if (ctxt->sax->ignorableWhitespace != NULL)
2919 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2920 } else {
2921 if (ctxt->sax->characters != NULL)
2922 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2923 }
2924 }
2925 }
2926}
2927
2928/**
2929 * xmlParseExternalID:
2930 * @ctxt: an XML parser context
2931 * @publicID: a xmlChar** receiving PubidLiteral
2932 * @strict: indicate whether we should restrict parsing to only
2933 * production [75], see NOTE below
2934 *
2935 * Parse an External ID or a Public ID
2936 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002937 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002938 * 'PUBLIC' S PubidLiteral S SystemLiteral
2939 *
2940 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2941 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2942 *
2943 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2944 *
2945 * Returns the function returns SystemLiteral and in the second
2946 * case publicID receives PubidLiteral, is strict is off
2947 * it is possible to return NULL and have publicID set.
2948 */
2949
2950xmlChar *
2951xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2952 xmlChar *URI = NULL;
2953
2954 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002955
2956 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002957 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2958 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2959 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2960 SKIP(6);
2961 if (!IS_BLANK(CUR)) {
2962 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2963 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2964 ctxt->sax->error(ctxt->userData,
2965 "Space required after 'SYSTEM'\n");
2966 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002967 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002968 }
2969 SKIP_BLANKS;
2970 URI = xmlParseSystemLiteral(ctxt);
2971 if (URI == NULL) {
2972 ctxt->errNo = XML_ERR_URI_REQUIRED;
2973 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2974 ctxt->sax->error(ctxt->userData,
2975 "xmlParseExternalID: SYSTEM, no URI\n");
2976 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002977 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002978 }
2979 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2980 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2981 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2982 SKIP(6);
2983 if (!IS_BLANK(CUR)) {
2984 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2985 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2986 ctxt->sax->error(ctxt->userData,
2987 "Space required after 'PUBLIC'\n");
2988 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002989 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002990 }
2991 SKIP_BLANKS;
2992 *publicID = xmlParsePubidLiteral(ctxt);
2993 if (*publicID == NULL) {
2994 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2995 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2996 ctxt->sax->error(ctxt->userData,
2997 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2998 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002999 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003000 }
3001 if (strict) {
3002 /*
3003 * We don't handle [83] so "S SystemLiteral" is required.
3004 */
3005 if (!IS_BLANK(CUR)) {
3006 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3007 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3008 ctxt->sax->error(ctxt->userData,
3009 "Space required after the Public Identifier\n");
3010 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003011 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003012 }
3013 } else {
3014 /*
3015 * We handle [83] so we return immediately, if
3016 * "S SystemLiteral" is not detected. From a purely parsing
3017 * point of view that's a nice mess.
3018 */
3019 const xmlChar *ptr;
3020 GROW;
3021
3022 ptr = CUR_PTR;
3023 if (!IS_BLANK(*ptr)) return(NULL);
3024
3025 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3026 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3027 }
3028 SKIP_BLANKS;
3029 URI = xmlParseSystemLiteral(ctxt);
3030 if (URI == NULL) {
3031 ctxt->errNo = XML_ERR_URI_REQUIRED;
3032 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3033 ctxt->sax->error(ctxt->userData,
3034 "xmlParseExternalID: PUBLIC, no URI\n");
3035 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003036 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003037 }
3038 }
3039 return(URI);
3040}
3041
3042/**
3043 * xmlParseComment:
3044 * @ctxt: an XML parser context
3045 *
3046 * Skip an XML (SGML) comment <!-- .... -->
3047 * The spec says that "For compatibility, the string "--" (double-hyphen)
3048 * must not occur within comments. "
3049 *
3050 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3051 */
3052void
3053xmlParseComment(xmlParserCtxtPtr ctxt) {
3054 xmlChar *buf = NULL;
3055 int len;
3056 int size = XML_PARSER_BUFFER_SIZE;
3057 int q, ql;
3058 int r, rl;
3059 int cur, l;
3060 xmlParserInputState state;
3061 xmlParserInputPtr input = ctxt->input;
3062 int count = 0;
3063
3064 /*
3065 * Check that there is a comment right here.
3066 */
3067 if ((RAW != '<') || (NXT(1) != '!') ||
3068 (NXT(2) != '-') || (NXT(3) != '-')) return;
3069
3070 state = ctxt->instate;
3071 ctxt->instate = XML_PARSER_COMMENT;
3072 SHRINK;
3073 SKIP(4);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003074 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003075 if (buf == NULL) {
3076 xmlGenericError(xmlGenericErrorContext,
3077 "malloc of %d byte failed\n", size);
3078 ctxt->instate = state;
3079 return;
3080 }
3081 q = CUR_CHAR(ql);
3082 NEXTL(ql);
3083 r = CUR_CHAR(rl);
3084 NEXTL(rl);
3085 cur = CUR_CHAR(l);
3086 len = 0;
3087 while (IS_CHAR(cur) && /* checked */
3088 ((cur != '>') ||
3089 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003090 if ((r == '-') && (q == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003091 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
3092 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3093 ctxt->sax->error(ctxt->userData,
3094 "Comment must not contain '--' (double-hyphen)`\n");
3095 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003096 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003097 }
3098 if (len + 5 >= size) {
3099 size *= 2;
3100 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3101 if (buf == NULL) {
3102 xmlGenericError(xmlGenericErrorContext,
3103 "realloc of %d byte failed\n", size);
3104 ctxt->instate = state;
3105 return;
3106 }
3107 }
3108 COPY_BUF(ql,buf,len,q);
3109 q = r;
3110 ql = rl;
3111 r = cur;
3112 rl = l;
3113
3114 count++;
3115 if (count > 50) {
3116 GROW;
3117 count = 0;
3118 }
3119 NEXTL(l);
3120 cur = CUR_CHAR(l);
3121 if (cur == 0) {
3122 SHRINK;
3123 GROW;
3124 cur = CUR_CHAR(l);
3125 }
3126 }
3127 buf[len] = 0;
3128 if (!IS_CHAR(cur)) {
3129 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
3130 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3131 ctxt->sax->error(ctxt->userData,
3132 "Comment not terminated \n<!--%.50s\n", buf);
3133 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003134 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003135 xmlFree(buf);
3136 } else {
3137 if (input != ctxt->input) {
3138 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3139 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3140 ctxt->sax->error(ctxt->userData,
3141"Comment doesn't start and stop in the same entity\n");
3142 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003143 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003144 }
3145 NEXT;
3146 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3147 (!ctxt->disableSAX))
3148 ctxt->sax->comment(ctxt->userData, buf);
3149 xmlFree(buf);
3150 }
3151 ctxt->instate = state;
3152}
3153
3154/**
3155 * xmlParsePITarget:
3156 * @ctxt: an XML parser context
3157 *
3158 * parse the name of a PI
3159 *
3160 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3161 *
3162 * Returns the PITarget name or NULL
3163 */
3164
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003165const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003166xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003167 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003168
3169 name = xmlParseName(ctxt);
3170 if ((name != NULL) &&
3171 ((name[0] == 'x') || (name[0] == 'X')) &&
3172 ((name[1] == 'm') || (name[1] == 'M')) &&
3173 ((name[2] == 'l') || (name[2] == 'L'))) {
3174 int i;
3175 if ((name[0] == 'x') && (name[1] == 'm') &&
3176 (name[2] == 'l') && (name[3] == 0)) {
3177 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3178 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3179 ctxt->sax->error(ctxt->userData,
3180 "XML declaration allowed only at the start of the document\n");
3181 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003182 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003183 return(name);
3184 } else if (name[3] == 0) {
3185 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3186 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3187 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
3188 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003189 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003190 return(name);
3191 }
3192 for (i = 0;;i++) {
3193 if (xmlW3CPIs[i] == NULL) break;
3194 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3195 return(name);
3196 }
3197 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3198 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3199 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003200 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003201 }
3202 }
3203 return(name);
3204}
3205
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * A malformed value raises an XML_WAR_CATALOG_PI warning, except when
 * the '=' after the pseudo-attribute name is missing, in which case
 * the PI is silently ignored.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p, *start;
    xmlChar quote;

    /* expect: S? 'catalog' S? '=' S? quoted-value S? */
    for (p = catalog; IS_BLANK(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    for (p += 7; IS_BLANK(*p); p++)
        ;
    if (*p != '=') {
        return;
    }
    for (p++; IS_BLANK(*p); p++)
        ;

    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);

    /* only blanks may follow the closing quote */
    for (p++; IS_BLANK(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (URL == NULL)
        return;
    ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
    xmlFree(URL);
    return;

error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3268
Owen Taylor3473f882001-02-23 17:55:21 +00003269/**
3270 * xmlParsePI:
3271 * @ctxt: an XML parser context
3272 *
3273 * parse an XML Processing Instruction.
3274 *
3275 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3276 *
3277 * The processing is transfered to SAX once parsed.
3278 */
3279
3280void
3281xmlParsePI(xmlParserCtxtPtr ctxt) {
3282 xmlChar *buf = NULL;
3283 int len = 0;
3284 int size = XML_PARSER_BUFFER_SIZE;
3285 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003286 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00003287 xmlParserInputState state;
3288 int count = 0;
3289
3290 if ((RAW == '<') && (NXT(1) == '?')) {
3291 xmlParserInputPtr input = ctxt->input;
3292 state = ctxt->instate;
3293 ctxt->instate = XML_PARSER_PI;
3294 /*
3295 * this is a Processing Instruction.
3296 */
3297 SKIP(2);
3298 SHRINK;
3299
3300 /*
3301 * Parse the target name and check for special support like
3302 * namespace.
3303 */
3304 target = xmlParsePITarget(ctxt);
3305 if (target != NULL) {
3306 if ((RAW == '?') && (NXT(1) == '>')) {
3307 if (input != ctxt->input) {
3308 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3309 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3310 ctxt->sax->error(ctxt->userData,
3311 "PI declaration doesn't start and stop in the same entity\n");
3312 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003313 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003314 }
3315 SKIP(2);
3316
3317 /*
3318 * SAX: PI detected.
3319 */
3320 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3321 (ctxt->sax->processingInstruction != NULL))
3322 ctxt->sax->processingInstruction(ctxt->userData,
3323 target, NULL);
3324 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00003325 return;
3326 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003327 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003328 if (buf == NULL) {
3329 xmlGenericError(xmlGenericErrorContext,
3330 "malloc of %d byte failed\n", size);
3331 ctxt->instate = state;
3332 return;
3333 }
3334 cur = CUR;
3335 if (!IS_BLANK(cur)) {
3336 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3337 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3338 ctxt->sax->error(ctxt->userData,
3339 "xmlParsePI: PI %s space expected\n", target);
3340 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003341 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003342 }
3343 SKIP_BLANKS;
3344 cur = CUR_CHAR(l);
3345 while (IS_CHAR(cur) && /* checked */
3346 ((cur != '?') || (NXT(1) != '>'))) {
3347 if (len + 5 >= size) {
3348 size *= 2;
3349 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3350 if (buf == NULL) {
3351 xmlGenericError(xmlGenericErrorContext,
3352 "realloc of %d byte failed\n", size);
3353 ctxt->instate = state;
3354 return;
3355 }
3356 }
3357 count++;
3358 if (count > 50) {
3359 GROW;
3360 count = 0;
3361 }
3362 COPY_BUF(l,buf,len,cur);
3363 NEXTL(l);
3364 cur = CUR_CHAR(l);
3365 if (cur == 0) {
3366 SHRINK;
3367 GROW;
3368 cur = CUR_CHAR(l);
3369 }
3370 }
3371 buf[len] = 0;
3372 if (cur != '?') {
3373 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3374 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3375 ctxt->sax->error(ctxt->userData,
3376 "xmlParsePI: PI %s never end ...\n", target);
3377 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003378 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003379 } else {
3380 if (input != ctxt->input) {
3381 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3382 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3383 ctxt->sax->error(ctxt->userData,
3384 "PI declaration doesn't start and stop in the same entity\n");
3385 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003386 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003387 }
3388 SKIP(2);
3389
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003390#ifdef LIBXML_CATALOG_ENABLED
3391 if (((state == XML_PARSER_MISC) ||
3392 (state == XML_PARSER_START)) &&
3393 (xmlStrEqual(target, XML_CATALOG_PI))) {
3394 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3395 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3396 (allow == XML_CATA_ALLOW_ALL))
3397 xmlParseCatalogPI(ctxt, buf);
3398 }
3399#endif
3400
3401
Owen Taylor3473f882001-02-23 17:55:21 +00003402 /*
3403 * SAX: PI detected.
3404 */
3405 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3406 (ctxt->sax->processingInstruction != NULL))
3407 ctxt->sax->processingInstruction(ctxt->userData,
3408 target, buf);
3409 }
3410 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003411 } else {
3412 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3413 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3414 ctxt->sax->error(ctxt->userData,
3415 "xmlParsePI : no target name\n");
3416 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003417 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003418 }
3419 ctxt->instate = state;
3420 }
3421}
3422
3423/**
3424 * xmlParseNotationDecl:
3425 * @ctxt: an XML parser context
3426 *
3427 * parse a notation declaration
3428 *
3429 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3430 *
3431 * Hence there is actually 3 choices:
3432 * 'PUBLIC' S PubidLiteral
3433 * 'PUBLIC' S PubidLiteral S SystemLiteral
3434 * and 'SYSTEM' S SystemLiteral
3435 *
3436 * See the NOTE on xmlParseExternalID().
3437 */
3438
3439void
3440xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003441 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003442 xmlChar *Pubid;
3443 xmlChar *Systemid;
3444
3445 if ((RAW == '<') && (NXT(1) == '!') &&
3446 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3447 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3448 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3449 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3450 xmlParserInputPtr input = ctxt->input;
3451 SHRINK;
3452 SKIP(10);
3453 if (!IS_BLANK(CUR)) {
3454 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3455 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3456 ctxt->sax->error(ctxt->userData,
3457 "Space required after '<!NOTATION'\n");
3458 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003459 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003460 return;
3461 }
3462 SKIP_BLANKS;
3463
Daniel Veillard76d66f42001-05-16 21:05:17 +00003464 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003465 if (name == NULL) {
3466 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3467 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3468 ctxt->sax->error(ctxt->userData,
3469 "NOTATION: Name expected here\n");
3470 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003471 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003472 return;
3473 }
3474 if (!IS_BLANK(CUR)) {
3475 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3476 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3477 ctxt->sax->error(ctxt->userData,
3478 "Space required after the NOTATION name'\n");
3479 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003480 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003481 return;
3482 }
3483 SKIP_BLANKS;
3484
3485 /*
3486 * Parse the IDs.
3487 */
3488 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3489 SKIP_BLANKS;
3490
3491 if (RAW == '>') {
3492 if (input != ctxt->input) {
3493 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3494 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3495 ctxt->sax->error(ctxt->userData,
3496"Notation declaration doesn't start and stop in the same entity\n");
3497 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003498 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003499 }
3500 NEXT;
3501 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3502 (ctxt->sax->notationDecl != NULL))
3503 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3504 } else {
3505 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3506 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3507 ctxt->sax->error(ctxt->userData,
3508 "'>' required to close NOTATION declaration\n");
3509 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003510 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003511 }
Owen Taylor3473f882001-02-23 17:55:21 +00003512 if (Systemid != NULL) xmlFree(Systemid);
3513 if (Pubid != NULL) xmlFree(Pubid);
3514 }
3515}
3516
3517/**
3518 * xmlParseEntityDecl:
3519 * @ctxt: an XML parser context
3520 *
3521 * parse <!ENTITY declarations
3522 *
3523 * [70] EntityDecl ::= GEDecl | PEDecl
3524 *
3525 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3526 *
3527 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3528 *
3529 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3530 *
3531 * [74] PEDef ::= EntityValue | ExternalID
3532 *
3533 * [76] NDataDecl ::= S 'NDATA' S Name
3534 *
3535 * [ VC: Notation Declared ]
3536 * The Name must match the declared name of a notation.
3537 */
3538
3539void
3540xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003541 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003542 xmlChar *value = NULL;
3543 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003544 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003545 int isParameter = 0;
3546 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003547 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003548
3549 GROW;
3550 if ((RAW == '<') && (NXT(1) == '!') &&
3551 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3552 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3553 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3554 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003555 SHRINK;
3556 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003557 skipped = SKIP_BLANKS;
3558 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003559 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3560 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3561 ctxt->sax->error(ctxt->userData,
3562 "Space required after '<!ENTITY'\n");
3563 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003564 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003565 }
Owen Taylor3473f882001-02-23 17:55:21 +00003566
3567 if (RAW == '%') {
3568 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003569 skipped = SKIP_BLANKS;
3570 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003571 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3572 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3573 ctxt->sax->error(ctxt->userData,
3574 "Space required after '%'\n");
3575 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003576 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003577 }
Owen Taylor3473f882001-02-23 17:55:21 +00003578 isParameter = 1;
3579 }
3580
Daniel Veillard76d66f42001-05-16 21:05:17 +00003581 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003582 if (name == NULL) {
3583 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3584 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3585 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3586 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003587 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003588 return;
3589 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003590 skipped = SKIP_BLANKS;
3591 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003592 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3593 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3594 ctxt->sax->error(ctxt->userData,
3595 "Space required after the entity name\n");
3596 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003597 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003598 }
Owen Taylor3473f882001-02-23 17:55:21 +00003599
Daniel Veillardf5582f12002-06-11 10:08:16 +00003600 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003601 /*
3602 * handle the various case of definitions...
3603 */
3604 if (isParameter) {
3605 if ((RAW == '"') || (RAW == '\'')) {
3606 value = xmlParseEntityValue(ctxt, &orig);
3607 if (value) {
3608 if ((ctxt->sax != NULL) &&
3609 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3610 ctxt->sax->entityDecl(ctxt->userData, name,
3611 XML_INTERNAL_PARAMETER_ENTITY,
3612 NULL, NULL, value);
3613 }
3614 } else {
3615 URI = xmlParseExternalID(ctxt, &literal, 1);
3616 if ((URI == NULL) && (literal == NULL)) {
3617 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3618 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3619 ctxt->sax->error(ctxt->userData,
3620 "Entity value required\n");
3621 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003622 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003623 }
3624 if (URI) {
3625 xmlURIPtr uri;
3626
3627 uri = xmlParseURI((const char *) URI);
3628 if (uri == NULL) {
3629 ctxt->errNo = XML_ERR_INVALID_URI;
3630 if ((ctxt->sax != NULL) &&
3631 (!ctxt->disableSAX) &&
3632 (ctxt->sax->error != NULL))
3633 ctxt->sax->error(ctxt->userData,
3634 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003635 /*
3636 * This really ought to be a well formedness error
3637 * but the XML Core WG decided otherwise c.f. issue
3638 * E26 of the XML erratas.
3639 */
Owen Taylor3473f882001-02-23 17:55:21 +00003640 } else {
3641 if (uri->fragment != NULL) {
3642 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3643 if ((ctxt->sax != NULL) &&
3644 (!ctxt->disableSAX) &&
3645 (ctxt->sax->error != NULL))
3646 ctxt->sax->error(ctxt->userData,
3647 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003648 /*
3649 * Okay this is foolish to block those but not
3650 * invalid URIs.
3651 */
Owen Taylor3473f882001-02-23 17:55:21 +00003652 ctxt->wellFormed = 0;
3653 } else {
3654 if ((ctxt->sax != NULL) &&
3655 (!ctxt->disableSAX) &&
3656 (ctxt->sax->entityDecl != NULL))
3657 ctxt->sax->entityDecl(ctxt->userData, name,
3658 XML_EXTERNAL_PARAMETER_ENTITY,
3659 literal, URI, NULL);
3660 }
3661 xmlFreeURI(uri);
3662 }
3663 }
3664 }
3665 } else {
3666 if ((RAW == '"') || (RAW == '\'')) {
3667 value = xmlParseEntityValue(ctxt, &orig);
3668 if ((ctxt->sax != NULL) &&
3669 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3670 ctxt->sax->entityDecl(ctxt->userData, name,
3671 XML_INTERNAL_GENERAL_ENTITY,
3672 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003673 /*
3674 * For expat compatibility in SAX mode.
3675 */
3676 if ((ctxt->myDoc == NULL) ||
3677 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3678 if (ctxt->myDoc == NULL) {
3679 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3680 }
3681 if (ctxt->myDoc->intSubset == NULL)
3682 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3683 BAD_CAST "fake", NULL, NULL);
3684
Daniel Veillard1af9a412003-08-20 22:54:39 +00003685 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3686 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003687 }
Owen Taylor3473f882001-02-23 17:55:21 +00003688 } else {
3689 URI = xmlParseExternalID(ctxt, &literal, 1);
3690 if ((URI == NULL) && (literal == NULL)) {
3691 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3692 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3693 ctxt->sax->error(ctxt->userData,
3694 "Entity value required\n");
3695 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003696 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003697 }
3698 if (URI) {
3699 xmlURIPtr uri;
3700
3701 uri = xmlParseURI((const char *)URI);
3702 if (uri == NULL) {
3703 ctxt->errNo = XML_ERR_INVALID_URI;
3704 if ((ctxt->sax != NULL) &&
3705 (!ctxt->disableSAX) &&
3706 (ctxt->sax->error != NULL))
3707 ctxt->sax->error(ctxt->userData,
3708 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003709 /*
3710 * This really ought to be a well formedness error
3711 * but the XML Core WG decided otherwise c.f. issue
3712 * E26 of the XML erratas.
3713 */
Owen Taylor3473f882001-02-23 17:55:21 +00003714 } else {
3715 if (uri->fragment != NULL) {
3716 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3717 if ((ctxt->sax != NULL) &&
3718 (!ctxt->disableSAX) &&
3719 (ctxt->sax->error != NULL))
3720 ctxt->sax->error(ctxt->userData,
3721 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003722 /*
3723 * Okay this is foolish to block those but not
3724 * invalid URIs.
3725 */
Owen Taylor3473f882001-02-23 17:55:21 +00003726 ctxt->wellFormed = 0;
3727 }
3728 xmlFreeURI(uri);
3729 }
3730 }
3731 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3732 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3733 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3734 ctxt->sax->error(ctxt->userData,
3735 "Space required before 'NDATA'\n");
3736 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003737 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003738 }
3739 SKIP_BLANKS;
3740 if ((RAW == 'N') && (NXT(1) == 'D') &&
3741 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3742 (NXT(4) == 'A')) {
3743 SKIP(5);
3744 if (!IS_BLANK(CUR)) {
3745 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3746 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3747 ctxt->sax->error(ctxt->userData,
3748 "Space required after 'NDATA'\n");
3749 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003750 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003751 }
3752 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003753 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003754 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3755 (ctxt->sax->unparsedEntityDecl != NULL))
3756 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3757 literal, URI, ndata);
3758 } else {
3759 if ((ctxt->sax != NULL) &&
3760 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3761 ctxt->sax->entityDecl(ctxt->userData, name,
3762 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3763 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003764 /*
3765 * For expat compatibility in SAX mode.
3766 * assuming the entity replacement was asked for
3767 */
3768 if ((ctxt->replaceEntities != 0) &&
3769 ((ctxt->myDoc == NULL) ||
3770 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
3771 if (ctxt->myDoc == NULL) {
3772 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3773 }
3774
3775 if (ctxt->myDoc->intSubset == NULL)
3776 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3777 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00003778 xmlSAX2EntityDecl(ctxt, name,
3779 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3780 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003781 }
Owen Taylor3473f882001-02-23 17:55:21 +00003782 }
3783 }
3784 }
3785 SKIP_BLANKS;
3786 if (RAW != '>') {
3787 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3788 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3789 ctxt->sax->error(ctxt->userData,
3790 "xmlParseEntityDecl: entity %s not terminated\n", name);
3791 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003792 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003793 } else {
3794 if (input != ctxt->input) {
3795 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3796 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3797 ctxt->sax->error(ctxt->userData,
3798"Entity declaration doesn't start and stop in the same entity\n");
3799 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003800 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003801 }
3802 NEXT;
3803 }
3804 if (orig != NULL) {
3805 /*
3806 * Ugly mechanism to save the raw entity value.
3807 */
3808 xmlEntityPtr cur = NULL;
3809
3810 if (isParameter) {
3811 if ((ctxt->sax != NULL) &&
3812 (ctxt->sax->getParameterEntity != NULL))
3813 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3814 } else {
3815 if ((ctxt->sax != NULL) &&
3816 (ctxt->sax->getEntity != NULL))
3817 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003818 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00003819 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003820 }
Owen Taylor3473f882001-02-23 17:55:21 +00003821 }
3822 if (cur != NULL) {
3823 if (cur->orig != NULL)
3824 xmlFree(orig);
3825 else
3826 cur->orig = orig;
3827 } else
3828 xmlFree(orig);
3829 }
Owen Taylor3473f882001-02-23 17:55:21 +00003830 if (value != NULL) xmlFree(value);
3831 if (URI != NULL) xmlFree(URI);
3832 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00003833 }
3834}
3835
3836/**
3837 * xmlParseDefaultDecl:
3838 * @ctxt: an XML parser context
3839 * @value: Receive a possible fixed default value for the attribute
3840 *
3841 * Parse an attribute default declaration
3842 *
3843 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3844 *
3845 * [ VC: Required Attribute ]
3846 * if the default declaration is the keyword #REQUIRED, then the
3847 * attribute must be specified for all elements of the type in the
3848 * attribute-list declaration.
3849 *
3850 * [ VC: Attribute Default Legal ]
3851 * The declared default value must meet the lexical constraints of
3852 * the declared attribute type c.f. xmlValidateAttributeDecl()
3853 *
3854 * [ VC: Fixed Attribute Default ]
3855 * if an attribute has a default value declared with the #FIXED
3856 * keyword, instances of that attribute must match the default value.
3857 *
3858 * [ WFC: No < in Attribute Values ]
3859 * handled in xmlParseAttValue()
3860 *
3861 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3862 * or XML_ATTRIBUTE_FIXED.
3863 */
3864
3865int
3866xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3867 int val;
3868 xmlChar *ret;
3869
3870 *value = NULL;
3871 if ((RAW == '#') && (NXT(1) == 'R') &&
3872 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3873 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3874 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3875 (NXT(8) == 'D')) {
3876 SKIP(9);
3877 return(XML_ATTRIBUTE_REQUIRED);
3878 }
3879 if ((RAW == '#') && (NXT(1) == 'I') &&
3880 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3881 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3882 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3883 SKIP(8);
3884 return(XML_ATTRIBUTE_IMPLIED);
3885 }
3886 val = XML_ATTRIBUTE_NONE;
3887 if ((RAW == '#') && (NXT(1) == 'F') &&
3888 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3889 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3890 SKIP(6);
3891 val = XML_ATTRIBUTE_FIXED;
3892 if (!IS_BLANK(CUR)) {
3893 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3894 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3895 ctxt->sax->error(ctxt->userData,
3896 "Space required after '#FIXED'\n");
3897 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003898 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003899 }
3900 SKIP_BLANKS;
3901 }
3902 ret = xmlParseAttValue(ctxt);
3903 ctxt->instate = XML_PARSER_DTD;
3904 if (ret == NULL) {
3905 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3906 ctxt->sax->error(ctxt->userData,
3907 "Attribute default value declaration error\n");
3908 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003909 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003910 } else
3911 *value = ret;
3912 return(val);
3913}
3914
3915/**
3916 * xmlParseNotationType:
3917 * @ctxt: an XML parser context
3918 *
3919 * parse an Notation attribute type.
3920 *
3921 * Note: the leading 'NOTATION' S part has already being parsed...
3922 *
3923 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3924 *
3925 * [ VC: Notation Attributes ]
3926 * Values of this type must match one of the notation names included
3927 * in the declaration; all notation names in the declaration must be declared.
3928 *
3929 * Returns: the notation attribute tree built while parsing
3930 */
3931
3932xmlEnumerationPtr
3933xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003934 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003935 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3936
3937 if (RAW != '(') {
3938 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3939 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3940 ctxt->sax->error(ctxt->userData,
3941 "'(' required to start 'NOTATION'\n");
3942 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003943 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003944 return(NULL);
3945 }
3946 SHRINK;
3947 do {
3948 NEXT;
3949 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003950 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003951 if (name == NULL) {
3952 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3953 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3954 ctxt->sax->error(ctxt->userData,
3955 "Name expected in NOTATION declaration\n");
3956 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003957 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003958 return(ret);
3959 }
3960 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00003961 if (cur == NULL) return(ret);
3962 if (last == NULL) ret = last = cur;
3963 else {
3964 last->next = cur;
3965 last = cur;
3966 }
3967 SKIP_BLANKS;
3968 } while (RAW == '|');
3969 if (RAW != ')') {
3970 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3971 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3972 ctxt->sax->error(ctxt->userData,
3973 "')' required to finish NOTATION declaration\n");
3974 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003975 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003976 if ((last != NULL) && (last != ret))
3977 xmlFreeEnumeration(last);
3978 return(ret);
3979 }
3980 NEXT;
3981 return(ret);
3982}
3983
3984/**
3985 * xmlParseEnumerationType:
3986 * @ctxt: an XML parser context
3987 *
3988 * parse an Enumeration attribute type.
3989 *
3990 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3991 *
3992 * [ VC: Enumeration ]
3993 * Values of this type must match one of the Nmtoken tokens in
3994 * the declaration
3995 *
3996 * Returns: the enumeration attribute tree built while parsing
3997 */
3998
3999xmlEnumerationPtr
4000xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4001 xmlChar *name;
4002 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4003
4004 if (RAW != '(') {
4005 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
4006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4007 ctxt->sax->error(ctxt->userData,
4008 "'(' required to start ATTLIST enumeration\n");
4009 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004010 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004011 return(NULL);
4012 }
4013 SHRINK;
4014 do {
4015 NEXT;
4016 SKIP_BLANKS;
4017 name = xmlParseNmtoken(ctxt);
4018 if (name == NULL) {
4019 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
4020 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4021 ctxt->sax->error(ctxt->userData,
4022 "NmToken expected in ATTLIST enumeration\n");
4023 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004024 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004025 return(ret);
4026 }
4027 cur = xmlCreateEnumeration(name);
4028 xmlFree(name);
4029 if (cur == NULL) return(ret);
4030 if (last == NULL) ret = last = cur;
4031 else {
4032 last->next = cur;
4033 last = cur;
4034 }
4035 SKIP_BLANKS;
4036 } while (RAW == '|');
4037 if (RAW != ')') {
4038 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
4039 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4040 ctxt->sax->error(ctxt->userData,
4041 "')' required to finish ATTLIST enumeration\n");
4042 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004043 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004044 return(ret);
4045 }
4046 NEXT;
4047 return(ret);
4048}
4049
4050/**
4051 * xmlParseEnumeratedType:
4052 * @ctxt: an XML parser context
4053 * @tree: the enumeration tree built while parsing
4054 *
4055 * parse an Enumerated attribute type.
4056 *
4057 * [57] EnumeratedType ::= NotationType | Enumeration
4058 *
4059 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4060 *
4061 *
4062 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4063 */
4064
4065int
4066xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4067 if ((RAW == 'N') && (NXT(1) == 'O') &&
4068 (NXT(2) == 'T') && (NXT(3) == 'A') &&
4069 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4070 (NXT(6) == 'O') && (NXT(7) == 'N')) {
4071 SKIP(8);
4072 if (!IS_BLANK(CUR)) {
4073 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4074 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4075 ctxt->sax->error(ctxt->userData,
4076 "Space required after 'NOTATION'\n");
4077 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004078 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004079 return(0);
4080 }
4081 SKIP_BLANKS;
4082 *tree = xmlParseNotationType(ctxt);
4083 if (*tree == NULL) return(0);
4084 return(XML_ATTRIBUTE_NOTATION);
4085 }
4086 *tree = xmlParseEnumerationType(ctxt);
4087 if (*tree == NULL) return(0);
4088 return(XML_ATTRIBUTE_ENUMERATION);
4089}
4090
4091/**
4092 * xmlParseAttributeType:
4093 * @ctxt: an XML parser context
4094 * @tree: the enumeration tree built while parsing
4095 *
4096 * parse the Attribute list def for an element
4097 *
4098 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4099 *
4100 * [55] StringType ::= 'CDATA'
4101 *
4102 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4103 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4104 *
4105 * Validity constraints for attribute values syntax are checked in
4106 * xmlValidateAttributeValue()
4107 *
4108 * [ VC: ID ]
4109 * Values of type ID must match the Name production. A name must not
4110 * appear more than once in an XML document as a value of this type;
4111 * i.e., ID values must uniquely identify the elements which bear them.
4112 *
4113 * [ VC: One ID per Element Type ]
4114 * No element type may have more than one ID attribute specified.
4115 *
4116 * [ VC: ID Attribute Default ]
4117 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4118 *
4119 * [ VC: IDREF ]
4120 * Values of type IDREF must match the Name production, and values
4121 * of type IDREFS must match Names; each IDREF Name must match the value
4122 * of an ID attribute on some element in the XML document; i.e. IDREF
4123 * values must match the value of some ID attribute.
4124 *
4125 * [ VC: Entity Name ]
4126 * Values of type ENTITY must match the Name production, values
4127 * of type ENTITIES must match Names; each Entity Name must match the
4128 * name of an unparsed entity declared in the DTD.
4129 *
4130 * [ VC: Name Token ]
4131 * Values of type NMTOKEN must match the Nmtoken production; values
4132 * of type NMTOKENS must match Nmtokens.
4133 *
4134 * Returns the attribute type
4135 */
4136int
4137xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4138 SHRINK;
4139 if ((RAW == 'C') && (NXT(1) == 'D') &&
4140 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4141 (NXT(4) == 'A')) {
4142 SKIP(5);
4143 return(XML_ATTRIBUTE_CDATA);
4144 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4145 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4146 (NXT(4) == 'F') && (NXT(5) == 'S')) {
4147 SKIP(6);
4148 return(XML_ATTRIBUTE_IDREFS);
4149 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4150 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4151 (NXT(4) == 'F')) {
4152 SKIP(5);
4153 return(XML_ATTRIBUTE_IDREF);
4154 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4155 SKIP(2);
4156 return(XML_ATTRIBUTE_ID);
4157 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4158 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4159 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4160 SKIP(6);
4161 return(XML_ATTRIBUTE_ENTITY);
4162 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4163 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4164 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4165 (NXT(6) == 'E') && (NXT(7) == 'S')) {
4166 SKIP(8);
4167 return(XML_ATTRIBUTE_ENTITIES);
4168 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4169 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4170 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4171 (NXT(6) == 'N') && (NXT(7) == 'S')) {
4172 SKIP(8);
4173 return(XML_ATTRIBUTE_NMTOKENS);
4174 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4175 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4176 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4177 (NXT(6) == 'N')) {
4178 SKIP(7);
4179 return(XML_ATTRIBUTE_NMTOKEN);
4180 }
4181 return(xmlParseEnumeratedType(ctxt, tree));
4182}
4183
4184/**
4185 * xmlParseAttributeListDecl:
4186 * @ctxt: an XML parser context
4187 *
4188 * : parse the Attribute list def for an element
4189 *
4190 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4191 *
4192 * [53] AttDef ::= S Name S AttType S DefaultDecl
4193 *
4194 */
4195void
4196xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004197 const xmlChar *elemName;
4198 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004199 xmlEnumerationPtr tree;
4200
4201 if ((RAW == '<') && (NXT(1) == '!') &&
4202 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4203 (NXT(4) == 'T') && (NXT(5) == 'L') &&
4204 (NXT(6) == 'I') && (NXT(7) == 'S') &&
4205 (NXT(8) == 'T')) {
4206 xmlParserInputPtr input = ctxt->input;
4207
4208 SKIP(9);
4209 if (!IS_BLANK(CUR)) {
4210 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4211 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4212 ctxt->sax->error(ctxt->userData,
4213 "Space required after '<!ATTLIST'\n");
4214 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004215 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004216 }
4217 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004218 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004219 if (elemName == NULL) {
4220 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4221 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4222 ctxt->sax->error(ctxt->userData,
4223 "ATTLIST: no name for Element\n");
4224 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004225 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004226 return;
4227 }
4228 SKIP_BLANKS;
4229 GROW;
4230 while (RAW != '>') {
4231 const xmlChar *check = CUR_PTR;
4232 int type;
4233 int def;
4234 xmlChar *defaultValue = NULL;
4235
4236 GROW;
4237 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004238 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004239 if (attrName == NULL) {
4240 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4241 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4242 ctxt->sax->error(ctxt->userData,
4243 "ATTLIST: no name for Attribute\n");
4244 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004245 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004246 break;
4247 }
4248 GROW;
4249 if (!IS_BLANK(CUR)) {
4250 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4251 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4252 ctxt->sax->error(ctxt->userData,
4253 "Space required after the attribute name\n");
4254 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004255 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004256 if (defaultValue != NULL)
4257 xmlFree(defaultValue);
4258 break;
4259 }
4260 SKIP_BLANKS;
4261
4262 type = xmlParseAttributeType(ctxt, &tree);
4263 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004264 if (defaultValue != NULL)
4265 xmlFree(defaultValue);
4266 break;
4267 }
4268
4269 GROW;
4270 if (!IS_BLANK(CUR)) {
4271 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4272 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4273 ctxt->sax->error(ctxt->userData,
4274 "Space required after the attribute type\n");
4275 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004276 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004277 if (defaultValue != NULL)
4278 xmlFree(defaultValue);
4279 if (tree != NULL)
4280 xmlFreeEnumeration(tree);
4281 break;
4282 }
4283 SKIP_BLANKS;
4284
4285 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4286 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004287 if (defaultValue != NULL)
4288 xmlFree(defaultValue);
4289 if (tree != NULL)
4290 xmlFreeEnumeration(tree);
4291 break;
4292 }
4293
4294 GROW;
4295 if (RAW != '>') {
4296 if (!IS_BLANK(CUR)) {
4297 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4298 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4299 ctxt->sax->error(ctxt->userData,
4300 "Space required after the attribute default value\n");
4301 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004302 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004303 if (defaultValue != NULL)
4304 xmlFree(defaultValue);
4305 if (tree != NULL)
4306 xmlFreeEnumeration(tree);
4307 break;
4308 }
4309 SKIP_BLANKS;
4310 }
4311 if (check == CUR_PTR) {
4312 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4313 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4314 ctxt->sax->error(ctxt->userData,
4315 "xmlParseAttributeListDecl: detected internal error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004316 if (defaultValue != NULL)
4317 xmlFree(defaultValue);
4318 if (tree != NULL)
4319 xmlFreeEnumeration(tree);
4320 break;
4321 }
4322 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4323 (ctxt->sax->attributeDecl != NULL))
4324 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4325 type, def, defaultValue, tree);
Owen Taylor3473f882001-02-23 17:55:21 +00004326 if (defaultValue != NULL)
4327 xmlFree(defaultValue);
4328 GROW;
4329 }
4330 if (RAW == '>') {
4331 if (input != ctxt->input) {
4332 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4333 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4334 ctxt->sax->error(ctxt->userData,
4335"Attribute list declaration doesn't start and stop in the same entity\n");
4336 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004337 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004338 }
4339 NEXT;
4340 }
Owen Taylor3473f882001-02-23 17:55:21 +00004341 }
4342}
4343
4344/**
4345 * xmlParseElementMixedContentDecl:
4346 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004347 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004348 *
4349 * parse the declaration for a Mixed Element content
4350 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4351 *
4352 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4353 * '(' S? '#PCDATA' S? ')'
4354 *
4355 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4356 *
4357 * [ VC: No Duplicate Types ]
4358 * The same name must not appear more than once in a single
4359 * mixed-content declaration.
4360 *
4361 * returns: the list of the xmlElementContentPtr describing the element choices
4362 */
4363xmlElementContentPtr
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004364xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004365 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004366 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004367
4368 GROW;
4369 if ((RAW == '#') && (NXT(1) == 'P') &&
4370 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4371 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4372 (NXT(6) == 'A')) {
4373 SKIP(7);
4374 SKIP_BLANKS;
4375 SHRINK;
4376 if (RAW == ')') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004377 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4378 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4379 if (ctxt->vctxt.error != NULL)
4380 ctxt->vctxt.error(ctxt->vctxt.userData,
4381"Element content declaration doesn't start and stop in the same entity\n");
4382 ctxt->valid = 0;
4383 }
Owen Taylor3473f882001-02-23 17:55:21 +00004384 NEXT;
4385 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4386 if (RAW == '*') {
4387 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4388 NEXT;
4389 }
4390 return(ret);
4391 }
4392 if ((RAW == '(') || (RAW == '|')) {
4393 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4394 if (ret == NULL) return(NULL);
4395 }
4396 while (RAW == '|') {
4397 NEXT;
4398 if (elem == NULL) {
4399 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4400 if (ret == NULL) return(NULL);
4401 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004402 if (cur != NULL)
4403 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004404 cur = ret;
4405 } else {
4406 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4407 if (n == NULL) return(NULL);
4408 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004409 if (n->c1 != NULL)
4410 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004411 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004412 if (n != NULL)
4413 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004414 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004415 }
4416 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004417 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004418 if (elem == NULL) {
4419 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4420 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4421 ctxt->sax->error(ctxt->userData,
4422 "xmlParseElementMixedContentDecl : Name expected\n");
4423 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004424 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004425 xmlFreeElementContent(cur);
4426 return(NULL);
4427 }
4428 SKIP_BLANKS;
4429 GROW;
4430 }
4431 if ((RAW == ')') && (NXT(1) == '*')) {
4432 if (elem != NULL) {
4433 cur->c2 = xmlNewElementContent(elem,
4434 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004435 if (cur->c2 != NULL)
4436 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004437 }
4438 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004439 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4440 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4441 if (ctxt->vctxt.error != NULL)
4442 ctxt->vctxt.error(ctxt->vctxt.userData,
4443"Element content declaration doesn't start and stop in the same entity\n");
4444 ctxt->valid = 0;
4445 }
Owen Taylor3473f882001-02-23 17:55:21 +00004446 SKIP(2);
4447 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00004448 xmlFreeElementContent(ret);
4449 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4450 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4451 ctxt->sax->error(ctxt->userData,
4452 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4453 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004454 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004455 return(NULL);
4456 }
4457
4458 } else {
4459 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4460 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4461 ctxt->sax->error(ctxt->userData,
4462 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4463 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004464 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004465 }
4466 return(ret);
4467}
4468
4469/**
4470 * xmlParseElementChildrenContentDecl:
4471 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004472 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004473 *
4474 * parse the declaration for an element children content model
4475 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4476 *
4477 *
4478 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4479 *
4480 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4481 *
4482 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4483 *
4484 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4485 *
4486 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4487 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004488 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004489 * opening or closing parentheses in a choice, seq, or Mixed
4490 * construct is contained in the replacement text for a parameter
4491 * entity, both must be contained in the same replacement text. For
4492 * interoperability, if a parameter-entity reference appears in a
4493 * choice, seq, or Mixed construct, its replacement text should not
4494 * be empty, and neither the first nor last non-blank character of
4495 * the replacement text should be a connector (| or ,).
4496 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004497 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004498 * hierarchy.
4499 */
4500xmlElementContentPtr
Owen Taylor3473f882001-02-23 17:55:21 +00004501xmlParseElementChildrenContentDecl
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004502(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004503 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004504 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00004505 xmlChar type = 0;
4506
4507 SKIP_BLANKS;
4508 GROW;
4509 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004510 xmlParserInputPtr input = ctxt->input;
4511
Owen Taylor3473f882001-02-23 17:55:21 +00004512 /* Recurse on first child */
4513 NEXT;
4514 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004515 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004516 SKIP_BLANKS;
4517 GROW;
4518 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004519 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004520 if (elem == NULL) {
4521 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4522 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4523 ctxt->sax->error(ctxt->userData,
4524 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4525 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004526 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004527 return(NULL);
4528 }
4529 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004530 if (cur == NULL) {
4531 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4532 ctxt->sax->error(ctxt->userData,
4533 "xmlParseElementChildrenContentDecl : out of memory\n");
4534 ctxt->errNo = XML_ERR_NO_MEMORY;
4535 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004536 return(NULL);
4537 }
Owen Taylor3473f882001-02-23 17:55:21 +00004538 GROW;
4539 if (RAW == '?') {
4540 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4541 NEXT;
4542 } else if (RAW == '*') {
4543 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4544 NEXT;
4545 } else if (RAW == '+') {
4546 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4547 NEXT;
4548 } else {
4549 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4550 }
Owen Taylor3473f882001-02-23 17:55:21 +00004551 GROW;
4552 }
4553 SKIP_BLANKS;
4554 SHRINK;
4555 while (RAW != ')') {
4556 /*
4557 * Each loop we parse one separator and one element.
4558 */
4559 if (RAW == ',') {
4560 if (type == 0) type = CUR;
4561
4562 /*
4563 * Detect "Name | Name , Name" error
4564 */
4565 else if (type != CUR) {
4566 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4567 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4568 ctxt->sax->error(ctxt->userData,
4569 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4570 type);
4571 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004572 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004573 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004574 xmlFreeElementContent(last);
4575 if (ret != NULL)
4576 xmlFreeElementContent(ret);
4577 return(NULL);
4578 }
4579 NEXT;
4580
4581 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4582 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004583 if ((last != NULL) && (last != ret))
4584 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004585 xmlFreeElementContent(ret);
4586 return(NULL);
4587 }
4588 if (last == NULL) {
4589 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004590 if (ret != NULL)
4591 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004592 ret = cur = op;
4593 } else {
4594 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004595 if (op != NULL)
4596 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004597 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004598 if (last != NULL)
4599 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004600 cur =op;
4601 last = NULL;
4602 }
4603 } else if (RAW == '|') {
4604 if (type == 0) type = CUR;
4605
4606 /*
4607 * Detect "Name , Name | Name" error
4608 */
4609 else if (type != CUR) {
4610 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4611 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4612 ctxt->sax->error(ctxt->userData,
4613 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4614 type);
4615 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004616 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004617 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004618 xmlFreeElementContent(last);
4619 if (ret != NULL)
4620 xmlFreeElementContent(ret);
4621 return(NULL);
4622 }
4623 NEXT;
4624
4625 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4626 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004627 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004628 xmlFreeElementContent(last);
4629 if (ret != NULL)
4630 xmlFreeElementContent(ret);
4631 return(NULL);
4632 }
4633 if (last == NULL) {
4634 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004635 if (ret != NULL)
4636 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004637 ret = cur = op;
4638 } else {
4639 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004640 if (op != NULL)
4641 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004642 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004643 if (last != NULL)
4644 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004645 cur =op;
4646 last = NULL;
4647 }
4648 } else {
4649 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4650 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4651 ctxt->sax->error(ctxt->userData,
4652 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4653 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004654 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004655 if (ret != NULL)
4656 xmlFreeElementContent(ret);
4657 return(NULL);
4658 }
4659 GROW;
4660 SKIP_BLANKS;
4661 GROW;
4662 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004663 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004664 /* Recurse on second child */
4665 NEXT;
4666 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004667 last = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004668 SKIP_BLANKS;
4669 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004670 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004671 if (elem == NULL) {
4672 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4673 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4674 ctxt->sax->error(ctxt->userData,
4675 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4676 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004677 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004678 if (ret != NULL)
4679 xmlFreeElementContent(ret);
4680 return(NULL);
4681 }
4682 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00004683 if (RAW == '?') {
4684 last->ocur = XML_ELEMENT_CONTENT_OPT;
4685 NEXT;
4686 } else if (RAW == '*') {
4687 last->ocur = XML_ELEMENT_CONTENT_MULT;
4688 NEXT;
4689 } else if (RAW == '+') {
4690 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4691 NEXT;
4692 } else {
4693 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4694 }
4695 }
4696 SKIP_BLANKS;
4697 GROW;
4698 }
4699 if ((cur != NULL) && (last != NULL)) {
4700 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004701 if (last != NULL)
4702 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004703 }
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004704 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4705 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4706 if (ctxt->vctxt.error != NULL)
4707 ctxt->vctxt.error(ctxt->vctxt.userData,
4708"Element content declaration doesn't start and stop in the same entity\n");
4709 ctxt->valid = 0;
4710 }
Owen Taylor3473f882001-02-23 17:55:21 +00004711 NEXT;
4712 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004713 if (ret != NULL)
4714 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004715 NEXT;
4716 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004717 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004718 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004719 cur = ret;
4720 /*
4721 * Some normalization:
4722 * (a | b* | c?)* == (a | b | c)*
4723 */
4724 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4725 if ((cur->c1 != NULL) &&
4726 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4727 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4728 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4729 if ((cur->c2 != NULL) &&
4730 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4731 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4732 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4733 cur = cur->c2;
4734 }
4735 }
Owen Taylor3473f882001-02-23 17:55:21 +00004736 NEXT;
4737 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004738 if (ret != NULL) {
4739 int found = 0;
4740
Daniel Veillarde470df72001-04-18 21:41:07 +00004741 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004742 /*
4743 * Some normalization:
4744 * (a | b*)+ == (a | b)*
4745 * (a | b?)+ == (a | b)*
4746 */
4747 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4748 if ((cur->c1 != NULL) &&
4749 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4750 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4751 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4752 found = 1;
4753 }
4754 if ((cur->c2 != NULL) &&
4755 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4756 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4757 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4758 found = 1;
4759 }
4760 cur = cur->c2;
4761 }
4762 if (found)
4763 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4764 }
Owen Taylor3473f882001-02-23 17:55:21 +00004765 NEXT;
4766 }
4767 return(ret);
4768}
4769
4770/**
4771 * xmlParseElementContentDecl:
4772 * @ctxt: an XML parser context
4773 * @name: the name of the element being defined.
4774 * @result: the Element Content pointer will be stored here if any
4775 *
4776 * parse the declaration for an Element content either Mixed or Children,
4777 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4778 *
4779 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4780 *
4781 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4782 */
4783
4784int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004785xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00004786 xmlElementContentPtr *result) {
4787
4788 xmlElementContentPtr tree = NULL;
4789 xmlParserInputPtr input = ctxt->input;
4790 int res;
4791
4792 *result = NULL;
4793
4794 if (RAW != '(') {
4795 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4796 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4797 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004798 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004799 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004800 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004801 return(-1);
4802 }
4803 NEXT;
4804 GROW;
4805 SKIP_BLANKS;
4806 if ((RAW == '#') && (NXT(1) == 'P') &&
4807 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4808 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4809 (NXT(6) == 'A')) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004810 tree = xmlParseElementMixedContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004811 res = XML_ELEMENT_TYPE_MIXED;
4812 } else {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004813 tree = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004814 res = XML_ELEMENT_TYPE_ELEMENT;
4815 }
Owen Taylor3473f882001-02-23 17:55:21 +00004816 SKIP_BLANKS;
4817 *result = tree;
4818 return(res);
4819}
4820
4821/**
4822 * xmlParseElementDecl:
4823 * @ctxt: an XML parser context
4824 *
4825 * parse an Element declaration.
4826 *
4827 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4828 *
4829 * [ VC: Unique Element Type Declaration ]
4830 * No element type may be declared more than once
4831 *
4832 * Returns the type of the element, or -1 in case of error
4833 */
4834int
4835xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004836 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004837 int ret = -1;
4838 xmlElementContentPtr content = NULL;
4839
4840 GROW;
4841 if ((RAW == '<') && (NXT(1) == '!') &&
4842 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4843 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4844 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4845 (NXT(8) == 'T')) {
4846 xmlParserInputPtr input = ctxt->input;
4847
4848 SKIP(9);
4849 if (!IS_BLANK(CUR)) {
4850 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4851 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4852 ctxt->sax->error(ctxt->userData,
4853 "Space required after 'ELEMENT'\n");
4854 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004855 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004856 }
4857 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004858 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004859 if (name == NULL) {
4860 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4861 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4862 ctxt->sax->error(ctxt->userData,
4863 "xmlParseElementDecl: no name for Element\n");
4864 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004865 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004866 return(-1);
4867 }
4868 while ((RAW == 0) && (ctxt->inputNr > 1))
4869 xmlPopInput(ctxt);
4870 if (!IS_BLANK(CUR)) {
4871 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4872 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4873 ctxt->sax->error(ctxt->userData,
4874 "Space required after the element name\n");
4875 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004876 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004877 }
4878 SKIP_BLANKS;
4879 if ((RAW == 'E') && (NXT(1) == 'M') &&
4880 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4881 (NXT(4) == 'Y')) {
4882 SKIP(5);
4883 /*
4884 * Element must always be empty.
4885 */
4886 ret = XML_ELEMENT_TYPE_EMPTY;
4887 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4888 (NXT(2) == 'Y')) {
4889 SKIP(3);
4890 /*
4891 * Element is a generic container.
4892 */
4893 ret = XML_ELEMENT_TYPE_ANY;
4894 } else if (RAW == '(') {
4895 ret = xmlParseElementContentDecl(ctxt, name, &content);
4896 } else {
4897 /*
4898 * [ WFC: PEs in Internal Subset ] error handling.
4899 */
4900 if ((RAW == '%') && (ctxt->external == 0) &&
4901 (ctxt->inputNr == 1)) {
4902 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4903 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4904 ctxt->sax->error(ctxt->userData,
4905 "PEReference: forbidden within markup decl in internal subset\n");
4906 } else {
4907 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4908 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4909 ctxt->sax->error(ctxt->userData,
4910 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4911 }
4912 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004913 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004914 return(-1);
4915 }
4916
4917 SKIP_BLANKS;
4918 /*
4919 * Pop-up of finished entities.
4920 */
4921 while ((RAW == 0) && (ctxt->inputNr > 1))
4922 xmlPopInput(ctxt);
4923 SKIP_BLANKS;
4924
4925 if (RAW != '>') {
4926 ctxt->errNo = XML_ERR_GT_REQUIRED;
4927 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4928 ctxt->sax->error(ctxt->userData,
4929 "xmlParseElementDecl: expected '>' at the end\n");
4930 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004931 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004932 } else {
4933 if (input != ctxt->input) {
4934 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4935 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4936 ctxt->sax->error(ctxt->userData,
4937"Element declaration doesn't start and stop in the same entity\n");
4938 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004939 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004940 }
4941
4942 NEXT;
4943 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4944 (ctxt->sax->elementDecl != NULL))
4945 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4946 content);
4947 }
4948 if (content != NULL) {
4949 xmlFreeElementContent(content);
4950 }
Owen Taylor3473f882001-02-23 17:55:21 +00004951 }
4952 return(ret);
4953}
4954
4955/**
Owen Taylor3473f882001-02-23 17:55:21 +00004956 * xmlParseConditionalSections
4957 * @ctxt: an XML parser context
4958 *
4959 * [61] conditionalSect ::= includeSect | ignoreSect
4960 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4961 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4962 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4963 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4964 */
4965
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004966static void
Owen Taylor3473f882001-02-23 17:55:21 +00004967xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4968 SKIP(3);
4969 SKIP_BLANKS;
4970 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4971 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4972 (NXT(6) == 'E')) {
4973 SKIP(7);
4974 SKIP_BLANKS;
4975 if (RAW != '[') {
4976 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4977 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4978 ctxt->sax->error(ctxt->userData,
4979 "XML conditional section '[' expected\n");
4980 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004981 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004982 } else {
4983 NEXT;
4984 }
4985 if (xmlParserDebugEntities) {
4986 if ((ctxt->input != NULL) && (ctxt->input->filename))
4987 xmlGenericError(xmlGenericErrorContext,
4988 "%s(%d): ", ctxt->input->filename,
4989 ctxt->input->line);
4990 xmlGenericError(xmlGenericErrorContext,
4991 "Entering INCLUDE Conditional Section\n");
4992 }
4993
4994 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4995 (NXT(2) != '>'))) {
4996 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00004997 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00004998
4999 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5000 xmlParseConditionalSections(ctxt);
5001 } else if (IS_BLANK(CUR)) {
5002 NEXT;
5003 } else if (RAW == '%') {
5004 xmlParsePEReference(ctxt);
5005 } else
5006 xmlParseMarkupDecl(ctxt);
5007
5008 /*
5009 * Pop-up of finished entities.
5010 */
5011 while ((RAW == 0) && (ctxt->inputNr > 1))
5012 xmlPopInput(ctxt);
5013
Daniel Veillardfdc91562002-07-01 21:52:03 +00005014 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005015 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5016 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5017 ctxt->sax->error(ctxt->userData,
5018 "Content error in the external subset\n");
5019 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005020 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005021 break;
5022 }
5023 }
5024 if (xmlParserDebugEntities) {
5025 if ((ctxt->input != NULL) && (ctxt->input->filename))
5026 xmlGenericError(xmlGenericErrorContext,
5027 "%s(%d): ", ctxt->input->filename,
5028 ctxt->input->line);
5029 xmlGenericError(xmlGenericErrorContext,
5030 "Leaving INCLUDE Conditional Section\n");
5031 }
5032
5033 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
5034 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
5035 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005036 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005037 int depth = 0;
5038
5039 SKIP(6);
5040 SKIP_BLANKS;
5041 if (RAW != '[') {
5042 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5043 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5044 ctxt->sax->error(ctxt->userData,
5045 "XML conditional section '[' expected\n");
5046 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005047 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005048 } else {
5049 NEXT;
5050 }
5051 if (xmlParserDebugEntities) {
5052 if ((ctxt->input != NULL) && (ctxt->input->filename))
5053 xmlGenericError(xmlGenericErrorContext,
5054 "%s(%d): ", ctxt->input->filename,
5055 ctxt->input->line);
5056 xmlGenericError(xmlGenericErrorContext,
5057 "Entering IGNORE Conditional Section\n");
5058 }
5059
5060 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005061 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005062 * But disable SAX event generating DTD building in the meantime
5063 */
5064 state = ctxt->disableSAX;
5065 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005066 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005067 ctxt->instate = XML_PARSER_IGNORE;
5068
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005069 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005070 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5071 depth++;
5072 SKIP(3);
5073 continue;
5074 }
5075 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5076 if (--depth >= 0) SKIP(3);
5077 continue;
5078 }
5079 NEXT;
5080 continue;
5081 }
5082
5083 ctxt->disableSAX = state;
5084 ctxt->instate = instate;
5085
5086 if (xmlParserDebugEntities) {
5087 if ((ctxt->input != NULL) && (ctxt->input->filename))
5088 xmlGenericError(xmlGenericErrorContext,
5089 "%s(%d): ", ctxt->input->filename,
5090 ctxt->input->line);
5091 xmlGenericError(xmlGenericErrorContext,
5092 "Leaving IGNORE Conditional Section\n");
5093 }
5094
5095 } else {
5096 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5097 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5098 ctxt->sax->error(ctxt->userData,
5099 "XML conditional section INCLUDE or IGNORE keyword expected\n");
5100 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005101 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005102 }
5103
5104 if (RAW == 0)
5105 SHRINK;
5106
5107 if (RAW == 0) {
5108 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
5109 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5110 ctxt->sax->error(ctxt->userData,
5111 "XML conditional section not closed\n");
5112 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005113 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005114 } else {
5115 SKIP(3);
5116 }
5117}
5118
5119/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005120 * xmlParseMarkupDecl:
5121 * @ctxt: an XML parser context
5122 *
5123 * parse Markup declarations
5124 *
5125 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5126 * NotationDecl | PI | Comment
5127 *
5128 * [ VC: Proper Declaration/PE Nesting ]
5129 * Parameter-entity replacement text must be properly nested with
5130 * markup declarations. That is to say, if either the first character
5131 * or the last character of a markup declaration (markupdecl above) is
5132 * contained in the replacement text for a parameter-entity reference,
5133 * both must be contained in the same replacement text.
5134 *
5135 * [ WFC: PEs in Internal Subset ]
5136 * In the internal DTD subset, parameter-entity references can occur
5137 * only where markup declarations can occur, not within markup declarations.
5138 * (This does not apply to references that occur in external parameter
5139 * entities or to the external subset.)
5140 */
5141void
5142xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5143 GROW;
5144 xmlParseElementDecl(ctxt);
5145 xmlParseAttributeListDecl(ctxt);
5146 xmlParseEntityDecl(ctxt);
5147 xmlParseNotationDecl(ctxt);
5148 xmlParsePI(ctxt);
5149 xmlParseComment(ctxt);
5150 /*
5151 * This is only for internal subset. On external entities,
5152 * the replacement is done before parsing stage
5153 */
5154 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5155 xmlParsePEReference(ctxt);
5156
5157 /*
5158 * Conditional sections are allowed from entities included
5159 * by PE References in the internal subset.
5160 */
5161 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5162 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5163 xmlParseConditionalSections(ctxt);
5164 }
5165 }
5166
5167 ctxt->instate = XML_PARSER_DTD;
5168}
5169
5170/**
5171 * xmlParseTextDecl:
5172 * @ctxt: an XML parser context
5173 *
5174 * parse an XML declaration header for external entities
5175 *
5176 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5177 *
5178 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5179 */
5180
5181void
5182xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5183 xmlChar *version;
5184
5185 /*
5186 * We know that '<?xml' is here.
5187 */
5188 if ((RAW == '<') && (NXT(1) == '?') &&
5189 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5190 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5191 SKIP(5);
5192 } else {
5193 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
5194 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5195 ctxt->sax->error(ctxt->userData,
5196 "Text declaration '<?xml' required\n");
5197 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005198 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005199
5200 return;
5201 }
5202
5203 if (!IS_BLANK(CUR)) {
5204 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5205 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5206 ctxt->sax->error(ctxt->userData,
5207 "Space needed after '<?xml'\n");
5208 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005209 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005210 }
5211 SKIP_BLANKS;
5212
5213 /*
5214 * We may have the VersionInfo here.
5215 */
5216 version = xmlParseVersionInfo(ctxt);
5217 if (version == NULL)
5218 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005219 else {
5220 if (!IS_BLANK(CUR)) {
5221 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5222 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5223 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5224 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005225 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard401c2112002-01-07 16:54:10 +00005226 }
5227 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005228 ctxt->input->version = version;
5229
5230 /*
5231 * We must have the encoding declaration
5232 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005233 xmlParseEncodingDecl(ctxt);
5234 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5235 /*
5236 * The XML REC instructs us to stop parsing right here
5237 */
5238 return;
5239 }
5240
5241 SKIP_BLANKS;
5242 if ((RAW == '?') && (NXT(1) == '>')) {
5243 SKIP(2);
5244 } else if (RAW == '>') {
5245 /* Deprecated old WD ... */
5246 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5247 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5248 ctxt->sax->error(ctxt->userData,
5249 "XML declaration must end-up with '?>'\n");
5250 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005251 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005252 NEXT;
5253 } else {
5254 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5255 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5256 ctxt->sax->error(ctxt->userData,
5257 "parsing XML declaration: '?>' expected\n");
5258 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005259 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005260 MOVETO_ENDTAG(CUR_PTR);
5261 NEXT;
5262 }
5263}
5264
5265/**
Owen Taylor3473f882001-02-23 17:55:21 +00005266 * xmlParseExternalSubset:
5267 * @ctxt: an XML parser context
5268 * @ExternalID: the external identifier
5269 * @SystemID: the system identifier (or URL)
5270 *
5271 * parse Markup declarations from an external subset
5272 *
5273 * [30] extSubset ::= textDecl? extSubsetDecl
5274 *
5275 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5276 */
5277void
5278xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5279 const xmlChar *SystemID) {
5280 GROW;
5281 if ((RAW == '<') && (NXT(1) == '?') &&
5282 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5283 (NXT(4) == 'l')) {
5284 xmlParseTextDecl(ctxt);
5285 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5286 /*
5287 * The XML REC instructs us to stop parsing right here
5288 */
5289 ctxt->instate = XML_PARSER_EOF;
5290 return;
5291 }
5292 }
5293 if (ctxt->myDoc == NULL) {
5294 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5295 }
5296 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5297 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5298
5299 ctxt->instate = XML_PARSER_DTD;
5300 ctxt->external = 1;
5301 while (((RAW == '<') && (NXT(1) == '?')) ||
5302 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005303 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005304 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005305 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005306
5307 GROW;
5308 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5309 xmlParseConditionalSections(ctxt);
5310 } else if (IS_BLANK(CUR)) {
5311 NEXT;
5312 } else if (RAW == '%') {
5313 xmlParsePEReference(ctxt);
5314 } else
5315 xmlParseMarkupDecl(ctxt);
5316
5317 /*
5318 * Pop-up of finished entities.
5319 */
5320 while ((RAW == 0) && (ctxt->inputNr > 1))
5321 xmlPopInput(ctxt);
5322
Daniel Veillardfdc91562002-07-01 21:52:03 +00005323 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005324 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5325 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5326 ctxt->sax->error(ctxt->userData,
5327 "Content error in the external subset\n");
5328 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005329 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005330 break;
5331 }
5332 }
5333
5334 if (RAW != 0) {
5335 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5336 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5337 ctxt->sax->error(ctxt->userData,
5338 "Extra content at the end of the document\n");
5339 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005340 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005341 }
5342
5343}
5344
5345/**
5346 * xmlParseReference:
5347 * @ctxt: an XML parser context
5348 *
5349 * parse and handle entity references in content, depending on the SAX
5350 * interface, this may end-up in a call to character() if this is a
5351 * CharRef, a predefined entity, if there is no reference() callback.
5352 * or if the parser was asked to switch to that mode.
5353 *
5354 * [67] Reference ::= EntityRef | CharRef
5355 */
5356void
5357xmlParseReference(xmlParserCtxtPtr ctxt) {
5358 xmlEntityPtr ent;
5359 xmlChar *val;
5360 if (RAW != '&') return;
5361
5362 if (NXT(1) == '#') {
5363 int i = 0;
5364 xmlChar out[10];
5365 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005366 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005367
5368 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5369 /*
5370 * So we are using non-UTF-8 buffers
5371 * Check that the char fit on 8bits, if not
5372 * generate a CharRef.
5373 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005374 if (value <= 0xFF) {
5375 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005376 out[1] = 0;
5377 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5378 (!ctxt->disableSAX))
5379 ctxt->sax->characters(ctxt->userData, out, 1);
5380 } else {
5381 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005382 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005383 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005384 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005385 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5386 (!ctxt->disableSAX))
5387 ctxt->sax->reference(ctxt->userData, out);
5388 }
5389 } else {
5390 /*
5391 * Just encode the value in UTF-8
5392 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005393 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005394 out[i] = 0;
5395 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5396 (!ctxt->disableSAX))
5397 ctxt->sax->characters(ctxt->userData, out, i);
5398 }
5399 } else {
5400 ent = xmlParseEntityRef(ctxt);
5401 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005402 if (!ctxt->wellFormed)
5403 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005404 if ((ent->name != NULL) &&
5405 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5406 xmlNodePtr list = NULL;
5407 int ret;
5408
5409
5410 /*
5411 * The first reference to the entity trigger a parsing phase
5412 * where the ent->children is filled with the result from
5413 * the parsing.
5414 */
5415 if (ent->children == NULL) {
5416 xmlChar *value;
5417 value = ent->content;
5418
5419 /*
5420 * Check that this entity is well formed
5421 */
5422 if ((value != NULL) &&
5423 (value[1] == 0) && (value[0] == '<') &&
5424 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5425 /*
5426 * DONE: get definite answer on this !!!
5427 * Lots of entity decls are used to declare a single
5428 * char
5429 * <!ENTITY lt "<">
5430 * Which seems to be valid since
5431 * 2.4: The ampersand character (&) and the left angle
5432 * bracket (<) may appear in their literal form only
5433 * when used ... They are also legal within the literal
5434 * entity value of an internal entity declaration;i
5435 * see "4.3.2 Well-Formed Parsed Entities".
5436 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5437 * Looking at the OASIS test suite and James Clark
5438 * tests, this is broken. However the XML REC uses
5439 * it. Is the XML REC not well-formed ????
5440 * This is a hack to avoid this problem
5441 *
5442 * ANSWER: since lt gt amp .. are already defined,
5443 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005444 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005445 * is lousy but acceptable.
5446 */
5447 list = xmlNewDocText(ctxt->myDoc, value);
5448 if (list != NULL) {
5449 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5450 (ent->children == NULL)) {
5451 ent->children = list;
5452 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005453 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005454 list->parent = (xmlNodePtr) ent;
5455 } else {
5456 xmlFreeNodeList(list);
5457 }
5458 } else if (list != NULL) {
5459 xmlFreeNodeList(list);
5460 }
5461 } else {
5462 /*
5463 * 4.3.2: An internal general parsed entity is well-formed
5464 * if its replacement text matches the production labeled
5465 * content.
5466 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005467
5468 void *user_data;
5469 /*
5470 * This is a bit hackish but this seems the best
5471 * way to make sure both SAX and DOM entity support
5472 * behaves okay.
5473 */
5474 if (ctxt->userData == ctxt)
5475 user_data = NULL;
5476 else
5477 user_data = ctxt->userData;
5478
Owen Taylor3473f882001-02-23 17:55:21 +00005479 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5480 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005481 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5482 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005483 ctxt->depth--;
5484 } else if (ent->etype ==
5485 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5486 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005487 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005488 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005489 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005490 ctxt->depth--;
5491 } else {
5492 ret = -1;
5493 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5494 ctxt->sax->error(ctxt->userData,
5495 "Internal: invalid entity type\n");
5496 }
5497 if (ret == XML_ERR_ENTITY_LOOP) {
5498 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5499 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5500 ctxt->sax->error(ctxt->userData,
5501 "Detected entity reference loop\n");
5502 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005503 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005504 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005505 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005506 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5507 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005508 (ent->children == NULL)) {
5509 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005510 if (ctxt->replaceEntities) {
5511 /*
5512 * Prune it directly in the generated document
5513 * except for single text nodes.
5514 */
5515 if ((list->type == XML_TEXT_NODE) &&
5516 (list->next == NULL)) {
5517 list->parent = (xmlNodePtr) ent;
5518 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005519 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005520 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005521 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005522 while (list != NULL) {
5523 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005524 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005525 if (list->next == NULL)
5526 ent->last = list;
5527 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005528 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005529 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005530 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5531 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005532 }
5533 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005534 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005535 while (list != NULL) {
5536 list->parent = (xmlNodePtr) ent;
5537 if (list->next == NULL)
5538 ent->last = list;
5539 list = list->next;
5540 }
Owen Taylor3473f882001-02-23 17:55:21 +00005541 }
5542 } else {
5543 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005544 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005545 }
5546 } else if (ret > 0) {
5547 ctxt->errNo = ret;
5548 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5549 ctxt->sax->error(ctxt->userData,
5550 "Entity value required\n");
5551 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005552 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005553 } else if (list != NULL) {
5554 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005555 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005556 }
5557 }
5558 }
5559 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5560 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5561 /*
5562 * Create a node.
5563 */
5564 ctxt->sax->reference(ctxt->userData, ent->name);
5565 return;
5566 } else if (ctxt->replaceEntities) {
5567 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5568 /*
5569 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005570 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005571 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005572 */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005573 if ((list == NULL) && (ent->owner == 0)) {
5574 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005575 cur = ent->children;
5576 while (cur != NULL) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005577 nw = xmlCopyNode(cur, 1);
5578 if (nw != NULL) {
5579 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005580 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005581 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005582 }
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005583 xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005584 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005585 if (cur == ent->last)
5586 break;
5587 cur = cur->next;
5588 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005589 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005590 xmlAddEntityReference(ent, firstChild, nw);
5591 } else if (list == NULL) {
5592 xmlNodePtr nw = NULL, cur, next, last,
5593 firstChild = NULL;
5594 /*
5595 * Copy the entity child list and make it the new
5596 * entity child list. The goal is to make sure any
5597 * ID or REF referenced will be the one from the
5598 * document content and not the entity copy.
5599 */
5600 cur = ent->children;
5601 ent->children = NULL;
5602 last = ent->last;
5603 ent->last = NULL;
5604 while (cur != NULL) {
5605 next = cur->next;
5606 cur->next = NULL;
5607 cur->parent = NULL;
5608 nw = xmlCopyNode(cur, 1);
5609 if (nw != NULL) {
5610 nw->_private = cur->_private;
5611 if (firstChild == NULL){
5612 firstChild = cur;
5613 }
5614 xmlAddChild((xmlNodePtr) ent, nw);
5615 xmlAddChild(ctxt->node, cur);
5616 }
5617 if (cur == last)
5618 break;
5619 cur = next;
5620 }
5621 ent->owner = 1;
5622 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5623 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005624 } else {
5625 /*
5626 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005627 * node with a possible previous text one which
5628 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005629 */
5630 if (ent->children->type == XML_TEXT_NODE)
5631 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5632 if ((ent->last != ent->children) &&
5633 (ent->last->type == XML_TEXT_NODE))
5634 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5635 xmlAddChildList(ctxt->node, ent->children);
5636 }
5637
Owen Taylor3473f882001-02-23 17:55:21 +00005638 /*
5639 * This is to avoid a nasty side effect, see
5640 * characters() in SAX.c
5641 */
5642 ctxt->nodemem = 0;
5643 ctxt->nodelen = 0;
5644 return;
5645 } else {
5646 /*
5647 * Probably running in SAX mode
5648 */
5649 xmlParserInputPtr input;
5650
5651 input = xmlNewEntityInputStream(ctxt, ent);
5652 xmlPushInput(ctxt, input);
5653 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5654 (RAW == '<') && (NXT(1) == '?') &&
5655 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5656 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5657 xmlParseTextDecl(ctxt);
5658 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5659 /*
5660 * The XML REC instructs us to stop parsing right here
5661 */
5662 ctxt->instate = XML_PARSER_EOF;
5663 return;
5664 }
5665 if (input->standalone == 1) {
5666 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5667 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5668 ctxt->sax->error(ctxt->userData,
5669 "external parsed entities cannot be standalone\n");
5670 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005671 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005672 }
5673 }
5674 return;
5675 }
5676 }
5677 } else {
5678 val = ent->content;
5679 if (val == NULL) return;
5680 /*
5681 * inline the entity.
5682 */
5683 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5684 (!ctxt->disableSAX))
5685 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5686 }
5687 }
5688}
5689
5690/**
5691 * xmlParseEntityRef:
5692 * @ctxt: an XML parser context
5693 *
5694 * parse ENTITY references declarations
5695 *
5696 * [68] EntityRef ::= '&' Name ';'
5697 *
5698 * [ WFC: Entity Declared ]
5699 * In a document without any DTD, a document with only an internal DTD
5700 * subset which contains no parameter entity references, or a document
5701 * with "standalone='yes'", the Name given in the entity reference
5702 * must match that in an entity declaration, except that well-formed
5703 * documents need not declare any of the following entities: amp, lt,
5704 * gt, apos, quot. The declaration of a parameter entity must precede
5705 * any reference to it. Similarly, the declaration of a general entity
5706 * must precede any reference to it which appears in a default value in an
5707 * attribute-list declaration. Note that if entities are declared in the
5708 * external subset or in external parameter entities, a non-validating
5709 * processor is not obligated to read and process their declarations;
5710 * for such documents, the rule that an entity must be declared is a
5711 * well-formedness constraint only if standalone='yes'.
5712 *
5713 * [ WFC: Parsed Entity ]
5714 * An entity reference must not contain the name of an unparsed entity
5715 *
5716 * Returns the xmlEntityPtr if found, or NULL otherwise.
5717 */
5718xmlEntityPtr
5719xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005720 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005721 xmlEntityPtr ent = NULL;
5722
5723 GROW;
5724
5725 if (RAW == '&') {
5726 NEXT;
5727 name = xmlParseName(ctxt);
5728 if (name == NULL) {
5729 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5730 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5731 ctxt->sax->error(ctxt->userData,
5732 "xmlParseEntityRef: no name\n");
5733 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005734 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005735 } else {
5736 if (RAW == ';') {
5737 NEXT;
5738 /*
5739 * Ask first SAX for entity resolution, otherwise try the
5740 * predefined set.
5741 */
5742 if (ctxt->sax != NULL) {
5743 if (ctxt->sax->getEntity != NULL)
5744 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005745 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00005746 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005747 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
5748 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005749 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005750 }
Owen Taylor3473f882001-02-23 17:55:21 +00005751 }
5752 /*
5753 * [ WFC: Entity Declared ]
5754 * In a document without any DTD, a document with only an
5755 * internal DTD subset which contains no parameter entity
5756 * references, or a document with "standalone='yes'", the
5757 * Name given in the entity reference must match that in an
5758 * entity declaration, except that well-formed documents
5759 * need not declare any of the following entities: amp, lt,
5760 * gt, apos, quot.
5761 * The declaration of a parameter entity must precede any
5762 * reference to it.
5763 * Similarly, the declaration of a general entity must
5764 * precede any reference to it which appears in a default
5765 * value in an attribute-list declaration. Note that if
5766 * entities are declared in the external subset or in
5767 * external parameter entities, a non-validating processor
5768 * is not obligated to read and process their declarations;
5769 * for such documents, the rule that an entity must be
5770 * declared is a well-formedness constraint only if
5771 * standalone='yes'.
5772 */
5773 if (ent == NULL) {
5774 if ((ctxt->standalone == 1) ||
5775 ((ctxt->hasExternalSubset == 0) &&
5776 (ctxt->hasPErefs == 0))) {
5777 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5778 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5779 ctxt->sax->error(ctxt->userData,
5780 "Entity '%s' not defined\n", name);
5781 ctxt->wellFormed = 0;
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005782 ctxt->valid = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005783 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005784 } else {
5785 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005786 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005787 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005788 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005789 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005790 }
5791 }
5792
5793 /*
5794 * [ WFC: Parsed Entity ]
5795 * An entity reference must not contain the name of an
5796 * unparsed entity
5797 */
5798 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5799 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5800 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5801 ctxt->sax->error(ctxt->userData,
5802 "Entity reference to unparsed entity %s\n", name);
5803 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005804 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005805 }
5806
5807 /*
5808 * [ WFC: No External Entity References ]
5809 * Attribute values cannot contain direct or indirect
5810 * entity references to external entities.
5811 */
5812 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5813 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5814 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5815 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5816 ctxt->sax->error(ctxt->userData,
5817 "Attribute references external entity '%s'\n", name);
5818 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005819 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005820 }
5821 /*
5822 * [ WFC: No < in Attribute Values ]
5823 * The replacement text of any entity referred to directly or
5824 * indirectly in an attribute value (other than "&lt;") must
5825 * not contain a <.
5826 */
5827 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5828 (ent != NULL) &&
5829 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5830 (ent->content != NULL) &&
5831 (xmlStrchr(ent->content, '<'))) {
5832 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5833 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5834 ctxt->sax->error(ctxt->userData,
5835 "'<' in entity '%s' is not allowed in attributes values\n", name);
5836 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005837 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005838 }
5839
5840 /*
5841 * Internal check, no parameter entities here ...
5842 */
5843 else {
5844 switch (ent->etype) {
5845 case XML_INTERNAL_PARAMETER_ENTITY:
5846 case XML_EXTERNAL_PARAMETER_ENTITY:
5847 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5848 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5849 ctxt->sax->error(ctxt->userData,
5850 "Attempt to reference the parameter entity '%s'\n", name);
5851 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005852 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005853 break;
5854 default:
5855 break;
5856 }
5857 }
5858
5859 /*
5860 * [ WFC: No Recursion ]
5861 * A parsed entity must not contain a recursive reference
5862 * to itself, either directly or indirectly.
5863 * Done somewhere else
5864 */
5865
5866 } else {
5867 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5868 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5869 ctxt->sax->error(ctxt->userData,
5870 "xmlParseEntityRef: expecting ';'\n");
5871 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005872 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005873 }
Owen Taylor3473f882001-02-23 17:55:21 +00005874 }
5875 }
5876 return(ent);
5877}
5878
5879/**
5880 * xmlParseStringEntityRef:
5881 * @ctxt: an XML parser context
5882 * @str: a pointer to an index in the string
5883 *
5884 * parse ENTITY references declarations, but this version parses it from
5885 * a string value.
5886 *
5887 * [68] EntityRef ::= '&' Name ';'
5888 *
5889 * [ WFC: Entity Declared ]
5890 * In a document without any DTD, a document with only an internal DTD
5891 * subset which contains no parameter entity references, or a document
5892 * with "standalone='yes'", the Name given in the entity reference
5893 * must match that in an entity declaration, except that well-formed
5894 * documents need not declare any of the following entities: amp, lt,
5895 * gt, apos, quot. The declaration of a parameter entity must precede
5896 * any reference to it. Similarly, the declaration of a general entity
5897 * must precede any reference to it which appears in a default value in an
5898 * attribute-list declaration. Note that if entities are declared in the
5899 * external subset or in external parameter entities, a non-validating
5900 * processor is not obligated to read and process their declarations;
5901 * for such documents, the rule that an entity must be declared is a
5902 * well-formedness constraint only if standalone='yes'.
5903 *
5904 * [ WFC: Parsed Entity ]
5905 * An entity reference must not contain the name of an unparsed entity
5906 *
5907 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5908 * is updated to the current location in the string.
5909 */
5910xmlEntityPtr
5911xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5912 xmlChar *name;
5913 const xmlChar *ptr;
5914 xmlChar cur;
5915 xmlEntityPtr ent = NULL;
5916
5917 if ((str == NULL) || (*str == NULL))
5918 return(NULL);
5919 ptr = *str;
5920 cur = *ptr;
5921 if (cur == '&') {
5922 ptr++;
5923 cur = *ptr;
5924 name = xmlParseStringName(ctxt, &ptr);
5925 if (name == NULL) {
5926 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5927 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5928 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005929 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005930 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005931 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005932 } else {
5933 if (*ptr == ';') {
5934 ptr++;
5935 /*
5936 * Ask first SAX for entity resolution, otherwise try the
5937 * predefined set.
5938 */
5939 if (ctxt->sax != NULL) {
5940 if (ctxt->sax->getEntity != NULL)
5941 ent = ctxt->sax->getEntity(ctxt->userData, name);
5942 if (ent == NULL)
5943 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005944 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005945 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005946 }
Owen Taylor3473f882001-02-23 17:55:21 +00005947 }
5948 /*
5949 * [ WFC: Entity Declared ]
5950 * In a document without any DTD, a document with only an
5951 * internal DTD subset which contains no parameter entity
5952 * references, or a document with "standalone='yes'", the
5953 * Name given in the entity reference must match that in an
5954 * entity declaration, except that well-formed documents
5955 * need not declare any of the following entities: amp, lt,
5956 * gt, apos, quot.
5957 * The declaration of a parameter entity must precede any
5958 * reference to it.
5959 * Similarly, the declaration of a general entity must
5960 * precede any reference to it which appears in a default
5961 * value in an attribute-list declaration. Note that if
5962 * entities are declared in the external subset or in
5963 * external parameter entities, a non-validating processor
5964 * is not obligated to read and process their declarations;
5965 * for such documents, the rule that an entity must be
5966 * declared is a well-formedness constraint only if
5967 * standalone='yes'.
5968 */
5969 if (ent == NULL) {
5970 if ((ctxt->standalone == 1) ||
5971 ((ctxt->hasExternalSubset == 0) &&
5972 (ctxt->hasPErefs == 0))) {
5973 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5974 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5975 ctxt->sax->error(ctxt->userData,
5976 "Entity '%s' not defined\n", name);
5977 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005978 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005979 } else {
5980 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5981 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5982 ctxt->sax->warning(ctxt->userData,
5983 "Entity '%s' not defined\n", name);
5984 }
5985 }
5986
5987 /*
5988 * [ WFC: Parsed Entity ]
5989 * An entity reference must not contain the name of an
5990 * unparsed entity
5991 */
5992 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5993 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5994 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5995 ctxt->sax->error(ctxt->userData,
5996 "Entity reference to unparsed entity %s\n", name);
5997 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005998 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005999 }
6000
6001 /*
6002 * [ WFC: No External Entity References ]
6003 * Attribute values cannot contain direct or indirect
6004 * entity references to external entities.
6005 */
6006 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6007 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
6008 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
6009 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6010 ctxt->sax->error(ctxt->userData,
6011 "Attribute references external entity '%s'\n", name);
6012 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006013 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006014 }
6015 /*
6016 * [ WFC: No < in Attribute Values ]
6017 * The replacement text of any entity referred to directly or
6018 * indirectly in an attribute value (other than "&lt;") must
6019 * not contain a <.
6020 */
6021 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6022 (ent != NULL) &&
6023 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6024 (ent->content != NULL) &&
6025 (xmlStrchr(ent->content, '<'))) {
6026 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6027 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6028 ctxt->sax->error(ctxt->userData,
6029 "'<' in entity '%s' is not allowed in attributes values\n", name);
6030 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006031 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006032 }
6033
6034 /*
6035 * Internal check, no parameter entities here ...
6036 */
6037 else {
6038 switch (ent->etype) {
6039 case XML_INTERNAL_PARAMETER_ENTITY:
6040 case XML_EXTERNAL_PARAMETER_ENTITY:
6041 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6042 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6043 ctxt->sax->error(ctxt->userData,
6044 "Attempt to reference the parameter entity '%s'\n", name);
6045 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006046 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006047 break;
6048 default:
6049 break;
6050 }
6051 }
6052
6053 /*
6054 * [ WFC: No Recursion ]
6055 * A parsed entity must not contain a recursive reference
6056 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006057 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006058 */
6059
6060 } else {
6061 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6062 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6063 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00006064 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006065 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006066 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006067 }
6068 xmlFree(name);
6069 }
6070 }
6071 *str = ptr;
6072 return(ent);
6073}
6074
6075/**
6076 * xmlParsePEReference:
6077 * @ctxt: an XML parser context
6078 *
6079 * parse PEReference declarations
6080 * The entity content is handled directly by pushing it's content as
6081 * a new input stream.
6082 *
6083 * [69] PEReference ::= '%' Name ';'
6084 *
6085 * [ WFC: No Recursion ]
6086 * A parsed entity must not contain a recursive
6087 * reference to itself, either directly or indirectly.
6088 *
6089 * [ WFC: Entity Declared ]
6090 * In a document without any DTD, a document with only an internal DTD
6091 * subset which contains no parameter entity references, or a document
6092 * with "standalone='yes'", ... ... The declaration of a parameter
6093 * entity must precede any reference to it...
6094 *
6095 * [ VC: Entity Declared ]
6096 * In a document with an external subset or external parameter entities
6097 * with "standalone='no'", ... ... The declaration of a parameter entity
6098 * must precede any reference to it...
6099 *
6100 * [ WFC: In DTD ]
6101 * Parameter-entity references may only appear in the DTD.
6102 * NOTE: misleading but this is handled.
6103 */
6104void
6105xmlParsePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006106 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006107 xmlEntityPtr entity = NULL;
6108 xmlParserInputPtr input;
6109
6110 if (RAW == '%') {
6111 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006112 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006113 if (name == NULL) {
6114 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6115 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6116 ctxt->sax->error(ctxt->userData,
6117 "xmlParsePEReference: no name\n");
6118 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006119 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006120 } else {
6121 if (RAW == ';') {
6122 NEXT;
6123 if ((ctxt->sax != NULL) &&
6124 (ctxt->sax->getParameterEntity != NULL))
6125 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6126 name);
6127 if (entity == NULL) {
6128 /*
6129 * [ WFC: Entity Declared ]
6130 * In a document without any DTD, a document with only an
6131 * internal DTD subset which contains no parameter entity
6132 * references, or a document with "standalone='yes'", ...
6133 * ... The declaration of a parameter entity must precede
6134 * any reference to it...
6135 */
6136 if ((ctxt->standalone == 1) ||
6137 ((ctxt->hasExternalSubset == 0) &&
6138 (ctxt->hasPErefs == 0))) {
6139 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6140 if ((!ctxt->disableSAX) &&
6141 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6142 ctxt->sax->error(ctxt->userData,
6143 "PEReference: %%%s; not found\n", name);
6144 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006145 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006146 } else {
6147 /*
6148 * [ VC: Entity Declared ]
6149 * In a document with an external subset or external
6150 * parameter entities with "standalone='no'", ...
6151 * ... The declaration of a parameter entity must precede
6152 * any reference to it...
6153 */
6154 if ((!ctxt->disableSAX) &&
6155 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6156 ctxt->sax->warning(ctxt->userData,
6157 "PEReference: %%%s; not found\n", name);
6158 ctxt->valid = 0;
6159 }
6160 } else {
6161 /*
6162 * Internal checking in case the entity quest barfed
6163 */
6164 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6165 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6166 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6167 ctxt->sax->warning(ctxt->userData,
6168 "Internal: %%%s; is not a parameter entity\n", name);
Daniel Veillardf5582f12002-06-11 10:08:16 +00006169 } else if (ctxt->input->free != deallocblankswrapper) {
6170 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
6171 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00006172 } else {
6173 /*
6174 * TODO !!!
6175 * handle the extra spaces added before and after
6176 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6177 */
6178 input = xmlNewEntityInputStream(ctxt, entity);
6179 xmlPushInput(ctxt, input);
6180 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6181 (RAW == '<') && (NXT(1) == '?') &&
6182 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6183 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6184 xmlParseTextDecl(ctxt);
6185 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6186 /*
6187 * The XML REC instructs us to stop parsing
6188 * right here
6189 */
6190 ctxt->instate = XML_PARSER_EOF;
Owen Taylor3473f882001-02-23 17:55:21 +00006191 return;
6192 }
6193 }
Owen Taylor3473f882001-02-23 17:55:21 +00006194 }
6195 }
6196 ctxt->hasPErefs = 1;
6197 } else {
6198 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6199 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6200 ctxt->sax->error(ctxt->userData,
6201 "xmlParsePEReference: expecting ';'\n");
6202 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006203 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006204 }
Owen Taylor3473f882001-02-23 17:55:21 +00006205 }
6206 }
6207}
6208
6209/**
6210 * xmlParseStringPEReference:
6211 * @ctxt: an XML parser context
6212 * @str: a pointer to an index in the string
6213 *
6214 * parse PEReference declarations
6215 *
6216 * [69] PEReference ::= '%' Name ';'
6217 *
6218 * [ WFC: No Recursion ]
6219 * A parsed entity must not contain a recursive
6220 * reference to itself, either directly or indirectly.
6221 *
6222 * [ WFC: Entity Declared ]
6223 * In a document without any DTD, a document with only an internal DTD
6224 * subset which contains no parameter entity references, or a document
6225 * with "standalone='yes'", ... ... The declaration of a parameter
6226 * entity must precede any reference to it...
6227 *
6228 * [ VC: Entity Declared ]
6229 * In a document with an external subset or external parameter entities
6230 * with "standalone='no'", ... ... The declaration of a parameter entity
6231 * must precede any reference to it...
6232 *
6233 * [ WFC: In DTD ]
6234 * Parameter-entity references may only appear in the DTD.
6235 * NOTE: misleading but this is handled.
6236 *
6237 * Returns the string of the entity content.
6238 * str is updated to the current value of the index
6239 */
6240xmlEntityPtr
6241xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6242 const xmlChar *ptr;
6243 xmlChar cur;
6244 xmlChar *name;
6245 xmlEntityPtr entity = NULL;
6246
6247 if ((str == NULL) || (*str == NULL)) return(NULL);
6248 ptr = *str;
6249 cur = *ptr;
6250 if (cur == '%') {
6251 ptr++;
6252 cur = *ptr;
6253 name = xmlParseStringName(ctxt, &ptr);
6254 if (name == NULL) {
6255 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6256 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6257 ctxt->sax->error(ctxt->userData,
6258 "xmlParseStringPEReference: no name\n");
6259 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006260 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006261 } else {
6262 cur = *ptr;
6263 if (cur == ';') {
6264 ptr++;
6265 cur = *ptr;
6266 if ((ctxt->sax != NULL) &&
6267 (ctxt->sax->getParameterEntity != NULL))
6268 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6269 name);
6270 if (entity == NULL) {
6271 /*
6272 * [ WFC: Entity Declared ]
6273 * In a document without any DTD, a document with only an
6274 * internal DTD subset which contains no parameter entity
6275 * references, or a document with "standalone='yes'", ...
6276 * ... The declaration of a parameter entity must precede
6277 * any reference to it...
6278 */
6279 if ((ctxt->standalone == 1) ||
6280 ((ctxt->hasExternalSubset == 0) &&
6281 (ctxt->hasPErefs == 0))) {
6282 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6283 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6284 ctxt->sax->error(ctxt->userData,
6285 "PEReference: %%%s; not found\n", name);
6286 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006287 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006288 } else {
6289 /*
6290 * [ VC: Entity Declared ]
6291 * In a document with an external subset or external
6292 * parameter entities with "standalone='no'", ...
6293 * ... The declaration of a parameter entity must
6294 * precede any reference to it...
6295 */
6296 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6297 ctxt->sax->warning(ctxt->userData,
6298 "PEReference: %%%s; not found\n", name);
6299 ctxt->valid = 0;
6300 }
6301 } else {
6302 /*
6303 * Internal checking in case the entity quest barfed
6304 */
6305 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6306 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6307 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6308 ctxt->sax->warning(ctxt->userData,
6309 "Internal: %%%s; is not a parameter entity\n", name);
6310 }
6311 }
6312 ctxt->hasPErefs = 1;
6313 } else {
6314 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6315 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6316 ctxt->sax->error(ctxt->userData,
6317 "xmlParseStringPEReference: expecting ';'\n");
6318 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006319 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006320 }
6321 xmlFree(name);
6322 }
6323 }
6324 *str = ptr;
6325 return(entity);
6326}
6327
6328/**
6329 * xmlParseDocTypeDecl:
6330 * @ctxt: an XML parser context
6331 *
6332 * parse a DOCTYPE declaration
6333 *
6334 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6335 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6336 *
6337 * [ VC: Root Element Type ]
6338 * The Name in the document type declaration must match the element
6339 * type of the root element.
6340 */
6341
6342void
6343xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006344 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006345 xmlChar *ExternalID = NULL;
6346 xmlChar *URI = NULL;
6347
6348 /*
6349 * We know that '<!DOCTYPE' has been detected.
6350 */
6351 SKIP(9);
6352
6353 SKIP_BLANKS;
6354
6355 /*
6356 * Parse the DOCTYPE name.
6357 */
6358 name = xmlParseName(ctxt);
6359 if (name == NULL) {
6360 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6361 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6362 ctxt->sax->error(ctxt->userData,
6363 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6364 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006365 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006366 }
6367 ctxt->intSubName = name;
6368
6369 SKIP_BLANKS;
6370
6371 /*
6372 * Check for SystemID and ExternalID
6373 */
6374 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6375
6376 if ((URI != NULL) || (ExternalID != NULL)) {
6377 ctxt->hasExternalSubset = 1;
6378 }
6379 ctxt->extSubURI = URI;
6380 ctxt->extSubSystem = ExternalID;
6381
6382 SKIP_BLANKS;
6383
6384 /*
6385 * Create and update the internal subset.
6386 */
6387 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6388 (!ctxt->disableSAX))
6389 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6390
6391 /*
6392 * Is there any internal subset declarations ?
6393 * they are handled separately in xmlParseInternalSubset()
6394 */
6395 if (RAW == '[')
6396 return;
6397
6398 /*
6399 * We should be at the end of the DOCTYPE declaration.
6400 */
6401 if (RAW != '>') {
6402 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6403 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006404 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006405 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006406 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006407 }
6408 NEXT;
6409}
6410
6411/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006412 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006413 * @ctxt: an XML parser context
6414 *
6415 * parse the internal subset declaration
6416 *
6417 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6418 */
6419
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006420static void
Owen Taylor3473f882001-02-23 17:55:21 +00006421xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6422 /*
6423 * Is there any DTD definition ?
6424 */
6425 if (RAW == '[') {
6426 ctxt->instate = XML_PARSER_DTD;
6427 NEXT;
6428 /*
6429 * Parse the succession of Markup declarations and
6430 * PEReferences.
6431 * Subsequence (markupdecl | PEReference | S)*
6432 */
6433 while (RAW != ']') {
6434 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006435 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006436
6437 SKIP_BLANKS;
6438 xmlParseMarkupDecl(ctxt);
6439 xmlParsePEReference(ctxt);
6440
6441 /*
6442 * Pop-up of finished entities.
6443 */
6444 while ((RAW == 0) && (ctxt->inputNr > 1))
6445 xmlPopInput(ctxt);
6446
6447 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6448 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6449 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6450 ctxt->sax->error(ctxt->userData,
6451 "xmlParseInternalSubset: error detected in Markup declaration\n");
6452 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006453 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006454 break;
6455 }
6456 }
6457 if (RAW == ']') {
6458 NEXT;
6459 SKIP_BLANKS;
6460 }
6461 }
6462
6463 /*
6464 * We should be at the end of the DOCTYPE declaration.
6465 */
6466 if (RAW != '>') {
6467 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6468 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006469 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006470 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006471 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006472 }
6473 NEXT;
6474}
6475
6476/**
6477 * xmlParseAttribute:
6478 * @ctxt: an XML parser context
6479 * @value: a xmlChar ** used to store the value of the attribute
6480 *
6481 * parse an attribute
6482 *
6483 * [41] Attribute ::= Name Eq AttValue
6484 *
6485 * [ WFC: No External Entity References ]
6486 * Attribute values cannot contain direct or indirect entity references
6487 * to external entities.
6488 *
6489 * [ WFC: No < in Attribute Values ]
6490 * The replacement text of any entity referred to directly or indirectly in
6491 * an attribute value (other than "&lt;") must not contain a <.
6492 *
6493 * [ VC: Attribute Value Type ]
6494 * The attribute must have been declared; the value must be of the type
6495 * declared for it.
6496 *
6497 * [25] Eq ::= S? '=' S?
6498 *
6499 * With namespace:
6500 *
6501 * [NS 11] Attribute ::= QName Eq AttValue
6502 *
6503 * Also the case QName == xmlns:??? is handled independently as a namespace
6504 * definition.
6505 *
6506 * Returns the attribute name, and the value in *value.
6507 */
6508
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006509const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006510xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006511 const xmlChar *name;
6512 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006513
6514 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006515 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006516 name = xmlParseName(ctxt);
6517 if (name == NULL) {
6518 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6519 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6520 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6521 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006522 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006523 return(NULL);
6524 }
6525
6526 /*
6527 * read the value
6528 */
6529 SKIP_BLANKS;
6530 if (RAW == '=') {
6531 NEXT;
6532 SKIP_BLANKS;
6533 val = xmlParseAttValue(ctxt);
6534 ctxt->instate = XML_PARSER_CONTENT;
6535 } else {
6536 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6537 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6538 ctxt->sax->error(ctxt->userData,
6539 "Specification mandate value for attribute %s\n", name);
6540 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006541 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006542 return(NULL);
6543 }
6544
6545 /*
6546 * Check that xml:lang conforms to the specification
6547 * No more registered as an error, just generate a warning now
6548 * since this was deprecated in XML second edition
6549 */
6550 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6551 if (!xmlCheckLanguageID(val)) {
6552 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6553 ctxt->sax->warning(ctxt->userData,
6554 "Malformed value for xml:lang : %s\n", val);
6555 }
6556 }
6557
6558 /*
6559 * Check that xml:space conforms to the specification
6560 */
6561 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6562 if (xmlStrEqual(val, BAD_CAST "default"))
6563 *(ctxt->space) = 0;
6564 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6565 *(ctxt->space) = 1;
6566 else {
6567 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6568 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6569 ctxt->sax->error(ctxt->userData,
Daniel Veillard642104e2003-03-26 16:32:05 +00006570"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Owen Taylor3473f882001-02-23 17:55:21 +00006571 val);
6572 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006573 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006574 }
6575 }
6576
6577 *value = val;
6578 return(name);
6579}
6580
6581/**
6582 * xmlParseStartTag:
6583 * @ctxt: an XML parser context
6584 *
6585 * parse a start of tag either for rule element or
6586 * EmptyElement. In both case we don't parse the tag closing chars.
6587 *
6588 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6589 *
6590 * [ WFC: Unique Att Spec ]
6591 * No attribute name may appear more than once in the same start-tag or
6592 * empty-element tag.
6593 *
6594 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6595 *
6596 * [ WFC: Unique Att Spec ]
6597 * No attribute name may appear more than once in the same start-tag or
6598 * empty-element tag.
6599 *
6600 * With namespace:
6601 *
6602 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6603 *
6604 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6605 *
6606 * Returns the element name parsed
6607 */
6608
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006609const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006610xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006611 const xmlChar *name;
6612 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00006613 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006614 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00006615 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006616 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006617 int i;
6618
6619 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006620 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006621
6622 name = xmlParseName(ctxt);
6623 if (name == NULL) {
6624 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6625 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6626 ctxt->sax->error(ctxt->userData,
6627 "xmlParseStartTag: invalid element name\n");
6628 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006629 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006630 return(NULL);
6631 }
6632
6633 /*
6634 * Now parse the attributes, it ends up with the ending
6635 *
6636 * (S Attribute)* S?
6637 */
6638 SKIP_BLANKS;
6639 GROW;
6640
Daniel Veillard21a0f912001-02-25 19:54:14 +00006641 while ((RAW != '>') &&
6642 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard34ba3872003-07-15 13:34:05 +00006643 (IS_CHAR((unsigned int) RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006644 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006645 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006646
6647 attname = xmlParseAttribute(ctxt, &attvalue);
6648 if ((attname != NULL) && (attvalue != NULL)) {
6649 /*
6650 * [ WFC: Unique Att Spec ]
6651 * No attribute name may appear more than once in the same
6652 * start-tag or empty-element tag.
6653 */
6654 for (i = 0; i < nbatts;i += 2) {
6655 if (xmlStrEqual(atts[i], attname)) {
6656 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6657 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6658 ctxt->sax->error(ctxt->userData,
6659 "Attribute %s redefined\n",
6660 attname);
6661 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006662 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006663 xmlFree(attvalue);
6664 goto failed;
6665 }
6666 }
6667
6668 /*
6669 * Add the pair to atts
6670 */
6671 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006672 maxatts = 22; /* allow for 10 attrs by default */
6673 atts = (const xmlChar **)
6674 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00006675 if (atts == NULL) {
6676 xmlGenericError(xmlGenericErrorContext,
6677 "malloc of %ld byte failed\n",
6678 maxatts * (long)sizeof(xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006679 if (attvalue != NULL)
6680 xmlFree(attvalue);
6681 ctxt->errNo = XML_ERR_NO_MEMORY;
6682 ctxt->instate = XML_PARSER_EOF;
6683 ctxt->disableSAX = 1;
6684 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006685 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006686 ctxt->atts = atts;
6687 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006688 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006689 const xmlChar **n;
6690
Owen Taylor3473f882001-02-23 17:55:21 +00006691 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006692 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006693 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006694 if (n == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00006695 xmlGenericError(xmlGenericErrorContext,
6696 "realloc of %ld byte failed\n",
6697 maxatts * (long)sizeof(xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006698 if (attvalue != NULL)
6699 xmlFree(attvalue);
6700 ctxt->errNo = XML_ERR_NO_MEMORY;
6701 ctxt->instate = XML_PARSER_EOF;
6702 ctxt->disableSAX = 1;
6703 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006704 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006705 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006706 ctxt->atts = atts;
6707 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006708 }
6709 atts[nbatts++] = attname;
6710 atts[nbatts++] = attvalue;
6711 atts[nbatts] = NULL;
6712 atts[nbatts + 1] = NULL;
6713 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006714 if (attvalue != NULL)
6715 xmlFree(attvalue);
6716 }
6717
6718failed:
6719
Daniel Veillard3772de32002-12-17 10:31:45 +00006720 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006721 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6722 break;
6723 if (!IS_BLANK(RAW)) {
6724 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6725 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6726 ctxt->sax->error(ctxt->userData,
6727 "attributes construct error\n");
6728 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006729 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006730 }
6731 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006732 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6733 (attname == NULL) && (attvalue == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006734 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6735 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6736 ctxt->sax->error(ctxt->userData,
6737 "xmlParseStartTag: problem parsing attributes\n");
6738 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006739 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006740 break;
6741 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006742 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00006743 GROW;
6744 }
6745
6746 /*
6747 * SAX: Start of Element !
6748 */
6749 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006750 (!ctxt->disableSAX)) {
6751 if (nbatts > 0)
6752 ctxt->sax->startElement(ctxt->userData, name, atts);
6753 else
6754 ctxt->sax->startElement(ctxt->userData, name, NULL);
6755 }
Owen Taylor3473f882001-02-23 17:55:21 +00006756
6757 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006758 /* Free only the content strings */
6759 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006760 if (atts[i] != NULL)
6761 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00006762 }
6763 return(name);
6764}
6765
6766/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006767 * xmlParseEndTagInternal:
Owen Taylor3473f882001-02-23 17:55:21 +00006768 * @ctxt: an XML parser context
6769 *
6770 * parse an end of tag
6771 *
6772 * [42] ETag ::= '</' Name S? '>'
6773 *
6774 * With namespace
6775 *
6776 * [NS 9] ETag ::= '</' QName S? '>'
6777 */
6778
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006779static void
6780xmlParseEndTagInternal(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006781 const xmlChar *name;
6782 const xmlChar *oldname;
Owen Taylor3473f882001-02-23 17:55:21 +00006783
6784 GROW;
6785 if ((RAW != '<') || (NXT(1) != '/')) {
6786 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6787 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6788 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6789 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006790 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006791 return;
6792 }
6793 SKIP(2);
6794
Daniel Veillard46de64e2002-05-29 08:21:33 +00006795 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006796
6797 /*
6798 * We should definitely be at the ending "S? '>'" part
6799 */
6800 GROW;
6801 SKIP_BLANKS;
Daniel Veillard34ba3872003-07-15 13:34:05 +00006802 if ((!IS_CHAR((unsigned int) RAW)) || (RAW != '>')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006803 ctxt->errNo = XML_ERR_GT_REQUIRED;
6804 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6805 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6806 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006807 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006808 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006809 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006810
6811 /*
6812 * [ WFC: Element Type Match ]
6813 * The Name in an element's end-tag must match the element type in the
6814 * start-tag.
6815 *
6816 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006817 if (name != (xmlChar*)1) {
Owen Taylor3473f882001-02-23 17:55:21 +00006818 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6819 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00006820 if (name != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00006821 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006822 "Opening and ending tag mismatch: %s line %d and %s\n",
6823 ctxt->name, line, name);
Daniel Veillard46de64e2002-05-29 08:21:33 +00006824 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006825 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006826 "Ending tag error for: %s line %d\n", ctxt->name, line);
Owen Taylor3473f882001-02-23 17:55:21 +00006827 }
6828
6829 }
6830 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006831 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006832 }
6833
6834 /*
6835 * SAX: End of Tag
6836 */
6837 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6838 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006839 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006840
Owen Taylor3473f882001-02-23 17:55:21 +00006841 oldname = namePop(ctxt);
6842 spacePop(ctxt);
6843 if (oldname != NULL) {
6844#ifdef DEBUG_STACK
6845 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6846#endif
Owen Taylor3473f882001-02-23 17:55:21 +00006847 }
6848 return;
6849}
6850
/**
 * xmlParseEndTag:
 * @ctxt:  an XML parser context
 *
 * parse an end of tag
 *
 * Public entry point: delegates to xmlParseEndTagInternal(), passing 0
 * as the line number used in its error messages.
 *
 * [42] ETag ::= '</' Name S? '>'
 *
 * With namespace
 *
 * [NS 9] ETag ::= '</' QName S? '>'
 */

void
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
    xmlParseEndTagInternal(ctxt, 0);
}
6868
6869/**
Owen Taylor3473f882001-02-23 17:55:21 +00006870 * xmlParseCDSect:
6871 * @ctxt: an XML parser context
6872 *
6873 * Parse escaped pure raw content.
6874 *
6875 * [18] CDSect ::= CDStart CData CDEnd
6876 *
6877 * [19] CDStart ::= '<![CDATA['
6878 *
6879 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6880 *
6881 * [21] CDEnd ::= ']]>'
6882 */
6883void
6884xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6885 xmlChar *buf = NULL;
6886 int len = 0;
6887 int size = XML_PARSER_BUFFER_SIZE;
6888 int r, rl;
6889 int s, sl;
6890 int cur, l;
6891 int count = 0;
6892
6893 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6894 (NXT(2) == '[') && (NXT(3) == 'C') &&
6895 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6896 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6897 (NXT(8) == '[')) {
6898 SKIP(9);
6899 } else
6900 return;
6901
6902 ctxt->instate = XML_PARSER_CDATA_SECTION;
6903 r = CUR_CHAR(rl);
6904 if (!IS_CHAR(r)) {
6905 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6906 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6907 ctxt->sax->error(ctxt->userData,
6908 "CData section not finished\n");
6909 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006910 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006911 ctxt->instate = XML_PARSER_CONTENT;
6912 return;
6913 }
6914 NEXTL(rl);
6915 s = CUR_CHAR(sl);
6916 if (!IS_CHAR(s)) {
6917 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6918 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6919 ctxt->sax->error(ctxt->userData,
6920 "CData section not finished\n");
6921 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006922 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006923 ctxt->instate = XML_PARSER_CONTENT;
6924 return;
6925 }
6926 NEXTL(sl);
6927 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00006928 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00006929 if (buf == NULL) {
6930 xmlGenericError(xmlGenericErrorContext,
6931 "malloc of %d byte failed\n", size);
6932 return;
6933 }
6934 while (IS_CHAR(cur) &&
6935 ((r != ']') || (s != ']') || (cur != '>'))) {
6936 if (len + 5 >= size) {
6937 size *= 2;
6938 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6939 if (buf == NULL) {
6940 xmlGenericError(xmlGenericErrorContext,
6941 "realloc of %d byte failed\n", size);
6942 return;
6943 }
6944 }
6945 COPY_BUF(rl,buf,len,r);
6946 r = s;
6947 rl = sl;
6948 s = cur;
6949 sl = l;
6950 count++;
6951 if (count > 50) {
6952 GROW;
6953 count = 0;
6954 }
6955 NEXTL(l);
6956 cur = CUR_CHAR(l);
6957 }
6958 buf[len] = 0;
6959 ctxt->instate = XML_PARSER_CONTENT;
6960 if (cur != '>') {
6961 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6962 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6963 ctxt->sax->error(ctxt->userData,
6964 "CData section not finished\n%.50s\n", buf);
6965 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006966 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006967 xmlFree(buf);
6968 return;
6969 }
6970 NEXTL(l);
6971
6972 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006973 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00006974 */
6975 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6976 if (ctxt->sax->cdataBlock != NULL)
6977 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006978 else if (ctxt->sax->characters != NULL)
6979 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006980 }
6981 xmlFree(buf);
6982}
6983
6984/**
6985 * xmlParseContent:
6986 * @ctxt: an XML parser context
6987 *
6988 * Parse a content:
6989 *
6990 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6991 */
6992
6993void
6994xmlParseContent(xmlParserCtxtPtr ctxt) {
6995 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00006996 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00006997 ((RAW != '<') || (NXT(1) != '/'))) {
6998 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006999 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00007000 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00007001
7002 /*
Owen Taylor3473f882001-02-23 17:55:21 +00007003 * First case : a Processing Instruction.
7004 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00007005 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007006 xmlParsePI(ctxt);
7007 }
7008
7009 /*
7010 * Second case : a CDSection
7011 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007012 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00007013 (NXT(2) == '[') && (NXT(3) == 'C') &&
7014 (NXT(4) == 'D') && (NXT(5) == 'A') &&
7015 (NXT(6) == 'T') && (NXT(7) == 'A') &&
7016 (NXT(8) == '[')) {
7017 xmlParseCDSect(ctxt);
7018 }
7019
7020 /*
7021 * Third case : a comment
7022 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007023 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00007024 (NXT(2) == '-') && (NXT(3) == '-')) {
7025 xmlParseComment(ctxt);
7026 ctxt->instate = XML_PARSER_CONTENT;
7027 }
7028
7029 /*
7030 * Fourth case : a sub-element.
7031 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007032 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00007033 xmlParseElement(ctxt);
7034 }
7035
7036 /*
7037 * Fifth case : a reference. If if has not been resolved,
7038 * parsing returns it's Name, create the node
7039 */
7040
Daniel Veillard21a0f912001-02-25 19:54:14 +00007041 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00007042 xmlParseReference(ctxt);
7043 }
7044
7045 /*
7046 * Last case, text. Note that References are handled directly.
7047 */
7048 else {
7049 xmlParseCharData(ctxt, 0);
7050 }
7051
7052 GROW;
7053 /*
7054 * Pop-up of finished entities.
7055 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00007056 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00007057 xmlPopInput(ctxt);
7058 SHRINK;
7059
Daniel Veillardfdc91562002-07-01 21:52:03 +00007060 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007061 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
7062 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7063 ctxt->sax->error(ctxt->userData,
7064 "detected an error in element content\n");
7065 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007066 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007067 ctxt->instate = XML_PARSER_EOF;
7068 break;
7069 }
7070 }
7071}
7072
7073/**
7074 * xmlParseElement:
7075 * @ctxt: an XML parser context
7076 *
7077 * parse an XML element, this is highly recursive
7078 *
7079 * [39] element ::= EmptyElemTag | STag content ETag
7080 *
7081 * [ WFC: Element Type Match ]
7082 * The Name in an element's end-tag must match the element type in the
7083 * start-tag.
7084 *
7085 * [ VC: Element Valid ]
7086 * An element is valid if there is a declaration matching elementdecl
7087 * where the Name matches the element type and one of the following holds:
7088 * - The declaration matches EMPTY and the element has no content.
7089 * - The declaration matches children and the sequence of child elements
7090 * belongs to the language generated by the regular expression in the
7091 * content model, with optional white space (characters matching the
7092 * nonterminal S) between each pair of child elements.
7093 * - The declaration matches Mixed and the content consists of character
7094 * data and child elements whose types match names in the content model.
7095 * - The declaration matches ANY, and the types of any child elements have
7096 * been declared.
7097 */
7098
7099void
7100xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007101 const xmlChar *name;
7102 const xmlChar *oldname;
Owen Taylor3473f882001-02-23 17:55:21 +00007103 xmlParserNodeInfo node_info;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007104 int line;
Owen Taylor3473f882001-02-23 17:55:21 +00007105 xmlNodePtr ret;
7106
7107 /* Capture start position */
7108 if (ctxt->record_info) {
7109 node_info.begin_pos = ctxt->input->consumed +
7110 (CUR_PTR - ctxt->input->base);
7111 node_info.begin_line = ctxt->input->line;
7112 }
7113
7114 if (ctxt->spaceNr == 0)
7115 spacePush(ctxt, -1);
7116 else
7117 spacePush(ctxt, *ctxt->space);
7118
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007119 line = ctxt->input->line;
Owen Taylor3473f882001-02-23 17:55:21 +00007120 name = xmlParseStartTag(ctxt);
7121 if (name == NULL) {
7122 spacePop(ctxt);
7123 return;
7124 }
7125 namePush(ctxt, name);
7126 ret = ctxt->node;
7127
7128 /*
7129 * [ VC: Root Element Type ]
7130 * The Name in the document type declaration must match the element
7131 * type of the root element.
7132 */
7133 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7134 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7135 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7136
7137 /*
7138 * Check for an Empty Element.
7139 */
7140 if ((RAW == '/') && (NXT(1) == '>')) {
7141 SKIP(2);
7142 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7143 (!ctxt->disableSAX))
7144 ctxt->sax->endElement(ctxt->userData, name);
7145 oldname = namePop(ctxt);
7146 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007147#ifdef DEBUG_STACK
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007148 if (oldname != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00007149 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
Owen Taylor3473f882001-02-23 17:55:21 +00007150 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007151#endif
Owen Taylor3473f882001-02-23 17:55:21 +00007152 if ( ret != NULL && ctxt->record_info ) {
7153 node_info.end_pos = ctxt->input->consumed +
7154 (CUR_PTR - ctxt->input->base);
7155 node_info.end_line = ctxt->input->line;
7156 node_info.node = ret;
7157 xmlParserAddNodeInfo(ctxt, &node_info);
7158 }
7159 return;
7160 }
7161 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00007162 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007163 } else {
7164 ctxt->errNo = XML_ERR_GT_REQUIRED;
7165 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7166 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007167 "Couldn't find end of Start Tag %s line %d\n",
7168 name, line);
Owen Taylor3473f882001-02-23 17:55:21 +00007169 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007170 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007171
7172 /*
7173 * end of parsing of this node.
7174 */
7175 nodePop(ctxt);
7176 oldname = namePop(ctxt);
7177 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007178#ifdef DEBUG_STACK
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007179 if (oldname != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00007180 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
Owen Taylor3473f882001-02-23 17:55:21 +00007181 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007182#endif
Owen Taylor3473f882001-02-23 17:55:21 +00007183
7184 /*
7185 * Capture end position and add node
7186 */
7187 if ( ret != NULL && ctxt->record_info ) {
7188 node_info.end_pos = ctxt->input->consumed +
7189 (CUR_PTR - ctxt->input->base);
7190 node_info.end_line = ctxt->input->line;
7191 node_info.node = ret;
7192 xmlParserAddNodeInfo(ctxt, &node_info);
7193 }
7194 return;
7195 }
7196
7197 /*
7198 * Parse the content of the element:
7199 */
7200 xmlParseContent(ctxt);
Daniel Veillard34ba3872003-07-15 13:34:05 +00007201 if (!IS_CHAR((unsigned int) RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00007202 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00007203 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7204 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007205 "Premature end of data in tag %s line %d\n", name, line);
Owen Taylor3473f882001-02-23 17:55:21 +00007206 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007207 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007208
7209 /*
7210 * end of parsing of this node.
7211 */
7212 nodePop(ctxt);
7213 oldname = namePop(ctxt);
7214 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007215#ifdef DEBUG_STACK
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007216 if (oldname != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00007217 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
Owen Taylor3473f882001-02-23 17:55:21 +00007218 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007219#endif
Owen Taylor3473f882001-02-23 17:55:21 +00007220 return;
7221 }
7222
7223 /*
7224 * parse the end of tag: '</' should be here.
7225 */
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007226 xmlParseEndTagInternal(ctxt, line);
Owen Taylor3473f882001-02-23 17:55:21 +00007227
7228 /*
7229 * Capture end position and add node
7230 */
7231 if ( ret != NULL && ctxt->record_info ) {
7232 node_info.end_pos = ctxt->input->consumed +
7233 (CUR_PTR - ctxt->input->base);
7234 node_info.end_line = ctxt->input->line;
7235 node_info.node = ret;
7236 xmlParserAddNodeInfo(ctxt, &node_info);
7237 }
7238}
7239
7240/**
7241 * xmlParseVersionNum:
7242 * @ctxt: an XML parser context
7243 *
7244 * parse the XML version value.
7245 *
7246 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7247 *
7248 * Returns the string giving the XML version number, or NULL
7249 */
7250xmlChar *
7251xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7252 xmlChar *buf = NULL;
7253 int len = 0;
7254 int size = 10;
7255 xmlChar cur;
7256
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007257 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007258 if (buf == NULL) {
7259 xmlGenericError(xmlGenericErrorContext,
7260 "malloc of %d byte failed\n", size);
7261 return(NULL);
7262 }
7263 cur = CUR;
7264 while (((cur >= 'a') && (cur <= 'z')) ||
7265 ((cur >= 'A') && (cur <= 'Z')) ||
7266 ((cur >= '0') && (cur <= '9')) ||
7267 (cur == '_') || (cur == '.') ||
7268 (cur == ':') || (cur == '-')) {
7269 if (len + 1 >= size) {
7270 size *= 2;
7271 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7272 if (buf == NULL) {
7273 xmlGenericError(xmlGenericErrorContext,
7274 "realloc of %d byte failed\n", size);
7275 return(NULL);
7276 }
7277 }
7278 buf[len++] = cur;
7279 NEXT;
7280 cur=CUR;
7281 }
7282 buf[len] = 0;
7283 return(buf);
7284}
7285
7286/**
7287 * xmlParseVersionInfo:
7288 * @ctxt: an XML parser context
7289 *
7290 * parse the XML version.
7291 *
7292 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7293 *
7294 * [25] Eq ::= S? '=' S?
7295 *
7296 * Returns the version string, e.g. "1.0"
7297 */
7298
7299xmlChar *
7300xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7301 xmlChar *version = NULL;
7302 const xmlChar *q;
7303
7304 if ((RAW == 'v') && (NXT(1) == 'e') &&
7305 (NXT(2) == 'r') && (NXT(3) == 's') &&
7306 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7307 (NXT(6) == 'n')) {
7308 SKIP(7);
7309 SKIP_BLANKS;
7310 if (RAW != '=') {
7311 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7312 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7313 ctxt->sax->error(ctxt->userData,
7314 "xmlParseVersionInfo : expected '='\n");
7315 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007316 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007317 return(NULL);
7318 }
7319 NEXT;
7320 SKIP_BLANKS;
7321 if (RAW == '"') {
7322 NEXT;
7323 q = CUR_PTR;
7324 version = xmlParseVersionNum(ctxt);
7325 if (RAW != '"') {
7326 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7327 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7328 ctxt->sax->error(ctxt->userData,
7329 "String not closed\n%.50s\n", q);
7330 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007331 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007332 } else
7333 NEXT;
7334 } else if (RAW == '\''){
7335 NEXT;
7336 q = CUR_PTR;
7337 version = xmlParseVersionNum(ctxt);
7338 if (RAW != '\'') {
7339 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7340 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7341 ctxt->sax->error(ctxt->userData,
7342 "String not closed\n%.50s\n", q);
7343 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007344 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007345 } else
7346 NEXT;
7347 } else {
7348 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7350 ctxt->sax->error(ctxt->userData,
7351 "xmlParseVersionInfo : expected ' or \"\n");
7352 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007353 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007354 }
7355 }
7356 return(version);
7357}
7358
7359/**
7360 * xmlParseEncName:
7361 * @ctxt: an XML parser context
7362 *
7363 * parse the XML encoding name
7364 *
7365 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7366 *
7367 * Returns the encoding name value or NULL
7368 */
7369xmlChar *
7370xmlParseEncName(xmlParserCtxtPtr ctxt) {
7371 xmlChar *buf = NULL;
7372 int len = 0;
7373 int size = 10;
7374 xmlChar cur;
7375
7376 cur = CUR;
7377 if (((cur >= 'a') && (cur <= 'z')) ||
7378 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007379 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007380 if (buf == NULL) {
7381 xmlGenericError(xmlGenericErrorContext,
7382 "malloc of %d byte failed\n", size);
7383 return(NULL);
7384 }
7385
7386 buf[len++] = cur;
7387 NEXT;
7388 cur = CUR;
7389 while (((cur >= 'a') && (cur <= 'z')) ||
7390 ((cur >= 'A') && (cur <= 'Z')) ||
7391 ((cur >= '0') && (cur <= '9')) ||
7392 (cur == '.') || (cur == '_') ||
7393 (cur == '-')) {
7394 if (len + 1 >= size) {
7395 size *= 2;
7396 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7397 if (buf == NULL) {
7398 xmlGenericError(xmlGenericErrorContext,
7399 "realloc of %d byte failed\n", size);
7400 return(NULL);
7401 }
7402 }
7403 buf[len++] = cur;
7404 NEXT;
7405 cur = CUR;
7406 if (cur == 0) {
7407 SHRINK;
7408 GROW;
7409 cur = CUR;
7410 }
7411 }
7412 buf[len] = 0;
7413 } else {
7414 ctxt->errNo = XML_ERR_ENCODING_NAME;
7415 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7416 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7417 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007418 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007419 }
7420 return(buf);
7421}
7422
7423/**
7424 * xmlParseEncodingDecl:
7425 * @ctxt: an XML parser context
7426 *
7427 * parse the XML encoding declaration
7428 *
7429 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7430 *
7431 * this setups the conversion filters.
7432 *
7433 * Returns the encoding value or NULL
7434 */
7435
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00007436const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007437xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7438 xmlChar *encoding = NULL;
7439 const xmlChar *q;
7440
7441 SKIP_BLANKS;
7442 if ((RAW == 'e') && (NXT(1) == 'n') &&
7443 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7444 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7445 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7446 SKIP(8);
7447 SKIP_BLANKS;
7448 if (RAW != '=') {
7449 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7450 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7451 ctxt->sax->error(ctxt->userData,
7452 "xmlParseEncodingDecl : expected '='\n");
7453 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007454 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007455 return(NULL);
7456 }
7457 NEXT;
7458 SKIP_BLANKS;
7459 if (RAW == '"') {
7460 NEXT;
7461 q = CUR_PTR;
7462 encoding = xmlParseEncName(ctxt);
7463 if (RAW != '"') {
7464 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7465 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7466 ctxt->sax->error(ctxt->userData,
7467 "String not closed\n%.50s\n", q);
7468 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007469 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007470 } else
7471 NEXT;
7472 } else if (RAW == '\''){
7473 NEXT;
7474 q = CUR_PTR;
7475 encoding = xmlParseEncName(ctxt);
7476 if (RAW != '\'') {
7477 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7478 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7479 ctxt->sax->error(ctxt->userData,
7480 "String not closed\n%.50s\n", q);
7481 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007482 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007483 } else
7484 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00007485 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007486 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7487 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7488 ctxt->sax->error(ctxt->userData,
7489 "xmlParseEncodingDecl : expected ' or \"\n");
7490 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007491 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007492 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00007493 /*
7494 * UTF-16 encoding stwich has already taken place at this stage,
7495 * more over the little-endian/big-endian selection is already done
7496 */
7497 if ((encoding != NULL) &&
7498 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
7499 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00007500 if (ctxt->encoding != NULL)
7501 xmlFree((xmlChar *) ctxt->encoding);
7502 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00007503 }
7504 /*
7505 * UTF-8 encoding is handled natively
7506 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00007507 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00007508 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
7509 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00007510 if (ctxt->encoding != NULL)
7511 xmlFree((xmlChar *) ctxt->encoding);
7512 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00007513 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00007514 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00007515 xmlCharEncodingHandlerPtr handler;
7516
7517 if (ctxt->input->encoding != NULL)
7518 xmlFree((xmlChar *) ctxt->input->encoding);
7519 ctxt->input->encoding = encoding;
7520
Daniel Veillarda6874ca2003-07-29 16:47:24 +00007521 handler = xmlFindCharEncodingHandler((const char *) encoding);
7522 if (handler != NULL) {
7523 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00007524 } else {
Daniel Veillarda6874ca2003-07-29 16:47:24 +00007525 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7526 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7527 ctxt->sax->error(ctxt->userData,
7528 "Unsupported encoding %s\n", encoding);
7529 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007530 }
7531 }
7532 }
7533 return(encoding);
7534}
7535
7536/**
7537 * xmlParseSDDecl:
7538 * @ctxt: an XML parser context
7539 *
7540 * parse the XML standalone declaration
7541 *
7542 * [32] SDDecl ::= S 'standalone' Eq
7543 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7544 *
7545 * [ VC: Standalone Document Declaration ]
7546 * TODO The standalone document declaration must have the value "no"
7547 * if any external markup declarations contain declarations of:
7548 * - attributes with default values, if elements to which these
7549 * attributes apply appear in the document without specifications
7550 * of values for these attributes, or
7551 * - entities (other than amp, lt, gt, apos, quot), if references
7552 * to those entities appear in the document, or
7553 * - attributes with values subject to normalization, where the
7554 * attribute appears in the document with a value which will change
7555 * as a result of normalization, or
7556 * - element types with element content, if white space occurs directly
7557 * within any instance of those types.
7558 *
7559 * Returns 1 if standalone, 0 otherwise
7560 */
7561
7562int
7563xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7564 int standalone = -1;
7565
7566 SKIP_BLANKS;
7567 if ((RAW == 's') && (NXT(1) == 't') &&
7568 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7569 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7570 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7571 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7572 SKIP(10);
7573 SKIP_BLANKS;
7574 if (RAW != '=') {
7575 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7576 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7577 ctxt->sax->error(ctxt->userData,
7578 "XML standalone declaration : expected '='\n");
7579 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007580 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007581 return(standalone);
7582 }
7583 NEXT;
7584 SKIP_BLANKS;
7585 if (RAW == '\''){
7586 NEXT;
7587 if ((RAW == 'n') && (NXT(1) == 'o')) {
7588 standalone = 0;
7589 SKIP(2);
7590 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7591 (NXT(2) == 's')) {
7592 standalone = 1;
7593 SKIP(3);
7594 } else {
7595 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7596 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7597 ctxt->sax->error(ctxt->userData,
7598 "standalone accepts only 'yes' or 'no'\n");
7599 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007600 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007601 }
7602 if (RAW != '\'') {
7603 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7604 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7605 ctxt->sax->error(ctxt->userData, "String not closed\n");
7606 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007607 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007608 } else
7609 NEXT;
7610 } else if (RAW == '"'){
7611 NEXT;
7612 if ((RAW == 'n') && (NXT(1) == 'o')) {
7613 standalone = 0;
7614 SKIP(2);
7615 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7616 (NXT(2) == 's')) {
7617 standalone = 1;
7618 SKIP(3);
7619 } else {
7620 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7621 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7622 ctxt->sax->error(ctxt->userData,
7623 "standalone accepts only 'yes' or 'no'\n");
7624 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007625 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007626 }
7627 if (RAW != '"') {
7628 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7629 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7630 ctxt->sax->error(ctxt->userData, "String not closed\n");
7631 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007632 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007633 } else
7634 NEXT;
7635 } else {
7636 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7637 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7638 ctxt->sax->error(ctxt->userData,
7639 "Standalone value not found\n");
7640 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007641 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007642 }
7643 }
7644 return(standalone);
7645}
7646
7647/**
7648 * xmlParseXMLDecl:
7649 * @ctxt: an XML parser context
7650 *
7651 * parse an XML declaration header
7652 *
7653 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7654 */
7655
7656void
7657xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7658 xmlChar *version;
7659
7660 /*
7661 * We know that '<?xml' is here.
7662 */
7663 SKIP(5);
7664
7665 if (!IS_BLANK(RAW)) {
7666 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7667 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7668 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7669 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007670 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007671 }
7672 SKIP_BLANKS;
7673
7674 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007675 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007676 */
7677 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007678 if (version == NULL) {
7679 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7680 ctxt->sax->error(ctxt->userData,
7681 "Malformed declaration expecting version\n");
7682 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007683 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard19840942001-11-29 16:11:38 +00007684 } else {
7685 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7686 /*
7687 * TODO: Blueberry should be detected here
7688 */
7689 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7690 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7691 version);
7692 }
7693 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007694 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007695 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007696 }
Owen Taylor3473f882001-02-23 17:55:21 +00007697
7698 /*
7699 * We may have the encoding declaration
7700 */
7701 if (!IS_BLANK(RAW)) {
7702 if ((RAW == '?') && (NXT(1) == '>')) {
7703 SKIP(2);
7704 return;
7705 }
7706 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7707 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7708 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7709 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007710 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007711 }
7712 xmlParseEncodingDecl(ctxt);
7713 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7714 /*
7715 * The XML REC instructs us to stop parsing right here
7716 */
7717 return;
7718 }
7719
7720 /*
7721 * We may have the standalone status.
7722 */
7723 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7724 if ((RAW == '?') && (NXT(1) == '>')) {
7725 SKIP(2);
7726 return;
7727 }
7728 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7729 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7730 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7731 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007732 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007733 }
7734 SKIP_BLANKS;
7735 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7736
7737 SKIP_BLANKS;
7738 if ((RAW == '?') && (NXT(1) == '>')) {
7739 SKIP(2);
7740 } else if (RAW == '>') {
7741 /* Deprecated old WD ... */
7742 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7743 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7744 ctxt->sax->error(ctxt->userData,
7745 "XML declaration must end-up with '?>'\n");
7746 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007747 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007748 NEXT;
7749 } else {
7750 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7751 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7752 ctxt->sax->error(ctxt->userData,
7753 "parsing XML declaration: '?>' expected\n");
7754 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007755 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007756 MOVETO_ENDTAG(CUR_PTR);
7757 NEXT;
7758 }
7759}
7760
7761/**
7762 * xmlParseMisc:
7763 * @ctxt: an XML parser context
7764 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007765 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007766 *
7767 * [27] Misc ::= Comment | PI | S
7768 */
7769
7770void
7771xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00007772 while (((RAW == '<') && (NXT(1) == '?')) ||
7773 ((RAW == '<') && (NXT(1) == '!') &&
7774 (NXT(2) == '-') && (NXT(3) == '-')) ||
7775 IS_BLANK(CUR)) {
7776 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007777 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00007778 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007779 NEXT;
7780 } else
7781 xmlParseComment(ctxt);
7782 }
7783}
7784
7785/**
7786 * xmlParseDocument:
7787 * @ctxt: an XML parser context
7788 *
7789 * parse an XML document (and build a tree if using the standard SAX
7790 * interface).
7791 *
7792 * [1] document ::= prolog element Misc*
7793 *
7794 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7795 *
7796 * Returns 0, -1 in case of error. the parser context is augmented
7797 * as a result of the parsing.
7798 */
7799
7800int
7801xmlParseDocument(xmlParserCtxtPtr ctxt) {
7802 xmlChar start[4];
7803 xmlCharEncoding enc;
7804
7805 xmlInitParser();
7806
7807 GROW;
7808
7809 /*
7810 * SAX: beginning of the document processing.
7811 */
7812 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7813 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7814
Daniel Veillard50f34372001-08-03 12:06:36 +00007815 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007816 /*
7817 * Get the 4 first bytes and decode the charset
7818 * if enc != XML_CHAR_ENCODING_NONE
7819 * plug some encoding conversion routines.
7820 */
7821 start[0] = RAW;
7822 start[1] = NXT(1);
7823 start[2] = NXT(2);
7824 start[3] = NXT(3);
7825 enc = xmlDetectCharEncoding(start, 4);
7826 if (enc != XML_CHAR_ENCODING_NONE) {
7827 xmlSwitchEncoding(ctxt, enc);
7828 }
Owen Taylor3473f882001-02-23 17:55:21 +00007829 }
7830
7831
7832 if (CUR == 0) {
7833 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7834 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7835 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7836 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007837 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007838 }
7839
7840 /*
7841 * Check for the XMLDecl in the Prolog.
7842 */
7843 GROW;
7844 if ((RAW == '<') && (NXT(1) == '?') &&
7845 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7846 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7847
7848 /*
7849 * Note that we will switch encoding on the fly.
7850 */
7851 xmlParseXMLDecl(ctxt);
7852 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7853 /*
7854 * The XML REC instructs us to stop parsing right here
7855 */
7856 return(-1);
7857 }
7858 ctxt->standalone = ctxt->input->standalone;
7859 SKIP_BLANKS;
7860 } else {
7861 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7862 }
7863 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7864 ctxt->sax->startDocument(ctxt->userData);
7865
7866 /*
7867 * The Misc part of the Prolog
7868 */
7869 GROW;
7870 xmlParseMisc(ctxt);
7871
7872 /*
7873 * Then possibly doc type declaration(s) and more Misc
7874 * (doctypedecl Misc*)?
7875 */
7876 GROW;
7877 if ((RAW == '<') && (NXT(1) == '!') &&
7878 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7879 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7880 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7881 (NXT(8) == 'E')) {
7882
7883 ctxt->inSubset = 1;
7884 xmlParseDocTypeDecl(ctxt);
7885 if (RAW == '[') {
7886 ctxt->instate = XML_PARSER_DTD;
7887 xmlParseInternalSubset(ctxt);
7888 }
7889
7890 /*
7891 * Create and update the external subset.
7892 */
7893 ctxt->inSubset = 2;
7894 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7895 (!ctxt->disableSAX))
7896 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7897 ctxt->extSubSystem, ctxt->extSubURI);
7898 ctxt->inSubset = 0;
7899
7900
7901 ctxt->instate = XML_PARSER_PROLOG;
7902 xmlParseMisc(ctxt);
7903 }
7904
7905 /*
7906 * Time to start parsing the tree itself
7907 */
7908 GROW;
7909 if (RAW != '<') {
7910 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7911 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7912 ctxt->sax->error(ctxt->userData,
7913 "Start tag expected, '<' not found\n");
7914 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007915 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007916 ctxt->instate = XML_PARSER_EOF;
7917 } else {
7918 ctxt->instate = XML_PARSER_CONTENT;
7919 xmlParseElement(ctxt);
7920 ctxt->instate = XML_PARSER_EPILOG;
7921
7922
7923 /*
7924 * The Misc part at the end
7925 */
7926 xmlParseMisc(ctxt);
7927
Daniel Veillard561b7f82002-03-20 21:55:57 +00007928 if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00007929 ctxt->errNo = XML_ERR_DOCUMENT_END;
7930 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7931 ctxt->sax->error(ctxt->userData,
7932 "Extra content at the end of the document\n");
7933 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007934 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007935 }
7936 ctxt->instate = XML_PARSER_EOF;
7937 }
7938
7939 /*
7940 * SAX: end of the document processing.
7941 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007942 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007943 ctxt->sax->endDocument(ctxt->userData);
7944
Daniel Veillard5997aca2002-03-18 18:36:20 +00007945 /*
7946 * Remove locally kept entity definitions if the tree was not built
7947 */
7948 if ((ctxt->myDoc != NULL) &&
7949 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
7950 xmlFreeDoc(ctxt->myDoc);
7951 ctxt->myDoc = NULL;
7952 }
7953
Daniel Veillardc7612992002-02-17 22:47:37 +00007954 if (! ctxt->wellFormed) {
7955 ctxt->valid = 0;
7956 return(-1);
7957 }
Owen Taylor3473f882001-02-23 17:55:21 +00007958 return(0);
7959}
7960
7961/**
7962 * xmlParseExtParsedEnt:
7963 * @ctxt: an XML parser context
7964 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007965 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00007966 * An external general parsed entity is well-formed if it matches the
7967 * production labeled extParsedEnt.
7968 *
7969 * [78] extParsedEnt ::= TextDecl? content
7970 *
7971 * Returns 0, -1 in case of error. the parser context is augmented
7972 * as a result of the parsing.
7973 */
7974
7975int
7976xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7977 xmlChar start[4];
7978 xmlCharEncoding enc;
7979
7980 xmlDefaultSAXHandlerInit();
7981
7982 GROW;
7983
7984 /*
7985 * SAX: beginning of the document processing.
7986 */
7987 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7988 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7989
7990 /*
7991 * Get the 4 first bytes and decode the charset
7992 * if enc != XML_CHAR_ENCODING_NONE
7993 * plug some encoding conversion routines.
7994 */
7995 start[0] = RAW;
7996 start[1] = NXT(1);
7997 start[2] = NXT(2);
7998 start[3] = NXT(3);
7999 enc = xmlDetectCharEncoding(start, 4);
8000 if (enc != XML_CHAR_ENCODING_NONE) {
8001 xmlSwitchEncoding(ctxt, enc);
8002 }
8003
8004
8005 if (CUR == 0) {
8006 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8007 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8008 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8009 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008010 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008011 }
8012
8013 /*
8014 * Check for the XMLDecl in the Prolog.
8015 */
8016 GROW;
8017 if ((RAW == '<') && (NXT(1) == '?') &&
8018 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8019 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8020
8021 /*
8022 * Note that we will switch encoding on the fly.
8023 */
8024 xmlParseXMLDecl(ctxt);
8025 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8026 /*
8027 * The XML REC instructs us to stop parsing right here
8028 */
8029 return(-1);
8030 }
8031 SKIP_BLANKS;
8032 } else {
8033 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8034 }
8035 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8036 ctxt->sax->startDocument(ctxt->userData);
8037
8038 /*
8039 * Doing validity checking on chunk doesn't make sense
8040 */
8041 ctxt->instate = XML_PARSER_CONTENT;
8042 ctxt->validate = 0;
8043 ctxt->loadsubset = 0;
8044 ctxt->depth = 0;
8045
8046 xmlParseContent(ctxt);
8047
8048 if ((RAW == '<') && (NXT(1) == '/')) {
8049 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8050 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8051 ctxt->sax->error(ctxt->userData,
8052 "chunk is not well balanced\n");
8053 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008054 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008055 } else if (RAW != 0) {
8056 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8057 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8058 ctxt->sax->error(ctxt->userData,
8059 "extra content at the end of well balanced chunk\n");
8060 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008061 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008062 }
8063
8064 /*
8065 * SAX: end of the document processing.
8066 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008067 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008068 ctxt->sax->endDocument(ctxt->userData);
8069
8070 if (! ctxt->wellFormed) return(-1);
8071 return(0);
8072}
8073
8074/************************************************************************
8075 * *
8076 * Progressive parsing interfaces *
8077 * *
8078 ************************************************************************/
8079
8080/**
8081 * xmlParseLookupSequence:
8082 * @ctxt: an XML parser context
8083 * @first: the first char to lookup
8084 * @next: the next char to lookup or zero
8085 * @third: the next char to lookup or zero
8086 *
8087 * Try to find if a sequence (first, next, third) or just (first next) or
8088 * (first) is available in the input stream.
8089 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8090 * to avoid rescanning sequences of bytes, it DOES change the state of the
8091 * parser, do not use liberally.
8092 *
8093 * Returns the index to the current parsing point if the full sequence
8094 * is available, -1 otherwise.
8095 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008096static int
Owen Taylor3473f882001-02-23 17:55:21 +00008097xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8098 xmlChar next, xmlChar third) {
8099 int base, len;
8100 xmlParserInputPtr in;
8101 const xmlChar *buf;
8102
8103 in = ctxt->input;
8104 if (in == NULL) return(-1);
8105 base = in->cur - in->base;
8106 if (base < 0) return(-1);
8107 if (ctxt->checkIndex > base)
8108 base = ctxt->checkIndex;
8109 if (in->buf == NULL) {
8110 buf = in->base;
8111 len = in->length;
8112 } else {
8113 buf = in->buf->buffer->content;
8114 len = in->buf->buffer->use;
8115 }
8116 /* take into account the sequence length */
8117 if (third) len -= 2;
8118 else if (next) len --;
8119 for (;base < len;base++) {
8120 if (buf[base] == first) {
8121 if (third != 0) {
8122 if ((buf[base + 1] != next) ||
8123 (buf[base + 2] != third)) continue;
8124 } else if (next != 0) {
8125 if (buf[base + 1] != next) continue;
8126 }
8127 ctxt->checkIndex = 0;
8128#ifdef DEBUG_PUSH
8129 if (next == 0)
8130 xmlGenericError(xmlGenericErrorContext,
8131 "PP: lookup '%c' found at %d\n",
8132 first, base);
8133 else if (third == 0)
8134 xmlGenericError(xmlGenericErrorContext,
8135 "PP: lookup '%c%c' found at %d\n",
8136 first, next, base);
8137 else
8138 xmlGenericError(xmlGenericErrorContext,
8139 "PP: lookup '%c%c%c' found at %d\n",
8140 first, next, third, base);
8141#endif
8142 return(base - (in->cur - in->base));
8143 }
8144 }
8145 ctxt->checkIndex = base;
8146#ifdef DEBUG_PUSH
8147 if (next == 0)
8148 xmlGenericError(xmlGenericErrorContext,
8149 "PP: lookup '%c' failed\n", first);
8150 else if (third == 0)
8151 xmlGenericError(xmlGenericErrorContext,
8152 "PP: lookup '%c%c' failed\n", first, next);
8153 else
8154 xmlGenericError(xmlGenericErrorContext,
8155 "PP: lookup '%c%c%c' failed\n", first, next, third);
8156#endif
8157 return(-1);
8158}
8159
8160/**
Daniel Veillarda880b122003-04-21 21:36:41 +00008161 * xmlParseGetLasts:
8162 * @ctxt: an XML parser context
8163 * @lastlt: pointer to store the last '<' from the input
8164 * @lastgt: pointer to store the last '>' from the input
8165 *
8166 * Lookup the last < and > in the current chunk
8167 */
8168static void
8169xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
8170 const xmlChar **lastgt) {
8171 const xmlChar *tmp;
8172
8173 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
8174 xmlGenericError(xmlGenericErrorContext,
8175 "Internal error: xmlParseGetLasts\n");
8176 return;
8177 }
8178 if ((ctxt->progressive == 1) && (ctxt->inputNr == 1)) {
8179 tmp = ctxt->input->end;
8180 tmp--;
8181 while ((tmp >= ctxt->input->base) && (*tmp != '<') &&
8182 (*tmp != '>')) tmp--;
8183 if (tmp < ctxt->input->base) {
8184 *lastlt = NULL;
8185 *lastgt = NULL;
8186 } else if (*tmp == '<') {
8187 *lastlt = tmp;
8188 tmp--;
8189 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
8190 if (tmp < ctxt->input->base)
8191 *lastgt = NULL;
8192 else
8193 *lastgt = tmp;
8194 } else {
8195 *lastgt = tmp;
8196 tmp--;
8197 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
8198 if (tmp < ctxt->input->base)
8199 *lastlt = NULL;
8200 else
8201 *lastlt = tmp;
8202 }
8203
8204 } else {
8205 *lastlt = NULL;
8206 *lastgt = NULL;
8207 }
8208}
8209/**
Owen Taylor3473f882001-02-23 17:55:21 +00008210 * xmlParseTryOrFinish:
8211 * @ctxt: an XML parser context
8212 * @terminate: last chunk indicator
8213 *
8214 * Try to progress on parsing
8215 *
8216 * Returns zero if no parsing was possible
8217 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008218static int
Owen Taylor3473f882001-02-23 17:55:21 +00008219xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8220 int ret = 0;
8221 int avail;
8222 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00008223 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00008224
8225#ifdef DEBUG_PUSH
8226 switch (ctxt->instate) {
8227 case XML_PARSER_EOF:
8228 xmlGenericError(xmlGenericErrorContext,
8229 "PP: try EOF\n"); break;
8230 case XML_PARSER_START:
8231 xmlGenericError(xmlGenericErrorContext,
8232 "PP: try START\n"); break;
8233 case XML_PARSER_MISC:
8234 xmlGenericError(xmlGenericErrorContext,
8235 "PP: try MISC\n");break;
8236 case XML_PARSER_COMMENT:
8237 xmlGenericError(xmlGenericErrorContext,
8238 "PP: try COMMENT\n");break;
8239 case XML_PARSER_PROLOG:
8240 xmlGenericError(xmlGenericErrorContext,
8241 "PP: try PROLOG\n");break;
8242 case XML_PARSER_START_TAG:
8243 xmlGenericError(xmlGenericErrorContext,
8244 "PP: try START_TAG\n");break;
8245 case XML_PARSER_CONTENT:
8246 xmlGenericError(xmlGenericErrorContext,
8247 "PP: try CONTENT\n");break;
8248 case XML_PARSER_CDATA_SECTION:
8249 xmlGenericError(xmlGenericErrorContext,
8250 "PP: try CDATA_SECTION\n");break;
8251 case XML_PARSER_END_TAG:
8252 xmlGenericError(xmlGenericErrorContext,
8253 "PP: try END_TAG\n");break;
8254 case XML_PARSER_ENTITY_DECL:
8255 xmlGenericError(xmlGenericErrorContext,
8256 "PP: try ENTITY_DECL\n");break;
8257 case XML_PARSER_ENTITY_VALUE:
8258 xmlGenericError(xmlGenericErrorContext,
8259 "PP: try ENTITY_VALUE\n");break;
8260 case XML_PARSER_ATTRIBUTE_VALUE:
8261 xmlGenericError(xmlGenericErrorContext,
8262 "PP: try ATTRIBUTE_VALUE\n");break;
8263 case XML_PARSER_DTD:
8264 xmlGenericError(xmlGenericErrorContext,
8265 "PP: try DTD\n");break;
8266 case XML_PARSER_EPILOG:
8267 xmlGenericError(xmlGenericErrorContext,
8268 "PP: try EPILOG\n");break;
8269 case XML_PARSER_PI:
8270 xmlGenericError(xmlGenericErrorContext,
8271 "PP: try PI\n");break;
8272 case XML_PARSER_IGNORE:
8273 xmlGenericError(xmlGenericErrorContext,
8274 "PP: try IGNORE\n");break;
8275 }
8276#endif
8277
Daniel Veillarda880b122003-04-21 21:36:41 +00008278 if (ctxt->input->cur - ctxt->input->base > 4096) {
8279 xmlSHRINK(ctxt);
8280 ctxt->checkIndex = 0;
8281 }
8282 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00008283
Daniel Veillarda880b122003-04-21 21:36:41 +00008284 while (1) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008285 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
8286 return(0);
8287
8288
Owen Taylor3473f882001-02-23 17:55:21 +00008289 /*
8290 * Pop-up of finished entities.
8291 */
8292 while ((RAW == 0) && (ctxt->inputNr > 1))
8293 xmlPopInput(ctxt);
8294
8295 if (ctxt->input ==NULL) break;
8296 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00008297 avail = ctxt->input->length -
8298 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00008299 else {
8300 /*
8301 * If we are operating on converted input, try to flush
8302 * remaining chars to avoid them stalling in the non-converted
8303 * buffer.
8304 */
8305 if ((ctxt->input->buf->raw != NULL) &&
8306 (ctxt->input->buf->raw->use > 0)) {
8307 int base = ctxt->input->base -
8308 ctxt->input->buf->buffer->content;
8309 int current = ctxt->input->cur - ctxt->input->base;
8310
8311 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8312 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8313 ctxt->input->cur = ctxt->input->base + current;
8314 ctxt->input->end =
8315 &ctxt->input->buf->buffer->content[
8316 ctxt->input->buf->buffer->use];
8317 }
8318 avail = ctxt->input->buf->buffer->use -
8319 (ctxt->input->cur - ctxt->input->base);
8320 }
Owen Taylor3473f882001-02-23 17:55:21 +00008321 if (avail < 1)
8322 goto done;
8323 switch (ctxt->instate) {
8324 case XML_PARSER_EOF:
8325 /*
8326 * Document parsing is done !
8327 */
8328 goto done;
8329 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008330 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8331 xmlChar start[4];
8332 xmlCharEncoding enc;
8333
8334 /*
8335 * Very first chars read from the document flow.
8336 */
8337 if (avail < 4)
8338 goto done;
8339
8340 /*
8341 * Get the 4 first bytes and decode the charset
8342 * if enc != XML_CHAR_ENCODING_NONE
8343 * plug some encoding conversion routines.
8344 */
8345 start[0] = RAW;
8346 start[1] = NXT(1);
8347 start[2] = NXT(2);
8348 start[3] = NXT(3);
8349 enc = xmlDetectCharEncoding(start, 4);
8350 if (enc != XML_CHAR_ENCODING_NONE) {
8351 xmlSwitchEncoding(ctxt, enc);
8352 }
8353 break;
8354 }
Owen Taylor3473f882001-02-23 17:55:21 +00008355
8356 cur = ctxt->input->cur[0];
8357 next = ctxt->input->cur[1];
8358 if (cur == 0) {
8359 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8360 ctxt->sax->setDocumentLocator(ctxt->userData,
8361 &xmlDefaultSAXLocator);
8362 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8363 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8364 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8365 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008366 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008367 ctxt->instate = XML_PARSER_EOF;
8368#ifdef DEBUG_PUSH
8369 xmlGenericError(xmlGenericErrorContext,
8370 "PP: entering EOF\n");
8371#endif
8372 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8373 ctxt->sax->endDocument(ctxt->userData);
8374 goto done;
8375 }
8376 if ((cur == '<') && (next == '?')) {
8377 /* PI or XML decl */
8378 if (avail < 5) return(ret);
8379 if ((!terminate) &&
8380 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8381 return(ret);
8382 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8383 ctxt->sax->setDocumentLocator(ctxt->userData,
8384 &xmlDefaultSAXLocator);
8385 if ((ctxt->input->cur[2] == 'x') &&
8386 (ctxt->input->cur[3] == 'm') &&
8387 (ctxt->input->cur[4] == 'l') &&
8388 (IS_BLANK(ctxt->input->cur[5]))) {
8389 ret += 5;
8390#ifdef DEBUG_PUSH
8391 xmlGenericError(xmlGenericErrorContext,
8392 "PP: Parsing XML Decl\n");
8393#endif
8394 xmlParseXMLDecl(ctxt);
8395 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8396 /*
8397 * The XML REC instructs us to stop parsing right
8398 * here
8399 */
8400 ctxt->instate = XML_PARSER_EOF;
8401 return(0);
8402 }
8403 ctxt->standalone = ctxt->input->standalone;
8404 if ((ctxt->encoding == NULL) &&
8405 (ctxt->input->encoding != NULL))
8406 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8407 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8408 (!ctxt->disableSAX))
8409 ctxt->sax->startDocument(ctxt->userData);
8410 ctxt->instate = XML_PARSER_MISC;
8411#ifdef DEBUG_PUSH
8412 xmlGenericError(xmlGenericErrorContext,
8413 "PP: entering MISC\n");
8414#endif
8415 } else {
8416 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8417 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8418 (!ctxt->disableSAX))
8419 ctxt->sax->startDocument(ctxt->userData);
8420 ctxt->instate = XML_PARSER_MISC;
8421#ifdef DEBUG_PUSH
8422 xmlGenericError(xmlGenericErrorContext,
8423 "PP: entering MISC\n");
8424#endif
8425 }
8426 } else {
8427 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8428 ctxt->sax->setDocumentLocator(ctxt->userData,
8429 &xmlDefaultSAXLocator);
8430 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8431 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8432 (!ctxt->disableSAX))
8433 ctxt->sax->startDocument(ctxt->userData);
8434 ctxt->instate = XML_PARSER_MISC;
8435#ifdef DEBUG_PUSH
8436 xmlGenericError(xmlGenericErrorContext,
8437 "PP: entering MISC\n");
8438#endif
8439 }
8440 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00008441 case XML_PARSER_START_TAG: {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008442 const xmlChar *name, *oldname;
Daniel Veillarda880b122003-04-21 21:36:41 +00008443
8444 if ((avail < 2) && (ctxt->inputNr == 1))
8445 goto done;
8446 cur = ctxt->input->cur[0];
8447 if (cur != '<') {
8448 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8449 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8450 ctxt->sax->error(ctxt->userData,
8451 "Start tag expect, '<' not found\n");
8452 ctxt->wellFormed = 0;
8453 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8454 ctxt->instate = XML_PARSER_EOF;
8455#ifdef DEBUG_PUSH
8456 xmlGenericError(xmlGenericErrorContext,
8457 "PP: entering EOF\n");
8458#endif
8459 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8460 ctxt->sax->endDocument(ctxt->userData);
8461 goto done;
8462 }
8463 if (!terminate) {
8464 if (ctxt->progressive) {
8465 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
8466 goto done;
8467 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
8468 goto done;
8469 }
8470 }
8471 if (ctxt->spaceNr == 0)
8472 spacePush(ctxt, -1);
8473 else
8474 spacePush(ctxt, *ctxt->space);
8475 name = xmlParseStartTag(ctxt);
8476 if (name == NULL) {
8477 spacePop(ctxt);
8478 ctxt->instate = XML_PARSER_EOF;
8479#ifdef DEBUG_PUSH
8480 xmlGenericError(xmlGenericErrorContext,
8481 "PP: entering EOF\n");
8482#endif
8483 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8484 ctxt->sax->endDocument(ctxt->userData);
8485 goto done;
8486 }
8487 namePush(ctxt, name);
8488
8489 /*
8490 * [ VC: Root Element Type ]
8491 * The Name in the document type declaration must match
8492 * the element type of the root element.
8493 */
8494 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8495 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8496 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8497
8498 /*
8499 * Check for an Empty Element.
8500 */
8501 if ((RAW == '/') && (NXT(1) == '>')) {
8502 SKIP(2);
8503 if ((ctxt->sax != NULL) &&
8504 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8505 ctxt->sax->endElement(ctxt->userData, name);
8506 oldname = namePop(ctxt);
8507 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00008508#ifdef DEBUG_STACK
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008509 if (oldname != NULL) {
8510 xmlGenericError(xmlGenericErrorContext,
8511 "Close: popped %s\n", oldname);
Daniel Veillarda880b122003-04-21 21:36:41 +00008512 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008513#endif
Daniel Veillarda880b122003-04-21 21:36:41 +00008514 if (ctxt->name == NULL) {
8515 ctxt->instate = XML_PARSER_EPILOG;
8516#ifdef DEBUG_PUSH
8517 xmlGenericError(xmlGenericErrorContext,
8518 "PP: entering EPILOG\n");
8519#endif
8520 } else {
8521 ctxt->instate = XML_PARSER_CONTENT;
8522#ifdef DEBUG_PUSH
8523 xmlGenericError(xmlGenericErrorContext,
8524 "PP: entering CONTENT\n");
8525#endif
8526 }
8527 break;
8528 }
8529 if (RAW == '>') {
8530 NEXT;
8531 } else {
8532 ctxt->errNo = XML_ERR_GT_REQUIRED;
8533 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8534 ctxt->sax->error(ctxt->userData,
8535 "Couldn't find end of Start Tag %s\n",
8536 name);
8537 ctxt->wellFormed = 0;
8538 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8539
8540 /*
8541 * end of parsing of this node.
8542 */
8543 nodePop(ctxt);
8544 oldname = namePop(ctxt);
8545 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00008546#ifdef DEBUG_STACK
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008547 if (oldname != NULL) {
8548 xmlGenericError(xmlGenericErrorContext,
8549 "Close: popped %s\n", oldname);
Daniel Veillarda880b122003-04-21 21:36:41 +00008550 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008551#endif
Daniel Veillarda880b122003-04-21 21:36:41 +00008552 }
8553 ctxt->instate = XML_PARSER_CONTENT;
8554#ifdef DEBUG_PUSH
8555 xmlGenericError(xmlGenericErrorContext,
8556 "PP: entering CONTENT\n");
8557#endif
8558 break;
8559 }
8560 case XML_PARSER_CONTENT: {
8561 const xmlChar *test;
8562 unsigned int cons;
8563 if ((avail < 2) && (ctxt->inputNr == 1))
8564 goto done;
8565 cur = ctxt->input->cur[0];
8566 next = ctxt->input->cur[1];
8567
8568 test = CUR_PTR;
8569 cons = ctxt->input->consumed;
8570 if ((cur == '<') && (next == '/')) {
8571 ctxt->instate = XML_PARSER_END_TAG;
8572#ifdef DEBUG_PUSH
8573 xmlGenericError(xmlGenericErrorContext,
8574 "PP: entering END_TAG\n");
8575#endif
8576 break;
8577 } else if ((cur == '<') && (next == '?')) {
8578 if ((!terminate) &&
8579 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8580 goto done;
8581#ifdef DEBUG_PUSH
8582 xmlGenericError(xmlGenericErrorContext,
8583 "PP: Parsing PI\n");
8584#endif
8585 xmlParsePI(ctxt);
8586 } else if ((cur == '<') && (next != '!')) {
8587 ctxt->instate = XML_PARSER_START_TAG;
8588#ifdef DEBUG_PUSH
8589 xmlGenericError(xmlGenericErrorContext,
8590 "PP: entering START_TAG\n");
8591#endif
8592 break;
8593 } else if ((cur == '<') && (next == '!') &&
8594 (ctxt->input->cur[2] == '-') &&
8595 (ctxt->input->cur[3] == '-')) {
8596 if ((!terminate) &&
8597 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8598 goto done;
8599#ifdef DEBUG_PUSH
8600 xmlGenericError(xmlGenericErrorContext,
8601 "PP: Parsing Comment\n");
8602#endif
8603 xmlParseComment(ctxt);
8604 ctxt->instate = XML_PARSER_CONTENT;
8605 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8606 (ctxt->input->cur[2] == '[') &&
8607 (ctxt->input->cur[3] == 'C') &&
8608 (ctxt->input->cur[4] == 'D') &&
8609 (ctxt->input->cur[5] == 'A') &&
8610 (ctxt->input->cur[6] == 'T') &&
8611 (ctxt->input->cur[7] == 'A') &&
8612 (ctxt->input->cur[8] == '[')) {
8613 SKIP(9);
8614 ctxt->instate = XML_PARSER_CDATA_SECTION;
8615#ifdef DEBUG_PUSH
8616 xmlGenericError(xmlGenericErrorContext,
8617 "PP: entering CDATA_SECTION\n");
8618#endif
8619 break;
8620 } else if ((cur == '<') && (next == '!') &&
8621 (avail < 9)) {
8622 goto done;
8623 } else if (cur == '&') {
8624 if ((!terminate) &&
8625 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8626 goto done;
8627#ifdef DEBUG_PUSH
8628 xmlGenericError(xmlGenericErrorContext,
8629 "PP: Parsing Reference\n");
8630#endif
8631 xmlParseReference(ctxt);
8632 } else {
8633 /* TODO Avoid the extra copy, handle directly !!! */
8634 /*
8635 * Goal of the following test is:
8636 * - minimize calls to the SAX 'character' callback
8637 * when they are mergeable
8638 * - handle a problem for isBlank when we only parse
8639 * a sequence of blank chars and the next one is
8640 * not available to check against '<' presence.
8641 * - tries to homogenize the differences in SAX
8642 * callbacks between the push and pull versions
8643 * of the parser.
8644 */
8645 if ((ctxt->inputNr == 1) &&
8646 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8647 if (!terminate) {
8648 if (ctxt->progressive) {
8649 if ((lastlt == NULL) ||
8650 (ctxt->input->cur > lastlt))
8651 goto done;
8652 } else if (xmlParseLookupSequence(ctxt,
8653 '<', 0, 0) < 0) {
8654 goto done;
8655 }
8656 }
8657 }
8658 ctxt->checkIndex = 0;
8659#ifdef DEBUG_PUSH
8660 xmlGenericError(xmlGenericErrorContext,
8661 "PP: Parsing char data\n");
8662#endif
8663 xmlParseCharData(ctxt, 0);
8664 }
8665 /*
8666 * Pop-up of finished entities.
8667 */
8668 while ((RAW == 0) && (ctxt->inputNr > 1))
8669 xmlPopInput(ctxt);
8670 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
8671 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8672 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8673 ctxt->sax->error(ctxt->userData,
8674 "detected an error in element content\n");
8675 ctxt->wellFormed = 0;
8676 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8677 ctxt->instate = XML_PARSER_EOF;
8678 break;
8679 }
8680 break;
8681 }
8682 case XML_PARSER_END_TAG:
8683 if (avail < 2)
8684 goto done;
8685 if (!terminate) {
8686 if (ctxt->progressive) {
8687 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
8688 goto done;
8689 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
8690 goto done;
8691 }
8692 }
8693 xmlParseEndTag(ctxt);
8694 if (ctxt->name == NULL) {
8695 ctxt->instate = XML_PARSER_EPILOG;
8696#ifdef DEBUG_PUSH
8697 xmlGenericError(xmlGenericErrorContext,
8698 "PP: entering EPILOG\n");
8699#endif
8700 } else {
8701 ctxt->instate = XML_PARSER_CONTENT;
8702#ifdef DEBUG_PUSH
8703 xmlGenericError(xmlGenericErrorContext,
8704 "PP: entering CONTENT\n");
8705#endif
8706 }
8707 break;
8708 case XML_PARSER_CDATA_SECTION: {
8709 /*
8710 * The Push mode need to have the SAX callback for
8711 * cdataBlock merge back contiguous callbacks.
8712 */
8713 int base;
8714
8715 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8716 if (base < 0) {
8717 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8718 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8719 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00008720 ctxt->sax->cdataBlock(ctxt->userData,
8721 ctxt->input->cur,
8722 XML_PARSER_BIG_BUFFER_SIZE);
8723 else if (ctxt->sax->characters != NULL)
8724 ctxt->sax->characters(ctxt->userData,
8725 ctxt->input->cur,
Daniel Veillarda880b122003-04-21 21:36:41 +00008726 XML_PARSER_BIG_BUFFER_SIZE);
8727 }
8728 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8729 ctxt->checkIndex = 0;
8730 }
8731 goto done;
8732 } else {
8733 if ((ctxt->sax != NULL) && (base > 0) &&
8734 (!ctxt->disableSAX)) {
8735 if (ctxt->sax->cdataBlock != NULL)
8736 ctxt->sax->cdataBlock(ctxt->userData,
8737 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00008738 else if (ctxt->sax->characters != NULL)
8739 ctxt->sax->characters(ctxt->userData,
8740 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00008741 }
8742 SKIP(base + 3);
8743 ctxt->checkIndex = 0;
8744 ctxt->instate = XML_PARSER_CONTENT;
8745#ifdef DEBUG_PUSH
8746 xmlGenericError(xmlGenericErrorContext,
8747 "PP: entering CONTENT\n");
8748#endif
8749 }
8750 break;
8751 }
Owen Taylor3473f882001-02-23 17:55:21 +00008752 case XML_PARSER_MISC:
8753 SKIP_BLANKS;
8754 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00008755 avail = ctxt->input->length -
8756 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00008757 else
Daniel Veillarda880b122003-04-21 21:36:41 +00008758 avail = ctxt->input->buf->buffer->use -
8759 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00008760 if (avail < 2)
8761 goto done;
8762 cur = ctxt->input->cur[0];
8763 next = ctxt->input->cur[1];
8764 if ((cur == '<') && (next == '?')) {
8765 if ((!terminate) &&
8766 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8767 goto done;
8768#ifdef DEBUG_PUSH
8769 xmlGenericError(xmlGenericErrorContext,
8770 "PP: Parsing PI\n");
8771#endif
8772 xmlParsePI(ctxt);
8773 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00008774 (ctxt->input->cur[2] == '-') &&
8775 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008776 if ((!terminate) &&
8777 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8778 goto done;
8779#ifdef DEBUG_PUSH
8780 xmlGenericError(xmlGenericErrorContext,
8781 "PP: Parsing Comment\n");
8782#endif
8783 xmlParseComment(ctxt);
8784 ctxt->instate = XML_PARSER_MISC;
8785 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00008786 (ctxt->input->cur[2] == 'D') &&
8787 (ctxt->input->cur[3] == 'O') &&
8788 (ctxt->input->cur[4] == 'C') &&
8789 (ctxt->input->cur[5] == 'T') &&
8790 (ctxt->input->cur[6] == 'Y') &&
8791 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008792 (ctxt->input->cur[8] == 'E')) {
8793 if ((!terminate) &&
8794 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8795 goto done;
8796#ifdef DEBUG_PUSH
8797 xmlGenericError(xmlGenericErrorContext,
8798 "PP: Parsing internal subset\n");
8799#endif
8800 ctxt->inSubset = 1;
8801 xmlParseDocTypeDecl(ctxt);
8802 if (RAW == '[') {
8803 ctxt->instate = XML_PARSER_DTD;
8804#ifdef DEBUG_PUSH
8805 xmlGenericError(xmlGenericErrorContext,
8806 "PP: entering DTD\n");
8807#endif
8808 } else {
8809 /*
8810 * Create and update the external subset.
8811 */
8812 ctxt->inSubset = 2;
8813 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8814 (ctxt->sax->externalSubset != NULL))
8815 ctxt->sax->externalSubset(ctxt->userData,
8816 ctxt->intSubName, ctxt->extSubSystem,
8817 ctxt->extSubURI);
8818 ctxt->inSubset = 0;
8819 ctxt->instate = XML_PARSER_PROLOG;
8820#ifdef DEBUG_PUSH
8821 xmlGenericError(xmlGenericErrorContext,
8822 "PP: entering PROLOG\n");
8823#endif
8824 }
8825 } else if ((cur == '<') && (next == '!') &&
8826 (avail < 9)) {
8827 goto done;
8828 } else {
8829 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00008830 ctxt->progressive = 1;
8831 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00008832#ifdef DEBUG_PUSH
8833 xmlGenericError(xmlGenericErrorContext,
8834 "PP: entering START_TAG\n");
8835#endif
8836 }
8837 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008838 case XML_PARSER_PROLOG:
8839 SKIP_BLANKS;
8840 if (ctxt->input->buf == NULL)
8841 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8842 else
8843 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8844 if (avail < 2)
8845 goto done;
8846 cur = ctxt->input->cur[0];
8847 next = ctxt->input->cur[1];
8848 if ((cur == '<') && (next == '?')) {
8849 if ((!terminate) &&
8850 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8851 goto done;
8852#ifdef DEBUG_PUSH
8853 xmlGenericError(xmlGenericErrorContext,
8854 "PP: Parsing PI\n");
8855#endif
8856 xmlParsePI(ctxt);
8857 } else if ((cur == '<') && (next == '!') &&
8858 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8859 if ((!terminate) &&
8860 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8861 goto done;
8862#ifdef DEBUG_PUSH
8863 xmlGenericError(xmlGenericErrorContext,
8864 "PP: Parsing Comment\n");
8865#endif
8866 xmlParseComment(ctxt);
8867 ctxt->instate = XML_PARSER_PROLOG;
8868 } else if ((cur == '<') && (next == '!') &&
8869 (avail < 4)) {
8870 goto done;
8871 } else {
8872 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00008873 ctxt->progressive = 1;
8874 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00008875#ifdef DEBUG_PUSH
8876 xmlGenericError(xmlGenericErrorContext,
8877 "PP: entering START_TAG\n");
8878#endif
8879 }
8880 break;
8881 case XML_PARSER_EPILOG:
8882 SKIP_BLANKS;
8883 if (ctxt->input->buf == NULL)
8884 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8885 else
8886 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8887 if (avail < 2)
8888 goto done;
8889 cur = ctxt->input->cur[0];
8890 next = ctxt->input->cur[1];
8891 if ((cur == '<') && (next == '?')) {
8892 if ((!terminate) &&
8893 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8894 goto done;
8895#ifdef DEBUG_PUSH
8896 xmlGenericError(xmlGenericErrorContext,
8897 "PP: Parsing PI\n");
8898#endif
8899 xmlParsePI(ctxt);
8900 ctxt->instate = XML_PARSER_EPILOG;
8901 } else if ((cur == '<') && (next == '!') &&
8902 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8903 if ((!terminate) &&
8904 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8905 goto done;
8906#ifdef DEBUG_PUSH
8907 xmlGenericError(xmlGenericErrorContext,
8908 "PP: Parsing Comment\n");
8909#endif
8910 xmlParseComment(ctxt);
8911 ctxt->instate = XML_PARSER_EPILOG;
8912 } else if ((cur == '<') && (next == '!') &&
8913 (avail < 4)) {
8914 goto done;
8915 } else {
8916 ctxt->errNo = XML_ERR_DOCUMENT_END;
8917 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8918 ctxt->sax->error(ctxt->userData,
8919 "Extra content at the end of the document\n");
8920 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008921 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008922 ctxt->instate = XML_PARSER_EOF;
8923#ifdef DEBUG_PUSH
8924 xmlGenericError(xmlGenericErrorContext,
8925 "PP: entering EOF\n");
8926#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008927 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008928 ctxt->sax->endDocument(ctxt->userData);
8929 goto done;
8930 }
8931 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008932 case XML_PARSER_DTD: {
8933 /*
8934 * Sorry but progressive parsing of the internal subset
8935 * is not expected to be supported. We first check that
8936 * the full content of the internal subset is available and
8937 * the parsing is launched only at that point.
8938 * Internal subset ends up with "']' S? '>'" in an unescaped
8939 * section and not in a ']]>' sequence which are conditional
8940 * sections (whoever argued to keep that crap in XML deserve
8941 * a place in hell !).
8942 */
8943 int base, i;
8944 xmlChar *buf;
8945 xmlChar quote = 0;
8946
8947 base = ctxt->input->cur - ctxt->input->base;
8948 if (base < 0) return(0);
8949 if (ctxt->checkIndex > base)
8950 base = ctxt->checkIndex;
8951 buf = ctxt->input->buf->buffer->content;
8952 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8953 base++) {
8954 if (quote != 0) {
8955 if (buf[base] == quote)
8956 quote = 0;
8957 continue;
8958 }
8959 if (buf[base] == '"') {
8960 quote = '"';
8961 continue;
8962 }
8963 if (buf[base] == '\'') {
8964 quote = '\'';
8965 continue;
8966 }
8967 if (buf[base] == ']') {
8968 if ((unsigned int) base +1 >=
8969 ctxt->input->buf->buffer->use)
8970 break;
8971 if (buf[base + 1] == ']') {
8972 /* conditional crap, skip both ']' ! */
8973 base++;
8974 continue;
8975 }
8976 for (i = 0;
8977 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8978 i++) {
8979 if (buf[base + i] == '>')
8980 goto found_end_int_subset;
8981 }
8982 break;
8983 }
8984 }
8985 /*
8986 * We didn't found the end of the Internal subset
8987 */
8988 if (quote == 0)
8989 ctxt->checkIndex = base;
8990#ifdef DEBUG_PUSH
8991 if (next == 0)
8992 xmlGenericError(xmlGenericErrorContext,
8993 "PP: lookup of int subset end filed\n");
8994#endif
8995 goto done;
8996
8997found_end_int_subset:
8998 xmlParseInternalSubset(ctxt);
8999 ctxt->inSubset = 2;
9000 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9001 (ctxt->sax->externalSubset != NULL))
9002 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9003 ctxt->extSubSystem, ctxt->extSubURI);
9004 ctxt->inSubset = 0;
9005 ctxt->instate = XML_PARSER_PROLOG;
9006 ctxt->checkIndex = 0;
9007#ifdef DEBUG_PUSH
9008 xmlGenericError(xmlGenericErrorContext,
9009 "PP: entering PROLOG\n");
9010#endif
9011 break;
9012 }
9013 case XML_PARSER_COMMENT:
9014 xmlGenericError(xmlGenericErrorContext,
9015 "PP: internal error, state == COMMENT\n");
9016 ctxt->instate = XML_PARSER_CONTENT;
9017#ifdef DEBUG_PUSH
9018 xmlGenericError(xmlGenericErrorContext,
9019 "PP: entering CONTENT\n");
9020#endif
9021 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009022 case XML_PARSER_IGNORE:
9023 xmlGenericError(xmlGenericErrorContext,
9024 "PP: internal error, state == IGNORE");
9025 ctxt->instate = XML_PARSER_DTD;
9026#ifdef DEBUG_PUSH
9027 xmlGenericError(xmlGenericErrorContext,
9028 "PP: entering DTD\n");
9029#endif
9030 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009031 case XML_PARSER_PI:
9032 xmlGenericError(xmlGenericErrorContext,
9033 "PP: internal error, state == PI\n");
9034 ctxt->instate = XML_PARSER_CONTENT;
9035#ifdef DEBUG_PUSH
9036 xmlGenericError(xmlGenericErrorContext,
9037 "PP: entering CONTENT\n");
9038#endif
9039 break;
9040 case XML_PARSER_ENTITY_DECL:
9041 xmlGenericError(xmlGenericErrorContext,
9042 "PP: internal error, state == ENTITY_DECL\n");
9043 ctxt->instate = XML_PARSER_DTD;
9044#ifdef DEBUG_PUSH
9045 xmlGenericError(xmlGenericErrorContext,
9046 "PP: entering DTD\n");
9047#endif
9048 break;
9049 case XML_PARSER_ENTITY_VALUE:
9050 xmlGenericError(xmlGenericErrorContext,
9051 "PP: internal error, state == ENTITY_VALUE\n");
9052 ctxt->instate = XML_PARSER_CONTENT;
9053#ifdef DEBUG_PUSH
9054 xmlGenericError(xmlGenericErrorContext,
9055 "PP: entering DTD\n");
9056#endif
9057 break;
9058 case XML_PARSER_ATTRIBUTE_VALUE:
9059 xmlGenericError(xmlGenericErrorContext,
9060 "PP: internal error, state == ATTRIBUTE_VALUE\n");
9061 ctxt->instate = XML_PARSER_START_TAG;
9062#ifdef DEBUG_PUSH
9063 xmlGenericError(xmlGenericErrorContext,
9064 "PP: entering START_TAG\n");
9065#endif
9066 break;
9067 case XML_PARSER_SYSTEM_LITERAL:
9068 xmlGenericError(xmlGenericErrorContext,
9069 "PP: internal error, state == SYSTEM_LITERAL\n");
9070 ctxt->instate = XML_PARSER_START_TAG;
9071#ifdef DEBUG_PUSH
9072 xmlGenericError(xmlGenericErrorContext,
9073 "PP: entering START_TAG\n");
9074#endif
9075 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00009076 case XML_PARSER_PUBLIC_LITERAL:
9077 xmlGenericError(xmlGenericErrorContext,
9078 "PP: internal error, state == PUBLIC_LITERAL\n");
9079 ctxt->instate = XML_PARSER_START_TAG;
9080#ifdef DEBUG_PUSH
9081 xmlGenericError(xmlGenericErrorContext,
9082 "PP: entering START_TAG\n");
9083#endif
9084 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009085 }
9086 }
9087done:
9088#ifdef DEBUG_PUSH
9089 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
9090#endif
9091 return(ret);
9092}
9093
9094/**
Owen Taylor3473f882001-02-23 17:55:21 +00009095 * xmlParseChunk:
9096 * @ctxt: an XML parser context
9097 * @chunk: an char array
9098 * @size: the size in byte of the chunk
9099 * @terminate: last chunk indicator
9100 *
9101 * Parse a Chunk of memory
9102 *
9103 * Returns zero if no error, the xmlParserErrors otherwise.
9104 */
9105int
9106xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9107 int terminate) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009108 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9109 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +00009110 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9111 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9112 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9113 int cur = ctxt->input->cur - ctxt->input->base;
9114
9115 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9116 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9117 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009118 ctxt->input->end =
9119 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009120#ifdef DEBUG_PUSH
9121 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9122#endif
9123
Daniel Veillarda880b122003-04-21 21:36:41 +00009124#if 0
Owen Taylor3473f882001-02-23 17:55:21 +00009125 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
9126 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda880b122003-04-21 21:36:41 +00009127#endif
Owen Taylor3473f882001-02-23 17:55:21 +00009128 } else if (ctxt->instate != XML_PARSER_EOF) {
9129 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
9130 xmlParserInputBufferPtr in = ctxt->input->buf;
9131 if ((in->encoder != NULL) && (in->buffer != NULL) &&
9132 (in->raw != NULL)) {
9133 int nbchars;
9134
9135 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
9136 if (nbchars < 0) {
9137 xmlGenericError(xmlGenericErrorContext,
9138 "xmlParseChunk: encoder error\n");
9139 return(XML_ERR_INVALID_ENCODING);
9140 }
9141 }
9142 }
9143 }
9144 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009145 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9146 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +00009147 if (terminate) {
9148 /*
9149 * Check for termination
9150 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009151 int avail = 0;
9152 if (ctxt->input->buf == NULL)
9153 avail = ctxt->input->length -
9154 (ctxt->input->cur - ctxt->input->base);
9155 else
9156 avail = ctxt->input->buf->buffer->use -
9157 (ctxt->input->cur - ctxt->input->base);
9158
Owen Taylor3473f882001-02-23 17:55:21 +00009159 if ((ctxt->instate != XML_PARSER_EOF) &&
9160 (ctxt->instate != XML_PARSER_EPILOG)) {
9161 ctxt->errNo = XML_ERR_DOCUMENT_END;
9162 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9163 ctxt->sax->error(ctxt->userData,
9164 "Extra content at the end of the document\n");
9165 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009166 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009167 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009168 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
9169 ctxt->errNo = XML_ERR_DOCUMENT_END;
9170 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9171 ctxt->sax->error(ctxt->userData,
9172 "Extra content at the end of the document\n");
9173 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009174 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009175
9176 }
Owen Taylor3473f882001-02-23 17:55:21 +00009177 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009178 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009179 ctxt->sax->endDocument(ctxt->userData);
9180 }
9181 ctxt->instate = XML_PARSER_EOF;
9182 }
9183 return((xmlParserErrors) ctxt->errNo);
9184}
9185
/************************************************************************
 *                                                                      *
 *              I/O front end functions to the parser                   *
 *                                                                      *
 ************************************************************************/
9191
9192/**
9193 * xmlStopParser:
9194 * @ctxt: an XML parser context
9195 *
9196 * Blocks further parser processing
9197 */
9198void
9199xmlStopParser(xmlParserCtxtPtr ctxt) {
9200 ctxt->instate = XML_PARSER_EOF;
9201 if (ctxt->input != NULL)
9202 ctxt->input->cur = BAD_CAST"";
9203}
9204
9205/**
9206 * xmlCreatePushParserCtxt:
9207 * @sax: a SAX handler
9208 * @user_data: The user data returned on SAX callbacks
9209 * @chunk: a pointer to an array of chars
9210 * @size: number of chars in the array
9211 * @filename: an optional file name or URI
9212 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00009213 * Create a parser context for using the XML parser in push mode.
9214 * If @buffer and @size are non-NULL, the data is used to detect
9215 * the encoding. The remaining characters will be parsed so they
9216 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00009217 * To allow content encoding detection, @size should be >= 4
9218 * The value of @filename is used for fetching external entities
9219 * and error/warning reports.
9220 *
9221 * Returns the new parser context or NULL
9222 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00009223
Owen Taylor3473f882001-02-23 17:55:21 +00009224xmlParserCtxtPtr
9225xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9226 const char *chunk, int size, const char *filename) {
9227 xmlParserCtxtPtr ctxt;
9228 xmlParserInputPtr inputStream;
9229 xmlParserInputBufferPtr buf;
9230 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9231
9232 /*
9233 * plug some encoding conversion routines
9234 */
9235 if ((chunk != NULL) && (size >= 4))
9236 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9237
9238 buf = xmlAllocParserInputBuffer(enc);
9239 if (buf == NULL) return(NULL);
9240
9241 ctxt = xmlNewParserCtxt();
9242 if (ctxt == NULL) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009243 xmlGenericError(xmlGenericErrorContext,
9244 "xml parser: out of memory\n");
9245 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009246 return(NULL);
9247 }
9248 if (sax != NULL) {
9249 if (ctxt->sax != &xmlDefaultSAXHandler)
9250 xmlFree(ctxt->sax);
9251 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9252 if (ctxt->sax == NULL) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009253 xmlGenericError(xmlGenericErrorContext,
9254 "xml parser: out of memory\n");
9255 xmlFreeParserInputBuffer(buf);
9256 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009257 return(NULL);
9258 }
9259 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9260 if (user_data != NULL)
9261 ctxt->userData = user_data;
9262 }
9263 if (filename == NULL) {
9264 ctxt->directory = NULL;
9265 } else {
9266 ctxt->directory = xmlParserGetDirectory(filename);
9267 }
9268
9269 inputStream = xmlNewInputStream(ctxt);
9270 if (inputStream == NULL) {
9271 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009272 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009273 return(NULL);
9274 }
9275
9276 if (filename == NULL)
9277 inputStream->filename = NULL;
9278 else
Daniel Veillardf4862f02002-09-10 11:13:43 +00009279 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +00009280 xmlCanonicPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +00009281 inputStream->buf = buf;
9282 inputStream->base = inputStream->buf->buffer->content;
9283 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009284 inputStream->end =
9285 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009286
9287 inputPush(ctxt, inputStream);
9288
9289 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9290 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009291 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9292 int cur = ctxt->input->cur - ctxt->input->base;
9293
Owen Taylor3473f882001-02-23 17:55:21 +00009294 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009295
9296 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9297 ctxt->input->cur = ctxt->input->base + cur;
9298 ctxt->input->end =
9299 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009300#ifdef DEBUG_PUSH
9301 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9302#endif
9303 }
9304
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009305 if (enc != XML_CHAR_ENCODING_NONE) {
9306 xmlSwitchEncoding(ctxt, enc);
9307 }
9308
Owen Taylor3473f882001-02-23 17:55:21 +00009309 return(ctxt);
9310}
9311
9312/**
9313 * xmlCreateIOParserCtxt:
9314 * @sax: a SAX handler
9315 * @user_data: The user data returned on SAX callbacks
9316 * @ioread: an I/O read function
9317 * @ioclose: an I/O close function
9318 * @ioctx: an I/O handler
9319 * @enc: the charset encoding if known
9320 *
9321 * Create a parser context for using the XML parser with an existing
9322 * I/O stream
9323 *
9324 * Returns the new parser context or NULL
9325 */
9326xmlParserCtxtPtr
9327xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9328 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9329 void *ioctx, xmlCharEncoding enc) {
9330 xmlParserCtxtPtr ctxt;
9331 xmlParserInputPtr inputStream;
9332 xmlParserInputBufferPtr buf;
9333
9334 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9335 if (buf == NULL) return(NULL);
9336
9337 ctxt = xmlNewParserCtxt();
9338 if (ctxt == NULL) {
9339 xmlFree(buf);
9340 return(NULL);
9341 }
9342 if (sax != NULL) {
9343 if (ctxt->sax != &xmlDefaultSAXHandler)
9344 xmlFree(ctxt->sax);
9345 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9346 if (ctxt->sax == NULL) {
9347 xmlFree(buf);
9348 xmlFree(ctxt);
9349 return(NULL);
9350 }
9351 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9352 if (user_data != NULL)
9353 ctxt->userData = user_data;
9354 }
9355
9356 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9357 if (inputStream == NULL) {
9358 xmlFreeParserCtxt(ctxt);
9359 return(NULL);
9360 }
9361 inputPush(ctxt, inputStream);
9362
9363 return(ctxt);
9364}
9365
/************************************************************************
 *                                                                      *
 *              Front ends when parsing a DTD                           *
 *                                                                      *
 ************************************************************************/
9371
9372/**
9373 * xmlIOParseDTD:
9374 * @sax: the SAX handler block or NULL
9375 * @input: an Input Buffer
9376 * @enc: the charset encoding if known
9377 *
9378 * Load and parse a DTD
9379 *
9380 * Returns the resulting xmlDtdPtr or NULL in case of error.
9381 * @input will be freed at parsing end.
9382 */
9383
9384xmlDtdPtr
9385xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9386 xmlCharEncoding enc) {
9387 xmlDtdPtr ret = NULL;
9388 xmlParserCtxtPtr ctxt;
9389 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009390 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00009391
9392 if (input == NULL)
9393 return(NULL);
9394
9395 ctxt = xmlNewParserCtxt();
9396 if (ctxt == NULL) {
9397 return(NULL);
9398 }
9399
9400 /*
9401 * Set-up the SAX context
9402 */
9403 if (sax != NULL) {
9404 if (ctxt->sax != NULL)
9405 xmlFree(ctxt->sax);
9406 ctxt->sax = sax;
9407 ctxt->userData = NULL;
9408 }
9409
9410 /*
9411 * generate a parser input from the I/O handler
9412 */
9413
9414 pinput = xmlNewIOInputStream(ctxt, input, enc);
9415 if (pinput == NULL) {
9416 if (sax != NULL) ctxt->sax = NULL;
9417 xmlFreeParserCtxt(ctxt);
9418 return(NULL);
9419 }
9420
9421 /*
9422 * plug some encoding conversion routines here.
9423 */
9424 xmlPushInput(ctxt, pinput);
9425
9426 pinput->filename = NULL;
9427 pinput->line = 1;
9428 pinput->col = 1;
9429 pinput->base = ctxt->input->cur;
9430 pinput->cur = ctxt->input->cur;
9431 pinput->free = NULL;
9432
9433 /*
9434 * let's parse that entity knowing it's an external subset.
9435 */
9436 ctxt->inSubset = 2;
9437 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9438 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9439 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009440
9441 if (enc == XML_CHAR_ENCODING_NONE) {
9442 /*
9443 * Get the 4 first bytes and decode the charset
9444 * if enc != XML_CHAR_ENCODING_NONE
9445 * plug some encoding conversion routines.
9446 */
9447 start[0] = RAW;
9448 start[1] = NXT(1);
9449 start[2] = NXT(2);
9450 start[3] = NXT(3);
9451 enc = xmlDetectCharEncoding(start, 4);
9452 if (enc != XML_CHAR_ENCODING_NONE) {
9453 xmlSwitchEncoding(ctxt, enc);
9454 }
9455 }
9456
Owen Taylor3473f882001-02-23 17:55:21 +00009457 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9458
9459 if (ctxt->myDoc != NULL) {
9460 if (ctxt->wellFormed) {
9461 ret = ctxt->myDoc->extSubset;
9462 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +00009463 if (ret != NULL) {
9464 xmlNodePtr tmp;
9465
9466 ret->doc = NULL;
9467 tmp = ret->children;
9468 while (tmp != NULL) {
9469 tmp->doc = NULL;
9470 tmp = tmp->next;
9471 }
9472 }
Owen Taylor3473f882001-02-23 17:55:21 +00009473 } else {
9474 ret = NULL;
9475 }
9476 xmlFreeDoc(ctxt->myDoc);
9477 ctxt->myDoc = NULL;
9478 }
9479 if (sax != NULL) ctxt->sax = NULL;
9480 xmlFreeParserCtxt(ctxt);
9481
9482 return(ret);
9483}
9484
9485/**
9486 * xmlSAXParseDTD:
9487 * @sax: the SAX handler block
9488 * @ExternalID: a NAME* containing the External ID of the DTD
9489 * @SystemID: a NAME* containing the URL to the DTD
9490 *
9491 * Load and parse an external subset.
9492 *
9493 * Returns the resulting xmlDtdPtr or NULL in case of error.
9494 */
9495
9496xmlDtdPtr
9497xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9498 const xmlChar *SystemID) {
9499 xmlDtdPtr ret = NULL;
9500 xmlParserCtxtPtr ctxt;
9501 xmlParserInputPtr input = NULL;
9502 xmlCharEncoding enc;
9503
9504 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9505
9506 ctxt = xmlNewParserCtxt();
9507 if (ctxt == NULL) {
9508 return(NULL);
9509 }
9510
9511 /*
9512 * Set-up the SAX context
9513 */
9514 if (sax != NULL) {
9515 if (ctxt->sax != NULL)
9516 xmlFree(ctxt->sax);
9517 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +00009518 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +00009519 }
9520
9521 /*
9522 * Ask the Entity resolver to load the damn thing
9523 */
9524
9525 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillardc6abc3d2003-04-26 13:27:30 +00009526 input = ctxt->sax->resolveEntity(ctxt, ExternalID, SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +00009527 if (input == NULL) {
9528 if (sax != NULL) ctxt->sax = NULL;
9529 xmlFreeParserCtxt(ctxt);
9530 return(NULL);
9531 }
9532
9533 /*
9534 * plug some encoding conversion routines here.
9535 */
9536 xmlPushInput(ctxt, input);
9537 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9538 xmlSwitchEncoding(ctxt, enc);
9539
9540 if (input->filename == NULL)
Daniel Veillard85095e22003-04-23 13:56:44 +00009541 input->filename = (char *) xmlCanonicPath(SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +00009542 input->line = 1;
9543 input->col = 1;
9544 input->base = ctxt->input->cur;
9545 input->cur = ctxt->input->cur;
9546 input->free = NULL;
9547
9548 /*
9549 * let's parse that entity knowing it's an external subset.
9550 */
9551 ctxt->inSubset = 2;
9552 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9553 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9554 ExternalID, SystemID);
9555 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9556
9557 if (ctxt->myDoc != NULL) {
9558 if (ctxt->wellFormed) {
9559 ret = ctxt->myDoc->extSubset;
9560 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +00009561 if (ret != NULL) {
9562 xmlNodePtr tmp;
9563
9564 ret->doc = NULL;
9565 tmp = ret->children;
9566 while (tmp != NULL) {
9567 tmp->doc = NULL;
9568 tmp = tmp->next;
9569 }
9570 }
Owen Taylor3473f882001-02-23 17:55:21 +00009571 } else {
9572 ret = NULL;
9573 }
9574 xmlFreeDoc(ctxt->myDoc);
9575 ctxt->myDoc = NULL;
9576 }
9577 if (sax != NULL) ctxt->sax = NULL;
9578 xmlFreeParserCtxt(ctxt);
9579
9580 return(ret);
9581}
9582
9583/**
9584 * xmlParseDTD:
9585 * @ExternalID: a NAME* containing the External ID of the DTD
9586 * @SystemID: a NAME* containing the URL to the DTD
9587 *
9588 * Load and parse an external subset.
9589 *
9590 * Returns the resulting xmlDtdPtr or NULL in case of error.
9591 */
9592
9593xmlDtdPtr
9594xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9595 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9596}
9597
/************************************************************************
 *                                                                      *
 *              Front ends when parsing an Entity                       *
 *                                                                      *
 ************************************************************************/
9603
9604/**
Owen Taylor3473f882001-02-23 17:55:21 +00009605 * xmlParseCtxtExternalEntity:
9606 * @ctx: the existing parsing context
9607 * @URL: the URL for the entity to load
9608 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009609 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009610 *
9611 * Parse an external general entity within an existing parsing context
9612 * An external general parsed entity is well-formed if it matches the
9613 * production labeled extParsedEnt.
9614 *
9615 * [78] extParsedEnt ::= TextDecl? content
9616 *
9617 * Returns 0 if the entity is well formed, -1 in case of args problem and
9618 * the parser error code otherwise
9619 */
9620
9621int
9622xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009623 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009624 xmlParserCtxtPtr ctxt;
9625 xmlDocPtr newDoc;
9626 xmlSAXHandlerPtr oldsax = NULL;
9627 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009628 xmlChar start[4];
9629 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009630
9631 if (ctx->depth > 40) {
9632 return(XML_ERR_ENTITY_LOOP);
9633 }
9634
Daniel Veillardcda96922001-08-21 10:56:31 +00009635 if (lst != NULL)
9636 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009637 if ((URL == NULL) && (ID == NULL))
9638 return(-1);
9639 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9640 return(-1);
9641
9642
9643 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9644 if (ctxt == NULL) return(-1);
9645 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +00009646 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +00009647 oldsax = ctxt->sax;
9648 ctxt->sax = ctx->sax;
9649 newDoc = xmlNewDoc(BAD_CAST "1.0");
9650 if (newDoc == NULL) {
9651 xmlFreeParserCtxt(ctxt);
9652 return(-1);
9653 }
9654 if (ctx->myDoc != NULL) {
9655 newDoc->intSubset = ctx->myDoc->intSubset;
9656 newDoc->extSubset = ctx->myDoc->extSubset;
9657 }
9658 if (ctx->myDoc->URL != NULL) {
9659 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9660 }
9661 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9662 if (newDoc->children == NULL) {
9663 ctxt->sax = oldsax;
9664 xmlFreeParserCtxt(ctxt);
9665 newDoc->intSubset = NULL;
9666 newDoc->extSubset = NULL;
9667 xmlFreeDoc(newDoc);
9668 return(-1);
9669 }
9670 nodePush(ctxt, newDoc->children);
9671 if (ctx->myDoc == NULL) {
9672 ctxt->myDoc = newDoc;
9673 } else {
9674 ctxt->myDoc = ctx->myDoc;
9675 newDoc->children->doc = ctx->myDoc;
9676 }
9677
Daniel Veillard87a764e2001-06-20 17:41:10 +00009678 /*
9679 * Get the 4 first bytes and decode the charset
9680 * if enc != XML_CHAR_ENCODING_NONE
9681 * plug some encoding conversion routines.
9682 */
9683 GROW
9684 start[0] = RAW;
9685 start[1] = NXT(1);
9686 start[2] = NXT(2);
9687 start[3] = NXT(3);
9688 enc = xmlDetectCharEncoding(start, 4);
9689 if (enc != XML_CHAR_ENCODING_NONE) {
9690 xmlSwitchEncoding(ctxt, enc);
9691 }
9692
Owen Taylor3473f882001-02-23 17:55:21 +00009693 /*
9694 * Parse a possible text declaration first
9695 */
Owen Taylor3473f882001-02-23 17:55:21 +00009696 if ((RAW == '<') && (NXT(1) == '?') &&
9697 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9698 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9699 xmlParseTextDecl(ctxt);
9700 }
9701
9702 /*
9703 * Doing validity checking on chunk doesn't make sense
9704 */
9705 ctxt->instate = XML_PARSER_CONTENT;
9706 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +00009707 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +00009708 ctxt->loadsubset = ctx->loadsubset;
9709 ctxt->depth = ctx->depth + 1;
9710 ctxt->replaceEntities = ctx->replaceEntities;
9711 if (ctxt->validate) {
9712 ctxt->vctxt.error = ctx->vctxt.error;
9713 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009714 } else {
9715 ctxt->vctxt.error = NULL;
9716 ctxt->vctxt.warning = NULL;
9717 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009718 ctxt->vctxt.nodeTab = NULL;
9719 ctxt->vctxt.nodeNr = 0;
9720 ctxt->vctxt.nodeMax = 0;
9721 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009722
9723 xmlParseContent(ctxt);
9724
Daniel Veillard5f8d1a32003-03-23 21:02:00 +00009725 ctx->validate = ctxt->validate;
9726 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +00009727 if ((RAW == '<') && (NXT(1) == '/')) {
9728 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9729 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9730 ctxt->sax->error(ctxt->userData,
9731 "chunk is not well balanced\n");
9732 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009733 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009734 } else if (RAW != 0) {
9735 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9736 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9737 ctxt->sax->error(ctxt->userData,
9738 "extra content at the end of well balanced chunk\n");
9739 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009740 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009741 }
9742 if (ctxt->node != newDoc->children) {
9743 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9744 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9745 ctxt->sax->error(ctxt->userData,
9746 "chunk is not well balanced\n");
9747 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009748 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009749 }
9750
9751 if (!ctxt->wellFormed) {
9752 if (ctxt->errNo == 0)
9753 ret = 1;
9754 else
9755 ret = ctxt->errNo;
9756 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009757 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009758 xmlNodePtr cur;
9759
9760 /*
9761 * Return the newly created nodeset after unlinking it from
9762 * they pseudo parent.
9763 */
9764 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009765 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009766 while (cur != NULL) {
9767 cur->parent = NULL;
9768 cur = cur->next;
9769 }
9770 newDoc->children->children = NULL;
9771 }
9772 ret = 0;
9773 }
9774 ctxt->sax = oldsax;
9775 xmlFreeParserCtxt(ctxt);
9776 newDoc->intSubset = NULL;
9777 newDoc->extSubset = NULL;
9778 xmlFreeDoc(newDoc);
9779
9780 return(ret);
9781}
9782
9783/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009784 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009785 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009786 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009787 * @sax: the SAX handler bloc (possibly NULL)
9788 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9789 * @depth: Used for loop detection, use 0
9790 * @URL: the URL for the entity to load
9791 * @ID: the System ID for the entity to load
9792 * @list: the return value for the set of parsed nodes
9793 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009794 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009795 *
9796 * Returns 0 if the entity is well formed, -1 in case of args problem and
9797 * the parser error code otherwise
9798 */
9799
Daniel Veillard257d9102001-05-08 10:41:44 +00009800static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009801xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9802 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009803 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009804 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009805 xmlParserCtxtPtr ctxt;
9806 xmlDocPtr newDoc;
9807 xmlSAXHandlerPtr oldsax = NULL;
9808 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009809 xmlChar start[4];
9810 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009811
9812 if (depth > 40) {
9813 return(XML_ERR_ENTITY_LOOP);
9814 }
9815
9816
9817
9818 if (list != NULL)
9819 *list = NULL;
9820 if ((URL == NULL) && (ID == NULL))
9821 return(-1);
9822 if (doc == NULL) /* @@ relax but check for dereferences */
9823 return(-1);
9824
9825
9826 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9827 if (ctxt == NULL) return(-1);
9828 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009829 if (oldctxt != NULL) {
9830 ctxt->_private = oldctxt->_private;
9831 ctxt->loadsubset = oldctxt->loadsubset;
9832 ctxt->validate = oldctxt->validate;
9833 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009834 ctxt->record_info = oldctxt->record_info;
9835 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
9836 ctxt->node_seq.length = oldctxt->node_seq.length;
9837 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009838 } else {
9839 /*
9840 * Doing validity checking on chunk without context
9841 * doesn't make sense
9842 */
9843 ctxt->_private = NULL;
9844 ctxt->validate = 0;
9845 ctxt->external = 2;
9846 ctxt->loadsubset = 0;
9847 }
Owen Taylor3473f882001-02-23 17:55:21 +00009848 if (sax != NULL) {
9849 oldsax = ctxt->sax;
9850 ctxt->sax = sax;
9851 if (user_data != NULL)
9852 ctxt->userData = user_data;
9853 }
9854 newDoc = xmlNewDoc(BAD_CAST "1.0");
9855 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009856 ctxt->node_seq.maximum = 0;
9857 ctxt->node_seq.length = 0;
9858 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009859 xmlFreeParserCtxt(ctxt);
9860 return(-1);
9861 }
9862 if (doc != NULL) {
9863 newDoc->intSubset = doc->intSubset;
9864 newDoc->extSubset = doc->extSubset;
9865 }
9866 if (doc->URL != NULL) {
9867 newDoc->URL = xmlStrdup(doc->URL);
9868 }
9869 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9870 if (newDoc->children == NULL) {
9871 if (sax != NULL)
9872 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009873 ctxt->node_seq.maximum = 0;
9874 ctxt->node_seq.length = 0;
9875 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009876 xmlFreeParserCtxt(ctxt);
9877 newDoc->intSubset = NULL;
9878 newDoc->extSubset = NULL;
9879 xmlFreeDoc(newDoc);
9880 return(-1);
9881 }
9882 nodePush(ctxt, newDoc->children);
9883 if (doc == NULL) {
9884 ctxt->myDoc = newDoc;
9885 } else {
9886 ctxt->myDoc = doc;
9887 newDoc->children->doc = doc;
9888 }
9889
Daniel Veillard87a764e2001-06-20 17:41:10 +00009890 /*
9891 * Get the 4 first bytes and decode the charset
9892 * if enc != XML_CHAR_ENCODING_NONE
9893 * plug some encoding conversion routines.
9894 */
9895 GROW;
9896 start[0] = RAW;
9897 start[1] = NXT(1);
9898 start[2] = NXT(2);
9899 start[3] = NXT(3);
9900 enc = xmlDetectCharEncoding(start, 4);
9901 if (enc != XML_CHAR_ENCODING_NONE) {
9902 xmlSwitchEncoding(ctxt, enc);
9903 }
9904
Owen Taylor3473f882001-02-23 17:55:21 +00009905 /*
9906 * Parse a possible text declaration first
9907 */
Owen Taylor3473f882001-02-23 17:55:21 +00009908 if ((RAW == '<') && (NXT(1) == '?') &&
9909 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9910 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9911 xmlParseTextDecl(ctxt);
9912 }
9913
Owen Taylor3473f882001-02-23 17:55:21 +00009914 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009915 ctxt->depth = depth;
9916
9917 xmlParseContent(ctxt);
9918
Daniel Veillard561b7f82002-03-20 21:55:57 +00009919 if ((RAW == '<') && (NXT(1) == '/')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009920 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9921 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9922 ctxt->sax->error(ctxt->userData,
9923 "chunk is not well balanced\n");
9924 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009925 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00009926 } else if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00009927 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9928 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9929 ctxt->sax->error(ctxt->userData,
9930 "extra content at the end of well balanced chunk\n");
9931 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009932 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009933 }
9934 if (ctxt->node != newDoc->children) {
9935 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9936 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9937 ctxt->sax->error(ctxt->userData,
9938 "chunk is not well balanced\n");
9939 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009940 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009941 }
9942
9943 if (!ctxt->wellFormed) {
9944 if (ctxt->errNo == 0)
9945 ret = 1;
9946 else
9947 ret = ctxt->errNo;
9948 } else {
9949 if (list != NULL) {
9950 xmlNodePtr cur;
9951
9952 /*
9953 * Return the newly created nodeset after unlinking it from
9954 * they pseudo parent.
9955 */
9956 cur = newDoc->children->children;
9957 *list = cur;
9958 while (cur != NULL) {
9959 cur->parent = NULL;
9960 cur = cur->next;
9961 }
9962 newDoc->children->children = NULL;
9963 }
9964 ret = 0;
9965 }
9966 if (sax != NULL)
9967 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +00009968 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
9969 oldctxt->node_seq.length = ctxt->node_seq.length;
9970 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009971 ctxt->node_seq.maximum = 0;
9972 ctxt->node_seq.length = 0;
9973 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009974 xmlFreeParserCtxt(ctxt);
9975 newDoc->intSubset = NULL;
9976 newDoc->extSubset = NULL;
9977 xmlFreeDoc(newDoc);
9978
9979 return(ret);
9980}
9981
9982/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009983 * xmlParseExternalEntity:
9984 * @doc: the document the chunk pertains to
9985 * @sax: the SAX handler bloc (possibly NULL)
9986 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9987 * @depth: Used for loop detection, use 0
9988 * @URL: the URL for the entity to load
9989 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009990 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009991 *
9992 * Parse an external general entity
9993 * An external general parsed entity is well-formed if it matches the
9994 * production labeled extParsedEnt.
9995 *
9996 * [78] extParsedEnt ::= TextDecl? content
9997 *
9998 * Returns 0 if the entity is well formed, -1 in case of args problem and
9999 * the parser error code otherwise
10000 */
10001
10002int
10003xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000010004 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010005 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010006 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000010007}
10008
10009/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000010010 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000010011 * @doc: the document the chunk pertains to
10012 * @sax: the SAX handler bloc (possibly NULL)
10013 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10014 * @depth: Used for loop detection, use 0
10015 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000010016 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010017 *
10018 * Parse a well-balanced chunk of an XML document
10019 * called by the parser
10020 * The allowed sequence for the Well Balanced Chunk is the one defined by
10021 * the content production in the XML grammar:
10022 *
10023 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10024 *
10025 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10026 * the parser error code otherwise
10027 */
10028
10029int
10030xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000010031 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010032 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
10033 depth, string, lst, 0 );
10034}
10035
10036/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000010037 * xmlParseBalancedChunkMemoryInternal:
10038 * @oldctxt: the existing parsing context
10039 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10040 * @user_data: the user data field for the parser context
10041 * @lst: the return value for the set of parsed nodes
10042 *
10043 *
10044 * Parse a well-balanced chunk of an XML document
10045 * called by the parser
10046 * The allowed sequence for the Well Balanced Chunk is the one defined by
10047 * the content production in the XML grammar:
10048 *
10049 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10050 *
10051 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10052 * the parser error code otherwise
10053 *
10054 * In case recover is set to 1, the nodelist will not be empty even if
10055 * the parsed chunk is not well balanced.
10056 */
10057static int
10058xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
10059 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
10060 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010061 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010062 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010063 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010064 int size;
10065 int ret = 0;
10066
10067 if (oldctxt->depth > 40) {
10068 return(XML_ERR_ENTITY_LOOP);
10069 }
10070
10071
10072 if (lst != NULL)
10073 *lst = NULL;
10074 if (string == NULL)
10075 return(-1);
10076
10077 size = xmlStrlen(string);
10078
10079 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10080 if (ctxt == NULL) return(-1);
10081 if (user_data != NULL)
10082 ctxt->userData = user_data;
10083 else
10084 ctxt->userData = ctxt;
10085
10086 oldsax = ctxt->sax;
10087 ctxt->sax = oldctxt->sax;
Daniel Veillarde1ca5032002-12-09 14:13:43 +000010088 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010089 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010090 newDoc = xmlNewDoc(BAD_CAST "1.0");
10091 if (newDoc == NULL) {
10092 ctxt->sax = oldsax;
10093 xmlFreeParserCtxt(ctxt);
10094 return(-1);
10095 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010096 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010097 } else {
10098 ctxt->myDoc = oldctxt->myDoc;
10099 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010100 }
Daniel Veillard9bc53102002-11-25 13:20:04 +000010101 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +000010102 BAD_CAST "pseudoroot", NULL);
10103 if (ctxt->myDoc->children == NULL) {
10104 ctxt->sax = oldsax;
10105 xmlFreeParserCtxt(ctxt);
10106 if (newDoc != NULL)
10107 xmlFreeDoc(newDoc);
10108 return(-1);
10109 }
10110 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010111 ctxt->instate = XML_PARSER_CONTENT;
10112 ctxt->depth = oldctxt->depth + 1;
10113
Daniel Veillard328f48c2002-11-15 15:24:34 +000010114 ctxt->validate = 0;
10115 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000010116 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
10117 /*
10118 * ID/IDREF registration will be done in xmlValidateElement below
10119 */
10120 ctxt->loadsubset |= XML_SKIP_IDS;
10121 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010122
Daniel Veillard68e9e742002-11-16 15:35:11 +000010123 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010124 if ((RAW == '<') && (NXT(1) == '/')) {
10125 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10126 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10127 ctxt->sax->error(ctxt->userData,
10128 "chunk is not well balanced\n");
10129 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010130 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010131 } else if (RAW != 0) {
10132 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10133 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10134 ctxt->sax->error(ctxt->userData,
10135 "extra content at the end of well balanced chunk\n");
10136 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010137 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010138 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010139 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000010140 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10141 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10142 ctxt->sax->error(ctxt->userData,
10143 "chunk is not well balanced\n");
10144 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010145 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010146 }
10147
10148 if (!ctxt->wellFormed) {
10149 if (ctxt->errNo == 0)
10150 ret = 1;
10151 else
10152 ret = ctxt->errNo;
10153 } else {
10154 ret = 0;
10155 }
10156
10157 if ((lst != NULL) && (ret == 0)) {
10158 xmlNodePtr cur;
10159
10160 /*
10161 * Return the newly created nodeset after unlinking it from
10162 * they pseudo parent.
10163 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000010164 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010165 *lst = cur;
10166 while (cur != NULL) {
Daniel Veillard8d589042003-02-04 15:07:21 +000010167 if (oldctxt->validate && oldctxt->wellFormed &&
10168 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
10169 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
10170 oldctxt->myDoc, cur);
10171 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010172 cur->parent = NULL;
10173 cur = cur->next;
10174 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010175 ctxt->myDoc->children->children = NULL;
10176 }
10177 if (ctxt->myDoc != NULL) {
10178 xmlFreeNode(ctxt->myDoc->children);
10179 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010180 }
10181
10182 ctxt->sax = oldsax;
10183 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010184 if (newDoc != NULL)
10185 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010186
10187 return(ret);
10188}
10189
10190/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000010191 * xmlParseBalancedChunkMemoryRecover:
10192 * @doc: the document the chunk pertains to
10193 * @sax: the SAX handler bloc (possibly NULL)
10194 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10195 * @depth: Used for loop detection, use 0
10196 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10197 * @lst: the return value for the set of parsed nodes
10198 * @recover: return nodes even if the data is broken (use 0)
10199 *
10200 *
10201 * Parse a well-balanced chunk of an XML document
10202 * called by the parser
10203 * The allowed sequence for the Well Balanced Chunk is the one defined by
10204 * the content production in the XML grammar:
10205 *
10206 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10207 *
10208 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10209 * the parser error code otherwise
10210 *
10211 * In case recover is set to 1, the nodelist will not be empty even if
10212 * the parsed chunk is not well balanced.
10213 */
10214int
10215xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
10216 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
10217 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000010218 xmlParserCtxtPtr ctxt;
10219 xmlDocPtr newDoc;
10220 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000010221 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000010222 int size;
10223 int ret = 0;
10224
10225 if (depth > 40) {
10226 return(XML_ERR_ENTITY_LOOP);
10227 }
10228
10229
Daniel Veillardcda96922001-08-21 10:56:31 +000010230 if (lst != NULL)
10231 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010232 if (string == NULL)
10233 return(-1);
10234
10235 size = xmlStrlen(string);
10236
10237 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10238 if (ctxt == NULL) return(-1);
10239 ctxt->userData = ctxt;
10240 if (sax != NULL) {
10241 oldsax = ctxt->sax;
10242 ctxt->sax = sax;
10243 if (user_data != NULL)
10244 ctxt->userData = user_data;
10245 }
10246 newDoc = xmlNewDoc(BAD_CAST "1.0");
10247 if (newDoc == NULL) {
10248 xmlFreeParserCtxt(ctxt);
10249 return(-1);
10250 }
10251 if (doc != NULL) {
10252 newDoc->intSubset = doc->intSubset;
10253 newDoc->extSubset = doc->extSubset;
10254 }
10255 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10256 if (newDoc->children == NULL) {
10257 if (sax != NULL)
10258 ctxt->sax = oldsax;
10259 xmlFreeParserCtxt(ctxt);
10260 newDoc->intSubset = NULL;
10261 newDoc->extSubset = NULL;
10262 xmlFreeDoc(newDoc);
10263 return(-1);
10264 }
10265 nodePush(ctxt, newDoc->children);
10266 if (doc == NULL) {
10267 ctxt->myDoc = newDoc;
10268 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000010269 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000010270 newDoc->children->doc = doc;
10271 }
10272 ctxt->instate = XML_PARSER_CONTENT;
10273 ctxt->depth = depth;
10274
10275 /*
10276 * Doing validity checking on chunk doesn't make sense
10277 */
10278 ctxt->validate = 0;
10279 ctxt->loadsubset = 0;
10280
Daniel Veillardb39bc392002-10-26 19:29:51 +000010281 if ( doc != NULL ){
10282 content = doc->children;
10283 doc->children = NULL;
10284 xmlParseContent(ctxt);
10285 doc->children = content;
10286 }
10287 else {
10288 xmlParseContent(ctxt);
10289 }
Owen Taylor3473f882001-02-23 17:55:21 +000010290 if ((RAW == '<') && (NXT(1) == '/')) {
10291 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10292 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10293 ctxt->sax->error(ctxt->userData,
10294 "chunk is not well balanced\n");
10295 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010296 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010297 } else if (RAW != 0) {
10298 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10299 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10300 ctxt->sax->error(ctxt->userData,
10301 "extra content at the end of well balanced chunk\n");
10302 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010303 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010304 }
10305 if (ctxt->node != newDoc->children) {
10306 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10307 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10308 ctxt->sax->error(ctxt->userData,
10309 "chunk is not well balanced\n");
10310 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010311 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010312 }
10313
10314 if (!ctxt->wellFormed) {
10315 if (ctxt->errNo == 0)
10316 ret = 1;
10317 else
10318 ret = ctxt->errNo;
10319 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010320 ret = 0;
10321 }
10322
10323 if (lst != NULL && (ret == 0 || recover == 1)) {
10324 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010325
10326 /*
10327 * Return the newly created nodeset after unlinking it from
10328 * they pseudo parent.
10329 */
10330 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010331 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010332 while (cur != NULL) {
10333 cur->parent = NULL;
10334 cur = cur->next;
10335 }
10336 newDoc->children->children = NULL;
10337 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000010338
Owen Taylor3473f882001-02-23 17:55:21 +000010339 if (sax != NULL)
10340 ctxt->sax = oldsax;
10341 xmlFreeParserCtxt(ctxt);
10342 newDoc->intSubset = NULL;
10343 newDoc->extSubset = NULL;
10344 xmlFreeDoc(newDoc);
10345
10346 return(ret);
10347}
10348
10349/**
10350 * xmlSAXParseEntity:
10351 * @sax: the SAX handler block
10352 * @filename: the filename
10353 *
10354 * parse an XML external entity out of context and build a tree.
10355 * It use the given SAX function block to handle the parsing callback.
10356 * If sax is NULL, fallback to the default DOM tree building routines.
10357 *
10358 * [78] extParsedEnt ::= TextDecl? content
10359 *
10360 * This correspond to a "Well Balanced" chunk
10361 *
10362 * Returns the resulting document tree
10363 */
10364
10365xmlDocPtr
10366xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
10367 xmlDocPtr ret;
10368 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010369
10370 ctxt = xmlCreateFileParserCtxt(filename);
10371 if (ctxt == NULL) {
10372 return(NULL);
10373 }
10374 if (sax != NULL) {
10375 if (ctxt->sax != NULL)
10376 xmlFree(ctxt->sax);
10377 ctxt->sax = sax;
10378 ctxt->userData = NULL;
10379 }
10380
Owen Taylor3473f882001-02-23 17:55:21 +000010381 xmlParseExtParsedEnt(ctxt);
10382
10383 if (ctxt->wellFormed)
10384 ret = ctxt->myDoc;
10385 else {
10386 ret = NULL;
10387 xmlFreeDoc(ctxt->myDoc);
10388 ctxt->myDoc = NULL;
10389 }
10390 if (sax != NULL)
10391 ctxt->sax = NULL;
10392 xmlFreeParserCtxt(ctxt);
10393
10394 return(ret);
10395}
10396
10397/**
10398 * xmlParseEntity:
10399 * @filename: the filename
10400 *
10401 * parse an XML external entity out of context and build a tree.
10402 *
10403 * [78] extParsedEnt ::= TextDecl? content
10404 *
10405 * This correspond to a "Well Balanced" chunk
10406 *
10407 * Returns the resulting document tree
10408 */
10409
10410xmlDocPtr
10411xmlParseEntity(const char *filename) {
10412 return(xmlSAXParseEntity(NULL, filename));
10413}
10414
10415/**
10416 * xmlCreateEntityParserCtxt:
10417 * @URL: the entity URL
10418 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010419 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000010420 *
10421 * Create a parser context for an external entity
10422 * Automatic support for ZLIB/Compress compressed document is provided
10423 * by default if found at compile-time.
10424 *
10425 * Returns the new parser context or NULL
10426 */
10427xmlParserCtxtPtr
10428xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10429 const xmlChar *base) {
10430 xmlParserCtxtPtr ctxt;
10431 xmlParserInputPtr inputStream;
10432 char *directory = NULL;
10433 xmlChar *uri;
10434
10435 ctxt = xmlNewParserCtxt();
10436 if (ctxt == NULL) {
10437 return(NULL);
10438 }
10439
10440 uri = xmlBuildURI(URL, base);
10441
10442 if (uri == NULL) {
10443 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
10444 if (inputStream == NULL) {
10445 xmlFreeParserCtxt(ctxt);
10446 return(NULL);
10447 }
10448
10449 inputPush(ctxt, inputStream);
10450
10451 if ((ctxt->directory == NULL) && (directory == NULL))
10452 directory = xmlParserGetDirectory((char *)URL);
10453 if ((ctxt->directory == NULL) && (directory != NULL))
10454 ctxt->directory = directory;
10455 } else {
10456 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
10457 if (inputStream == NULL) {
10458 xmlFree(uri);
10459 xmlFreeParserCtxt(ctxt);
10460 return(NULL);
10461 }
10462
10463 inputPush(ctxt, inputStream);
10464
10465 if ((ctxt->directory == NULL) && (directory == NULL))
10466 directory = xmlParserGetDirectory((char *)uri);
10467 if ((ctxt->directory == NULL) && (directory != NULL))
10468 ctxt->directory = directory;
10469 xmlFree(uri);
10470 }
10471
10472 return(ctxt);
10473}
10474
10475/************************************************************************
10476 * *
10477 * Front ends when parsing from a file *
10478 * *
10479 ************************************************************************/
10480
10481/**
10482 * xmlCreateFileParserCtxt:
10483 * @filename: the filename
10484 *
10485 * Create a parser context for a file content.
10486 * Automatic support for ZLIB/Compress compressed document is provided
10487 * by default if found at compile-time.
10488 *
10489 * Returns the new parser context or NULL
10490 */
10491xmlParserCtxtPtr
10492xmlCreateFileParserCtxt(const char *filename)
10493{
10494 xmlParserCtxtPtr ctxt;
10495 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000010496 char *directory = NULL;
10497
Owen Taylor3473f882001-02-23 17:55:21 +000010498 ctxt = xmlNewParserCtxt();
10499 if (ctxt == NULL) {
10500 if (xmlDefaultSAXHandler.error != NULL) {
10501 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
10502 }
10503 return(NULL);
10504 }
10505
Igor Zlatkovicce076162003-02-23 13:39:39 +000010506
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000010507 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010508 if (inputStream == NULL) {
10509 xmlFreeParserCtxt(ctxt);
10510 return(NULL);
10511 }
10512
Owen Taylor3473f882001-02-23 17:55:21 +000010513 inputPush(ctxt, inputStream);
10514 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010515 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000010516 if ((ctxt->directory == NULL) && (directory != NULL))
10517 ctxt->directory = directory;
10518
10519 return(ctxt);
10520}
10521
10522/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010523 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000010524 * @sax: the SAX handler block
10525 * @filename: the filename
10526 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10527 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000010528 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000010529 *
10530 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10531 * compressed document is provided by default if found at compile-time.
10532 * It use the given SAX function block to handle the parsing callback.
10533 * If sax is NULL, fallback to the default DOM tree building routines.
10534 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000010535 * User data (void *) is stored within the parser context in the
10536 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000010537 *
Owen Taylor3473f882001-02-23 17:55:21 +000010538 * Returns the resulting document tree
10539 */
10540
10541xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000010542xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
10543 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000010544 xmlDocPtr ret;
10545 xmlParserCtxtPtr ctxt;
10546 char *directory = NULL;
10547
Daniel Veillard635ef722001-10-29 11:48:19 +000010548 xmlInitParser();
10549
Owen Taylor3473f882001-02-23 17:55:21 +000010550 ctxt = xmlCreateFileParserCtxt(filename);
10551 if (ctxt == NULL) {
10552 return(NULL);
10553 }
10554 if (sax != NULL) {
10555 if (ctxt->sax != NULL)
10556 xmlFree(ctxt->sax);
10557 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010558 }
Daniel Veillarda293c322001-10-02 13:54:14 +000010559 if (data!=NULL) {
10560 ctxt->_private=data;
10561 }
Owen Taylor3473f882001-02-23 17:55:21 +000010562
10563 if ((ctxt->directory == NULL) && (directory == NULL))
10564 directory = xmlParserGetDirectory(filename);
10565 if ((ctxt->directory == NULL) && (directory != NULL))
10566 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
10567
Daniel Veillarddad3f682002-11-17 16:47:27 +000010568 ctxt->recovery = recovery;
10569
Owen Taylor3473f882001-02-23 17:55:21 +000010570 xmlParseDocument(ctxt);
10571
10572 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10573 else {
10574 ret = NULL;
10575 xmlFreeDoc(ctxt->myDoc);
10576 ctxt->myDoc = NULL;
10577 }
10578 if (sax != NULL)
10579 ctxt->sax = NULL;
10580 xmlFreeParserCtxt(ctxt);
10581
10582 return(ret);
10583}
10584
10585/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010586 * xmlSAXParseFile:
10587 * @sax: the SAX handler block
10588 * @filename: the filename
10589 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10590 * documents
10591 *
10592 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10593 * compressed document is provided by default if found at compile-time.
10594 * It use the given SAX function block to handle the parsing callback.
10595 * If sax is NULL, fallback to the default DOM tree building routines.
10596 *
10597 * Returns the resulting document tree
10598 */
10599
10600xmlDocPtr
10601xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
10602 int recovery) {
10603 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
10604}
10605
10606/**
Owen Taylor3473f882001-02-23 17:55:21 +000010607 * xmlRecoverDoc:
10608 * @cur: a pointer to an array of xmlChar
10609 *
10610 * parse an XML in-memory document and build a tree.
10611 * In the case the document is not Well Formed, a tree is built anyway
10612 *
10613 * Returns the resulting document tree
10614 */
10615
10616xmlDocPtr
10617xmlRecoverDoc(xmlChar *cur) {
10618 return(xmlSAXParseDoc(NULL, cur, 1));
10619}
10620
10621/**
10622 * xmlParseFile:
10623 * @filename: the filename
10624 *
10625 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10626 * compressed document is provided by default if found at compile-time.
10627 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000010628 * Returns the resulting document tree if the file was wellformed,
10629 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000010630 */
10631
10632xmlDocPtr
10633xmlParseFile(const char *filename) {
10634 return(xmlSAXParseFile(NULL, filename, 0));
10635}
10636
10637/**
10638 * xmlRecoverFile:
10639 * @filename: the filename
10640 *
10641 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10642 * compressed document is provided by default if found at compile-time.
10643 * In the case the document is not Well Formed, a tree is built anyway
10644 *
10645 * Returns the resulting document tree
10646 */
10647
10648xmlDocPtr
10649xmlRecoverFile(const char *filename) {
10650 return(xmlSAXParseFile(NULL, filename, 1));
10651}
10652
10653
10654/**
10655 * xmlSetupParserForBuffer:
10656 * @ctxt: an XML parser context
10657 * @buffer: a xmlChar * buffer
10658 * @filename: a file name
10659 *
10660 * Setup the parser context to parse a new buffer; Clears any prior
10661 * contents from the parser context. The buffer parameter must not be
10662 * NULL, but the filename parameter can be
10663 */
10664void
10665xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
10666 const char* filename)
10667{
10668 xmlParserInputPtr input;
10669
10670 input = xmlNewInputStream(ctxt);
10671 if (input == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +000010672 xmlGenericError(xmlGenericErrorContext,
10673 "malloc");
Owen Taylor3473f882001-02-23 17:55:21 +000010674 xmlFree(ctxt);
10675 return;
10676 }
10677
10678 xmlClearParserCtxt(ctxt);
10679 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000010680 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000010681 input->base = buffer;
10682 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010683 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000010684 inputPush(ctxt, input);
10685}
10686
10687/**
10688 * xmlSAXUserParseFile:
10689 * @sax: a SAX handler
10690 * @user_data: The user data returned on SAX callbacks
10691 * @filename: a file name
10692 *
10693 * parse an XML file and call the given SAX handler routines.
10694 * Automatic support for ZLIB/Compress compressed document is provided
10695 *
10696 * Returns 0 in case of success or a error number otherwise
10697 */
10698int
10699xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10700 const char *filename) {
10701 int ret = 0;
10702 xmlParserCtxtPtr ctxt;
10703
10704 ctxt = xmlCreateFileParserCtxt(filename);
10705 if (ctxt == NULL) return -1;
10706 if (ctxt->sax != &xmlDefaultSAXHandler)
10707 xmlFree(ctxt->sax);
10708 ctxt->sax = sax;
10709 if (user_data != NULL)
10710 ctxt->userData = user_data;
10711
10712 xmlParseDocument(ctxt);
10713
10714 if (ctxt->wellFormed)
10715 ret = 0;
10716 else {
10717 if (ctxt->errNo != 0)
10718 ret = ctxt->errNo;
10719 else
10720 ret = -1;
10721 }
10722 if (sax != NULL)
10723 ctxt->sax = NULL;
10724 xmlFreeParserCtxt(ctxt);
10725
10726 return ret;
10727}
10728
10729/************************************************************************
10730 * *
10731 * Front ends when parsing from memory *
10732 * *
10733 ************************************************************************/
10734
10735/**
10736 * xmlCreateMemoryParserCtxt:
10737 * @buffer: a pointer to a char array
10738 * @size: the size of the array
10739 *
10740 * Create a parser context for an XML in-memory document.
10741 *
10742 * Returns the new parser context or NULL
10743 */
10744xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010745xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010746 xmlParserCtxtPtr ctxt;
10747 xmlParserInputPtr input;
10748 xmlParserInputBufferPtr buf;
10749
10750 if (buffer == NULL)
10751 return(NULL);
10752 if (size <= 0)
10753 return(NULL);
10754
10755 ctxt = xmlNewParserCtxt();
10756 if (ctxt == NULL)
10757 return(NULL);
10758
10759 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010760 if (buf == NULL) {
10761 xmlFreeParserCtxt(ctxt);
10762 return(NULL);
10763 }
Owen Taylor3473f882001-02-23 17:55:21 +000010764
10765 input = xmlNewInputStream(ctxt);
10766 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010767 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010768 xmlFreeParserCtxt(ctxt);
10769 return(NULL);
10770 }
10771
10772 input->filename = NULL;
10773 input->buf = buf;
10774 input->base = input->buf->buffer->content;
10775 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010776 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010777
10778 inputPush(ctxt, input);
10779 return(ctxt);
10780}
10781
10782/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010783 * xmlSAXParseMemoryWithData:
10784 * @sax: the SAX handler block
10785 * @buffer: an pointer to a char array
10786 * @size: the size of the array
10787 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10788 * documents
10789 * @data: the userdata
10790 *
10791 * parse an XML in-memory block and use the given SAX function block
10792 * to handle the parsing callback. If sax is NULL, fallback to the default
10793 * DOM tree building routines.
10794 *
10795 * User data (void *) is stored within the parser context in the
10796 * context's _private member, so it is available nearly everywhere in libxml
10797 *
10798 * Returns the resulting document tree
10799 */
10800
10801xmlDocPtr
10802xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
10803 int size, int recovery, void *data) {
10804 xmlDocPtr ret;
10805 xmlParserCtxtPtr ctxt;
10806
10807 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10808 if (ctxt == NULL) return(NULL);
10809 if (sax != NULL) {
10810 if (ctxt->sax != NULL)
10811 xmlFree(ctxt->sax);
10812 ctxt->sax = sax;
10813 }
10814 if (data!=NULL) {
10815 ctxt->_private=data;
10816 }
10817
Daniel Veillardadba5f12003-04-04 16:09:01 +000010818 ctxt->recovery = recovery;
10819
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010820 xmlParseDocument(ctxt);
10821
10822 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10823 else {
10824 ret = NULL;
10825 xmlFreeDoc(ctxt->myDoc);
10826 ctxt->myDoc = NULL;
10827 }
10828 if (sax != NULL)
10829 ctxt->sax = NULL;
10830 xmlFreeParserCtxt(ctxt);
10831
10832 return(ret);
10833}
10834
10835/**
Owen Taylor3473f882001-02-23 17:55:21 +000010836 * xmlSAXParseMemory:
10837 * @sax: the SAX handler block
10838 * @buffer: an pointer to a char array
10839 * @size: the size of the array
10840 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10841 * documents
10842 *
10843 * parse an XML in-memory block and use the given SAX function block
10844 * to handle the parsing callback. If sax is NULL, fallback to the default
10845 * DOM tree building routines.
10846 *
10847 * Returns the resulting document tree
10848 */
10849xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010850xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10851 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010852 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010853}
10854
10855/**
10856 * xmlParseMemory:
10857 * @buffer: an pointer to a char array
10858 * @size: the size of the array
10859 *
10860 * parse an XML in-memory block and build a tree.
10861 *
10862 * Returns the resulting document tree
10863 */
10864
Daniel Veillard50822cb2001-07-26 20:05:51 +000010865xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010866 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10867}
10868
10869/**
10870 * xmlRecoverMemory:
10871 * @buffer: an pointer to a char array
10872 * @size: the size of the array
10873 *
10874 * parse an XML in-memory block and build a tree.
10875 * In the case the document is not Well Formed, a tree is built anyway
10876 *
10877 * Returns the resulting document tree
10878 */
10879
Daniel Veillard50822cb2001-07-26 20:05:51 +000010880xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010881 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10882}
10883
10884/**
10885 * xmlSAXUserParseMemory:
10886 * @sax: a SAX handler
10887 * @user_data: The user data returned on SAX callbacks
10888 * @buffer: an in-memory XML document input
10889 * @size: the length of the XML document in bytes
10890 *
10891 * A better SAX parsing routine.
10892 * parse an XML in-memory buffer and call the given SAX handler routines.
10893 *
10894 * Returns 0 in case of success or a error number otherwise
10895 */
10896int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010897 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010898 int ret = 0;
10899 xmlParserCtxtPtr ctxt;
10900 xmlSAXHandlerPtr oldsax = NULL;
10901
Daniel Veillard9e923512002-08-14 08:48:52 +000010902 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000010903 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10904 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000010905 oldsax = ctxt->sax;
10906 ctxt->sax = sax;
Daniel Veillard30211a02001-04-26 09:33:18 +000010907 if (user_data != NULL)
10908 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010909
10910 xmlParseDocument(ctxt);
10911
10912 if (ctxt->wellFormed)
10913 ret = 0;
10914 else {
10915 if (ctxt->errNo != 0)
10916 ret = ctxt->errNo;
10917 else
10918 ret = -1;
10919 }
Daniel Veillard9e923512002-08-14 08:48:52 +000010920 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000010921 xmlFreeParserCtxt(ctxt);
10922
10923 return ret;
10924}
10925
10926/**
10927 * xmlCreateDocParserCtxt:
10928 * @cur: a pointer to an array of xmlChar
10929 *
10930 * Creates a parser context for an XML in-memory document.
10931 *
10932 * Returns the new parser context or NULL
10933 */
10934xmlParserCtxtPtr
10935xmlCreateDocParserCtxt(xmlChar *cur) {
10936 int len;
10937
10938 if (cur == NULL)
10939 return(NULL);
10940 len = xmlStrlen(cur);
10941 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10942}
10943
10944/**
10945 * xmlSAXParseDoc:
10946 * @sax: the SAX handler block
10947 * @cur: a pointer to an array of xmlChar
10948 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10949 * documents
10950 *
10951 * parse an XML in-memory document and build a tree.
10952 * It use the given SAX function block to handle the parsing callback.
10953 * If sax is NULL, fallback to the default DOM tree building routines.
10954 *
10955 * Returns the resulting document tree
10956 */
10957
10958xmlDocPtr
10959xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10960 xmlDocPtr ret;
10961 xmlParserCtxtPtr ctxt;
10962
10963 if (cur == NULL) return(NULL);
10964
10965
10966 ctxt = xmlCreateDocParserCtxt(cur);
10967 if (ctxt == NULL) return(NULL);
10968 if (sax != NULL) {
10969 ctxt->sax = sax;
10970 ctxt->userData = NULL;
10971 }
10972
10973 xmlParseDocument(ctxt);
10974 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10975 else {
10976 ret = NULL;
10977 xmlFreeDoc(ctxt->myDoc);
10978 ctxt->myDoc = NULL;
10979 }
10980 if (sax != NULL)
10981 ctxt->sax = NULL;
10982 xmlFreeParserCtxt(ctxt);
10983
10984 return(ret);
10985}
10986
10987/**
10988 * xmlParseDoc:
10989 * @cur: a pointer to an array of xmlChar
10990 *
10991 * parse an XML in-memory document and build a tree.
10992 *
10993 * Returns the resulting document tree
10994 */
10995
10996xmlDocPtr
10997xmlParseDoc(xmlChar *cur) {
10998 return(xmlSAXParseDoc(NULL, cur, 0));
10999}
11000
Daniel Veillard8107a222002-01-13 14:10:10 +000011001/************************************************************************
11002 * *
11003 * Specific function to keep track of entities references *
11004 * and used by the XSLT debugger *
11005 * *
11006 ************************************************************************/
11007
11008static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
11009
11010/**
11011 * xmlAddEntityReference:
11012 * @ent : A valid entity
11013 * @firstNode : A valid first node for children of entity
11014 * @lastNode : A valid last node of children entity
11015 *
11016 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
11017 */
11018static void
11019xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
11020 xmlNodePtr lastNode)
11021{
11022 if (xmlEntityRefFunc != NULL) {
11023 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
11024 }
11025}
11026
11027
11028/**
11029 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000011030 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000011031 *
11032 * Set the function to call call back when a xml reference has been made
11033 */
11034void
11035xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
11036{
11037 xmlEntityRefFunc = func;
11038}
Owen Taylor3473f882001-02-23 17:55:21 +000011039
11040/************************************************************************
11041 * *
11042 * Miscellaneous *
11043 * *
11044 ************************************************************************/
11045
11046#ifdef LIBXML_XPATH_ENABLED
11047#include <libxml/xpath.h>
11048#endif
11049
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011050extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000011051static int xmlParserInitialized = 0;
11052
11053/**
11054 * xmlInitParser:
11055 *
11056 * Initialization function for the XML parser.
11057 * This is not reentrant. Call once before processing in case of
11058 * use in multithreaded programs.
11059 */
11060
11061void
11062xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000011063 if (xmlParserInitialized != 0)
11064 return;
Owen Taylor3473f882001-02-23 17:55:21 +000011065
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011066 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
11067 (xmlGenericError == NULL))
11068 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011069 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000011070 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000011071 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000011072 xmlInitCharEncodingHandlers();
11073 xmlInitializePredefinedEntities();
11074 xmlDefaultSAXHandlerInit();
11075 xmlRegisterDefaultInputCallbacks();
11076 xmlRegisterDefaultOutputCallbacks();
11077#ifdef LIBXML_HTML_ENABLED
11078 htmlInitAutoClose();
11079 htmlDefaultSAXHandlerInit();
11080#endif
11081#ifdef LIBXML_XPATH_ENABLED
11082 xmlXPathInit();
11083#endif
11084 xmlParserInitialized = 1;
11085}
11086
11087/**
11088 * xmlCleanupParser:
11089 *
11090 * Cleanup function for the XML parser. It tries to reclaim all
11091 * parsing related global memory allocated for the parser processing.
11092 * It doesn't deallocate any document related memory. Calling this
11093 * function should not prevent reusing the parser.
Daniel Veillard7424eb62003-01-24 14:14:52 +000011094 * One should call xmlCleanupParser() only when the process has
11095 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000011096 */
11097
11098void
11099xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000011100 if (!xmlParserInitialized)
11101 return;
11102
Owen Taylor3473f882001-02-23 17:55:21 +000011103 xmlCleanupCharEncodingHandlers();
11104 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000011105#ifdef LIBXML_CATALOG_ENABLED
11106 xmlCatalogCleanup();
11107#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000011108 xmlCleanupThreads();
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011109 xmlCleanupGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000011110 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011111}