blob: 0189d78f0c13e87349cae85ac9cc617cdd969c04 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
44#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000045#include <libxml/threads.h>
46#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000056#ifdef LIBXML_CATALOG_ENABLED
57#include <libxml/catalog.h>
58#endif
Owen Taylor3473f882001-02-23 17:55:21 +000059
60#ifdef HAVE_CTYPE_H
61#include <ctype.h>
62#endif
63#ifdef HAVE_STDLIB_H
64#include <stdlib.h>
65#endif
66#ifdef HAVE_SYS_STAT_H
67#include <sys/stat.h>
68#endif
69#ifdef HAVE_FCNTL_H
70#include <fcntl.h>
71#endif
72#ifdef HAVE_UNISTD_H
73#include <unistd.h>
74#endif
75#ifdef HAVE_ZLIB_H
76#include <zlib.h>
77#endif
78
Daniel Veillard3b2e4e12003-02-03 08:52:58 +000079/**
80 * MAX_DEPTH:
81 *
82 * arbitrary depth limit for the XML documents that we allow to
83 * process. This is not a limitation of the parser but a safety
84 * boundary feature.
85 */
86#define MAX_DEPTH 1024
Owen Taylor3473f882001-02-23 17:55:21 +000087
/*
 * Buffer sizing constants. The code consuming them is outside this
 * excerpt -- NOTE(review): confirm intended use at the call sites.
 */
Daniel Veillard21a0f912001-02-25 19:54:14 +000088#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000089#define XML_PARSER_BUFFER_SIZE 100
90
/*
 * Marker string, cast with BAD_CAST. NOTE(review): presumably identifies
 * documents built in SAX compatibility mode -- confirm at the use sites.
 */
Daniel Veillard5997aca2002-03-18 18:36:20 +000091#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
92

Owen Taylor3473f882001-02-23 17:55:21 +000093/*
Owen Taylor3473f882001-02-23 17:55:21 +000094 * List of XML prefixed PI allowed by W3C specs
95 */
96
/* The table below is terminated by a NULL entry. */
Daniel Veillardb44025c2001-10-11 22:55:55 +000097static const char *xmlW3CPIs[] = {
Owen Taylor3473f882001-02-23 17:55:21 +000098 "xml-stylesheet",
99 NULL
100};
101
/*
 * Forward declarations. The definitions are not visible in this part of
 * the file; presumably they appear further down -- TODO confirm.
 */
102/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000103xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
104 const xmlChar **str);
105
Daniel Veillard257d9102001-05-08 10:41:44 +0000106static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000107xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
108 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000109 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000110 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000111
Daniel Veillard8107a222002-01-13 14:10:10 +0000112static void
113xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
114 xmlNodePtr lastNode);
115
Daniel Veillard328f48c2002-11-15 15:24:34 +0000116static int
117xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
118 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Owen Taylor3473f882001-02-23 17:55:21 +0000119/************************************************************************
120 * *
121 * Parser stacks related functions and macros *
122 * *
123 ************************************************************************/
124
/* String-based entity reference parser, declared here ahead of its use. */
125xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
126 const xmlChar ** str);
127
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000128/**
129 * inputPush:
130 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000131 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000132 *
133 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000134 *
135 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000136 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000137extern int
138inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
139{
140 if (ctxt->inputNr >= ctxt->inputMax) {
141 ctxt->inputMax *= 2;
142 ctxt->inputTab =
143 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
144 ctxt->inputMax *
145 sizeof(ctxt->inputTab[0]));
146 if (ctxt->inputTab == NULL) {
147 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
148 return (0);
149 }
150 }
151 ctxt->inputTab[ctxt->inputNr] = value;
152 ctxt->input = value;
153 return (ctxt->inputNr++);
154}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000155/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000156 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000157 * @ctxt: an XML parser context
158 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000159 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000160 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000161 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000162 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000163extern xmlParserInputPtr
164inputPop(xmlParserCtxtPtr ctxt)
165{
166 xmlParserInputPtr ret;
167
168 if (ctxt->inputNr <= 0)
169 return (0);
170 ctxt->inputNr--;
171 if (ctxt->inputNr > 0)
172 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
173 else
174 ctxt->input = NULL;
175 ret = ctxt->inputTab[ctxt->inputNr];
176 ctxt->inputTab[ctxt->inputNr] = 0;
177 return (ret);
178}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000179/**
180 * nodePush:
181 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000182 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000183 *
184 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000185 *
186 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000187 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000188extern int
189nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
190{
191 if (ctxt->nodeNr >= ctxt->nodeMax) {
192 ctxt->nodeMax *= 2;
193 ctxt->nodeTab =
194 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
195 ctxt->nodeMax *
196 sizeof(ctxt->nodeTab[0]));
197 if (ctxt->nodeTab == NULL) {
198 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
199 return (0);
200 }
201 }
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000202#ifdef MAX_DEPTH
203 if (ctxt->nodeNr > MAX_DEPTH) {
204 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
205 ctxt->sax->error(ctxt->userData,
206 "Excessive depth in document: change MAX_DEPTH = %d\n",
207 MAX_DEPTH);
208 ctxt->wellFormed = 0;
209 ctxt->instate = XML_PARSER_EOF;
210 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
211 return(0);
212 }
213#endif
Daniel Veillard1c732d22002-11-30 11:22:59 +0000214 ctxt->nodeTab[ctxt->nodeNr] = value;
215 ctxt->node = value;
216 return (ctxt->nodeNr++);
217}
218/**
219 * nodePop:
220 * @ctxt: an XML parser context
221 *
222 * Pops the top element node from the node stack
223 *
224 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +0000225 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000226extern xmlNodePtr
227nodePop(xmlParserCtxtPtr ctxt)
228{
229 xmlNodePtr ret;
230
231 if (ctxt->nodeNr <= 0)
232 return (0);
233 ctxt->nodeNr--;
234 if (ctxt->nodeNr > 0)
235 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
236 else
237 ctxt->node = NULL;
238 ret = ctxt->nodeTab[ctxt->nodeNr];
239 ctxt->nodeTab[ctxt->nodeNr] = 0;
240 return (ret);
241}
242/**
243 * namePush:
244 * @ctxt: an XML parser context
245 * @value: the element name
246 *
247 * Pushes a new element name on top of the name stack
248 *
249 * Returns 0 in case of error, the index in the stack otherwise
250 */
251extern int
252namePush(xmlParserCtxtPtr ctxt, xmlChar * value)
253{
254 if (ctxt->nameNr >= ctxt->nameMax) {
255 ctxt->nameMax *= 2;
256 ctxt->nameTab =
257 (xmlChar * *)xmlRealloc(ctxt->nameTab,
258 ctxt->nameMax *
259 sizeof(ctxt->nameTab[0]));
260 if (ctxt->nameTab == NULL) {
261 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
262 return (0);
263 }
264 }
265 ctxt->nameTab[ctxt->nameNr] = value;
266 ctxt->name = value;
267 return (ctxt->nameNr++);
268}
269/**
270 * namePop:
271 * @ctxt: an XML parser context
272 *
273 * Pops the top element name from the name stack
274 *
275 * Returns the name just removed
276 */
277extern xmlChar *
278namePop(xmlParserCtxtPtr ctxt)
279{
280 xmlChar *ret;
281
282 if (ctxt->nameNr <= 0)
283 return (0);
284 ctxt->nameNr--;
285 if (ctxt->nameNr > 0)
286 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
287 else
288 ctxt->name = NULL;
289 ret = ctxt->nameTab[ctxt->nameNr];
290 ctxt->nameTab[ctxt->nameNr] = 0;
291 return (ret);
292}
Owen Taylor3473f882001-02-23 17:55:21 +0000293
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000294static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000295 if (ctxt->spaceNr >= ctxt->spaceMax) {
296 ctxt->spaceMax *= 2;
297 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
298 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
299 if (ctxt->spaceTab == NULL) {
300 xmlGenericError(xmlGenericErrorContext,
301 "realloc failed !\n");
302 return(0);
303 }
304 }
305 ctxt->spaceTab[ctxt->spaceNr] = val;
306 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
307 return(ctxt->spaceNr++);
308}
309
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000310static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000311 int ret;
312 if (ctxt->spaceNr <= 0) return(0);
313 ctxt->spaceNr--;
314 if (ctxt->spaceNr > 0)
315 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
316 else
317 ctxt->space = NULL;
318 ret = ctxt->spaceTab[ctxt->spaceNr];
319 ctxt->spaceTab[ctxt->spaceNr] = -1;
320 return(ret);
321}
322
323/*
324 * Macros for accessing the content. Those should be used only by the parser,
325 * and not exported.
326 *
327 * Dirty macros, i.e. one often need to make assumption on the context to
328 * use them
329 *
330 * CUR_PTR return the current pointer to the xmlChar to be parsed.
331 * To be used with extreme caution since operations consuming
332 * characters may move the input buffer to a different location !
333 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
334 * This should be used internally by the parser
335 * only to compare to ASCII values otherwise it would break when
336 * running with UTF-8 encoding.
337 * RAW same as CUR but in the input buffer, bypass any token
338 * extraction that may have been done
339 * NXT(n) returns the n'th next xmlChar. Same as CUR it should be used only
340 * to compare on ASCII based substring.
341 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +0000342 * strings without newlines within the parser.
343 * NEXT1 Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
344 * defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +0000345 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
346 *
347 * NEXT Skip to the next character, this does the proper decoding
348 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +0000349 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +0000350 * CUR_CHAR(l) returns the current unicode character (int), set l
351 * to the number of xmlChars used for the encoding [0-5].
352 * CUR_SCHAR same but operate on a string instead of the context
353 * COPY_BUF copy the current unicode char to the target buffer, increment
354 * the index
355 * GROW, SHRINK handling of input buffers
356 */
357
/*
 * All macros below expect a variable named `ctxt` to be in scope at the
 * point of use.
 *
 * RAW and CUR both expand to the byte at the current input position.
 * NXT(val) is the byte `val` positions after it, CUR_PTR the position
 * pointer itself.
 */
Daniel Veillardfdc91562002-07-01 21:52:03 +0000358#define RAW (*ctxt->input->cur)
359#define CUR (*ctxt->input->cur)
Owen Taylor3473f882001-02-23 17:55:21 +0000360#define NXT(val) ctxt->input->cur[(val)]
361#define CUR_PTR ctxt->input->cur
362
/*
 * SKIP(val): advances nbChars, the input position and the column by val.
 * Afterwards, a '%' at the new position is handed to
 * xmlParserHandlePEReference(); and if the new position holds a 0 byte
 * and xmlParserInputGrow() returns <= 0, xmlPopInput() is called.
 * `val` is evaluated three times.
 */
363#define SKIP(val) do { \
Daniel Veillard77a90a72003-03-22 00:04:05 +0000364 ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
Owen Taylor3473f882001-02-23 17:55:21 +0000365 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
Daniel Veillard561b7f82002-03-20 21:55:57 +0000366 if ((*ctxt->input->cur == 0) && \
Owen Taylor3473f882001-02-23 17:55:21 +0000367 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
368 xmlPopInput(ctxt); \
369 } while (0)
370
/*
 * SHRINK: calls xmlSHRINK() once the current position is more than
 * INPUT_CHUNK bytes past the input base.
 * NOTE: the expansion is a bare `if` statement that already ends in ';',
 * so it must not be used as the body of an if that has an else branch.
 */
Daniel Veillard46de64e2002-05-29 08:21:33 +0000371#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) \
372 xmlSHRINK (ctxt);
373
374static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
375 xmlParserInputShrink(ctxt->input);
376 if ((*ctxt->input->cur == 0) &&
377 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
378 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000379 }
Owen Taylor3473f882001-02-23 17:55:21 +0000380
/*
 * GROW: calls xmlGROW() when fewer than INPUT_CHUNK bytes lie between the
 * current position and the end of the input.
 * NOTE: the expansion is a bare `if` statement that already ends in ';',
 * so it must not be used as the body of an if that has an else branch.
 */
Daniel Veillard46de64e2002-05-29 08:21:33 +0000381#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) \
382 xmlGROW (ctxt);
383
384static void xmlGROW (xmlParserCtxtPtr ctxt) {
385 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
386 if ((*ctxt->input->cur == 0) &&
387 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
388 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000389 }
Owen Taylor3473f882001-02-23 17:55:21 +0000390
/* SKIP_BLANKS and NEXT are shorthands for calls taking the in-scope ctxt. */
391#define SKIP_BLANKS xmlSkipBlankChars(ctxt)
392
393#define NEXT xmlNextChar(ctxt)
394
/*
 * NEXT1: advances column, position and nbChars by one, then asks for more
 * input if the new current byte is 0. Unlike SKIP it never pops the input
 * nor looks for '%'.
 * NOTE: expands to a plain { } block, not do { } while (0), so `NEXT1;`
 * before an else would not parse.
 */
Daniel Veillard21a0f912001-02-25 19:54:14 +0000395#define NEXT1 { \
Daniel Veillard77a90a72003-03-22 00:04:05 +0000396 ctxt->input->col++; \
Daniel Veillard21a0f912001-02-25 19:54:14 +0000397 ctxt->input->cur++; \
398 ctxt->nbChars++; \
Daniel Veillard561b7f82002-03-20 21:55:57 +0000399 if (*ctxt->input->cur == 0) \
Daniel Veillard21a0f912001-02-25 19:54:14 +0000400 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
401 }
402
/*
 * NEXTL(l): steps over the current character, which is l bytes long.
 * A '\n' bumps the line counter and resets the column to 1, anything else
 * advances the column by one. A '%' at the new position is handed to
 * xmlParserHandlePEReference(). `l` is used unparenthesized, so pass a
 * plain variable or constant.
 */
Owen Taylor3473f882001-02-23 17:55:21 +0000403#define NEXTL(l) do { \
404 if (*(ctxt->input->cur) == '\n') { \
405 ctxt->input->line++; ctxt->input->col = 1; \
406 } else ctxt->input->col++; \
Daniel Veillardfdc91562002-07-01 21:52:03 +0000407 ctxt->input->cur += l; \
Owen Taylor3473f882001-02-23 17:55:21 +0000408 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
Owen Taylor3473f882001-02-23 17:55:21 +0000409 } while (0)
410
/* Both take the address of `l`, so `l` must be an lvalue. */
411#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
412#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
413
/*
 * COPY_BUF(l,b,i,v): stores v at b[i] and advances i; a single byte when
 * l == 1, otherwise through xmlCopyCharMultiByte().
 * NOTE: expands to a bare if/else with unparenthesized arguments; wrap the
 * use in braces when it sits inside another if.
 */
414#define COPY_BUF(l,b,i,v) \
415 if (l == 1) b[i++] = (xmlChar) v; \
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000416 else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +0000417
418/**
419 * xmlSkipBlankChars:
420 * @ctxt: the XML parser context
421 *
422 * skip all blanks character found at that point in the input streams.
423 * It pops up finished entities in the process if allowable at that point.
424 *
425 * Returns the number of space chars skipped
426 */
427
428int
429xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000430 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000431
432 /*
433 * It's Okay to use CUR/NEXT here since all the blanks are on
434 * the ASCII range.
435 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000436 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
437 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000438 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000439 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000440 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000441 cur = ctxt->input->cur;
442 while (IS_BLANK(*cur)) {
443 if (*cur == '\n') {
444 ctxt->input->line++; ctxt->input->col = 1;
445 }
446 cur++;
447 res++;
448 if (*cur == 0) {
449 ctxt->input->cur = cur;
450 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
451 cur = ctxt->input->cur;
452 }
453 }
454 ctxt->input->cur = cur;
455 } else {
456 int cur;
457 do {
458 cur = CUR;
459 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
460 NEXT;
461 cur = CUR;
462 res++;
463 }
464 while ((cur == 0) && (ctxt->inputNr > 1) &&
465 (ctxt->instate != XML_PARSER_COMMENT)) {
466 xmlPopInput(ctxt);
467 cur = CUR;
468 }
469 /*
470 * Need to handle support of entities branching here
471 */
472 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
473 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
474 }
Owen Taylor3473f882001-02-23 17:55:21 +0000475 return(res);
476}
477
478/************************************************************************
479 * *
480 * Commodity functions to handle entities *
481 * *
482 ************************************************************************/
483
484/**
485 * xmlPopInput:
486 * @ctxt: an XML parser context
487 *
488 * xmlPopInput: the current input pointed by ctxt->input came to an end
489 * pop it and return the next char.
490 *
491 * Returns the current xmlChar in the parser context
492 */
493xmlChar
494xmlPopInput(xmlParserCtxtPtr ctxt) {
495 if (ctxt->inputNr == 1) return(0); /* End of main Input */
496 if (xmlParserDebugEntities)
497 xmlGenericError(xmlGenericErrorContext,
498 "Popping input %d\n", ctxt->inputNr);
499 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +0000500 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000501 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
502 return(xmlPopInput(ctxt));
503 return(CUR);
504}
505
506/**
507 * xmlPushInput:
508 * @ctxt: an XML parser context
509 * @input: an XML parser input fragment (entity, XML fragment ...).
510 *
511 * xmlPushInput: switch to a new input stream which is stacked on top
512 * of the previous one(s).
513 */
514void
515xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
516 if (input == NULL) return;
517
518 if (xmlParserDebugEntities) {
519 if ((ctxt->input != NULL) && (ctxt->input->filename))
520 xmlGenericError(xmlGenericErrorContext,
521 "%s(%d): ", ctxt->input->filename,
522 ctxt->input->line);
523 xmlGenericError(xmlGenericErrorContext,
524 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
525 }
526 inputPush(ctxt, input);
527 GROW;
528}
529
530/**
531 * xmlParseCharRef:
532 * @ctxt: an XML parser context
533 *
534 * parse Reference declarations
535 *
536 * [66] CharRef ::= '&#' [0-9]+ ';' |
537 * '&#x' [0-9a-fA-F]+ ';'
538 *
539 * [ WFC: Legal Character ]
540 * Characters referred to using character references must match the
541 * production for Char.
542 *
543 * Returns the value parsed (as an int), 0 in case of error
544 */
545int
546xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000547 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000548 int count = 0;
549
Owen Taylor3473f882001-02-23 17:55:21 +0000550 /*
551 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
552 */
Daniel Veillard561b7f82002-03-20 21:55:57 +0000553 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +0000554 (NXT(2) == 'x')) {
555 SKIP(3);
556 GROW;
557 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000558 if (count++ > 20) {
559 count = 0;
560 GROW;
561 }
562 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000563 val = val * 16 + (CUR - '0');
564 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
565 val = val * 16 + (CUR - 'a') + 10;
566 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
567 val = val * 16 + (CUR - 'A') + 10;
568 else {
569 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
570 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
571 ctxt->sax->error(ctxt->userData,
572 "xmlParseCharRef: invalid hexadecimal value\n");
573 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000574 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000575 val = 0;
576 break;
577 }
578 NEXT;
579 count++;
580 }
581 if (RAW == ';') {
582 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +0000583 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +0000584 ctxt->nbChars ++;
585 ctxt->input->cur++;
586 }
Daniel Veillard561b7f82002-03-20 21:55:57 +0000587 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +0000588 SKIP(2);
589 GROW;
590 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000591 if (count++ > 20) {
592 count = 0;
593 GROW;
594 }
595 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000596 val = val * 10 + (CUR - '0');
597 else {
598 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
599 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
600 ctxt->sax->error(ctxt->userData,
601 "xmlParseCharRef: invalid decimal value\n");
602 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000603 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000604 val = 0;
605 break;
606 }
607 NEXT;
608 count++;
609 }
610 if (RAW == ';') {
611 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +0000612 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +0000613 ctxt->nbChars ++;
614 ctxt->input->cur++;
615 }
616 } else {
617 ctxt->errNo = XML_ERR_INVALID_CHARREF;
618 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
619 ctxt->sax->error(ctxt->userData,
620 "xmlParseCharRef: invalid value\n");
621 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000622 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000623 }
624
625 /*
626 * [ WFC: Legal Character ]
627 * Characters referred to using character references must match the
628 * production for Char.
629 */
630 if (IS_CHAR(val)) {
631 return(val);
632 } else {
633 ctxt->errNo = XML_ERR_INVALID_CHAR;
634 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000635 ctxt->sax->error(ctxt->userData,
636 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000637 val);
638 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000639 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000640 }
641 return(0);
642}
643
644/**
645 * xmlParseStringCharRef:
646 * @ctxt: an XML parser context
647 * @str: a pointer to an index in the string
648 *
649 * parse Reference declarations, variant parsing from a string rather
650 * than an an input flow.
651 *
652 * [66] CharRef ::= '&#' [0-9]+ ';' |
653 * '&#x' [0-9a-fA-F]+ ';'
654 *
655 * [ WFC: Legal Character ]
656 * Characters referred to using character references must match the
657 * production for Char.
658 *
659 * Returns the value parsed (as an int), 0 in case of error, str will be
660 * updated to the current value of the index
661 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000662static int
Owen Taylor3473f882001-02-23 17:55:21 +0000663xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
664 const xmlChar *ptr;
665 xmlChar cur;
666 int val = 0;
667
668 if ((str == NULL) || (*str == NULL)) return(0);
669 ptr = *str;
670 cur = *ptr;
671 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
672 ptr += 3;
673 cur = *ptr;
674 while (cur != ';') { /* Non input consuming loop */
675 if ((cur >= '0') && (cur <= '9'))
676 val = val * 16 + (cur - '0');
677 else if ((cur >= 'a') && (cur <= 'f'))
678 val = val * 16 + (cur - 'a') + 10;
679 else if ((cur >= 'A') && (cur <= 'F'))
680 val = val * 16 + (cur - 'A') + 10;
681 else {
682 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
683 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
684 ctxt->sax->error(ctxt->userData,
685 "xmlParseStringCharRef: invalid hexadecimal value\n");
686 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000687 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000688 val = 0;
689 break;
690 }
691 ptr++;
692 cur = *ptr;
693 }
694 if (cur == ';')
695 ptr++;
696 } else if ((cur == '&') && (ptr[1] == '#')){
697 ptr += 2;
698 cur = *ptr;
699 while (cur != ';') { /* Non input consuming loops */
700 if ((cur >= '0') && (cur <= '9'))
701 val = val * 10 + (cur - '0');
702 else {
703 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
704 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
705 ctxt->sax->error(ctxt->userData,
706 "xmlParseStringCharRef: invalid decimal value\n");
707 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000708 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000709 val = 0;
710 break;
711 }
712 ptr++;
713 cur = *ptr;
714 }
715 if (cur == ';')
716 ptr++;
717 } else {
718 ctxt->errNo = XML_ERR_INVALID_CHARREF;
719 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
720 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000721 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000722 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000723 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000724 return(0);
725 }
726 *str = ptr;
727
728 /*
729 * [ WFC: Legal Character ]
730 * Characters referred to using character references must match the
731 * production for Char.
732 */
733 if (IS_CHAR(val)) {
734 return(val);
735 } else {
736 ctxt->errNo = XML_ERR_INVALID_CHAR;
737 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
738 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000739 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000740 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000741 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000742 }
743 return(0);
744}
745
746/**
Daniel Veillardf5582f12002-06-11 10:08:16 +0000747 * xmlNewBlanksWrapperInputStream:
748 * @ctxt: an XML parser context
749 * @entity: an Entity pointer
750 *
751 * Create a new input stream for wrapping
752 * blanks around a PEReference
753 *
754 * Returns the new input stream or NULL
755 */
756
757static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
758
Daniel Veillardf4862f02002-09-10 11:13:43 +0000759static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +0000760xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
761 xmlParserInputPtr input;
762 xmlChar *buffer;
763 size_t length;
764 if (entity == NULL) {
765 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
766 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
767 ctxt->sax->error(ctxt->userData,
768 "internal: xmlNewBlanksWrapperInputStream entity = NULL\n");
769 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
770 return(NULL);
771 }
772 if (xmlParserDebugEntities)
773 xmlGenericError(xmlGenericErrorContext,
774 "new blanks wrapper for entity: %s\n", entity->name);
775 input = xmlNewInputStream(ctxt);
776 if (input == NULL) {
777 return(NULL);
778 }
779 length = xmlStrlen(entity->name) + 5;
780 buffer = xmlMalloc(length);
781 if (buffer == NULL) {
782 return(NULL);
783 }
784 buffer [0] = ' ';
785 buffer [1] = '%';
786 buffer [length-3] = ';';
787 buffer [length-2] = ' ';
788 buffer [length-1] = 0;
789 memcpy(buffer + 2, entity->name, length - 5);
790 input->free = deallocblankswrapper;
791 input->base = buffer;
792 input->cur = buffer;
793 input->length = length;
794 input->end = &buffer[length];
795 return(input);
796}
797
798/**
Owen Taylor3473f882001-02-23 17:55:21 +0000799 * xmlParserHandlePEReference:
800 * @ctxt: the parser context
801 *
802 * [69] PEReference ::= '%' Name ';'
803 *
804 * [ WFC: No Recursion ]
805 * A parsed entity must not contain a recursive
806 * reference to itself, either directly or indirectly.
807 *
808 * [ WFC: Entity Declared ]
809 * In a document without any DTD, a document with only an internal DTD
810 * subset which contains no parameter entity references, or a document
811 * with "standalone='yes'", ... ... The declaration of a parameter
812 * entity must precede any reference to it...
813 *
814 * [ VC: Entity Declared ]
815 * In a document with an external subset or external parameter entities
816 * with "standalone='no'", ... ... The declaration of a parameter entity
817 * must precede any reference to it...
818 *
819 * [ WFC: In DTD ]
820 * Parameter-entity references may only appear in the DTD.
821 * NOTE: misleading but this is handled.
822 *
823 * A PEReference may have been detected in the current input stream
824 * the handling is done accordingly to
825 * http://www.w3.org/TR/REC-xml#entproc
826 * i.e.
827 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000828 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000829 */
830void
831xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
832 xmlChar *name;
833 xmlEntityPtr entity = NULL;
834 xmlParserInputPtr input;
835
Owen Taylor3473f882001-02-23 17:55:21 +0000836 if (RAW != '%') return;
837 switch(ctxt->instate) {
838 case XML_PARSER_CDATA_SECTION:
839 return;
840 case XML_PARSER_COMMENT:
841 return;
842 case XML_PARSER_START_TAG:
843 return;
844 case XML_PARSER_END_TAG:
845 return;
846 case XML_PARSER_EOF:
847 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
848 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
849 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
850 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000851 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000852 return;
853 case XML_PARSER_PROLOG:
854 case XML_PARSER_START:
855 case XML_PARSER_MISC:
856 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
857 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
858 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
859 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000860 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000861 return;
862 case XML_PARSER_ENTITY_DECL:
863 case XML_PARSER_CONTENT:
864 case XML_PARSER_ATTRIBUTE_VALUE:
865 case XML_PARSER_PI:
866 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +0000867 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +0000868 /* we just ignore it there */
869 return;
870 case XML_PARSER_EPILOG:
871 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
872 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
873 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
874 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000875 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000876 return;
877 case XML_PARSER_ENTITY_VALUE:
878 /*
879 * NOTE: in the case of entity values, we don't do the
880 * substitution here since we need the literal
881 * entity value to be able to save the internal
882 * subset of the document.
883 * This will be handled by xmlStringDecodeEntities
884 */
885 return;
886 case XML_PARSER_DTD:
887 /*
888 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
889 * In the internal DTD subset, parameter-entity references
890 * can occur only where markup declarations can occur, not
891 * within markup declarations.
892 * In that case this is handled in xmlParseMarkupDecl
893 */
894 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
895 return;
Daniel Veillardf5582f12002-06-11 10:08:16 +0000896 if (IS_BLANK(NXT(1)) || NXT(1) == 0)
897 return;
Owen Taylor3473f882001-02-23 17:55:21 +0000898 break;
899 case XML_PARSER_IGNORE:
900 return;
901 }
902
903 NEXT;
904 name = xmlParseName(ctxt);
905 if (xmlParserDebugEntities)
906 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000907 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000908 if (name == NULL) {
909 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
910 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000911 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000912 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000913 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000914 } else {
915 if (RAW == ';') {
916 NEXT;
917 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
918 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
919 if (entity == NULL) {
920
921 /*
922 * [ WFC: Entity Declared ]
923 * In a document without any DTD, a document with only an
924 * internal DTD subset which contains no parameter entity
925 * references, or a document with "standalone='yes'", ...
926 * ... The declaration of a parameter entity must precede
927 * any reference to it...
928 */
929 if ((ctxt->standalone == 1) ||
930 ((ctxt->hasExternalSubset == 0) &&
931 (ctxt->hasPErefs == 0))) {
932 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
933 ctxt->sax->error(ctxt->userData,
934 "PEReference: %%%s; not found\n", name);
935 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000936 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000937 } else {
938 /*
939 * [ VC: Entity Declared ]
940 * In a document with an external subset or external
941 * parameter entities with "standalone='no'", ...
942 * ... The declaration of a parameter entity must precede
943 * any reference to it...
944 */
945 if ((!ctxt->disableSAX) &&
946 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
947 ctxt->vctxt.error(ctxt->vctxt.userData,
948 "PEReference: %%%s; not found\n", name);
949 } else if ((!ctxt->disableSAX) &&
950 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
951 ctxt->sax->warning(ctxt->userData,
952 "PEReference: %%%s; not found\n", name);
953 ctxt->valid = 0;
954 }
Daniel Veillardf5582f12002-06-11 10:08:16 +0000955 } else if (ctxt->input->free != deallocblankswrapper) {
956 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
957 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +0000958 } else {
959 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
960 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000961 xmlChar start[4];
962 xmlCharEncoding enc;
963
Owen Taylor3473f882001-02-23 17:55:21 +0000964 /*
965 * handle the extra spaces added before and after
966 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000967 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000968 */
969 input = xmlNewEntityInputStream(ctxt, entity);
970 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000971
972 /*
973 * Get the 4 first bytes and decode the charset
974 * if enc != XML_CHAR_ENCODING_NONE
975 * plug some encoding conversion routines.
976 */
977 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +0000978 if (entity->length >= 4) {
979 start[0] = RAW;
980 start[1] = NXT(1);
981 start[2] = NXT(2);
982 start[3] = NXT(3);
983 enc = xmlDetectCharEncoding(start, 4);
984 if (enc != XML_CHAR_ENCODING_NONE) {
985 xmlSwitchEncoding(ctxt, enc);
986 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000987 }
988
Owen Taylor3473f882001-02-23 17:55:21 +0000989 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
990 (RAW == '<') && (NXT(1) == '?') &&
991 (NXT(2) == 'x') && (NXT(3) == 'm') &&
992 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
993 xmlParseTextDecl(ctxt);
994 }
Owen Taylor3473f882001-02-23 17:55:21 +0000995 } else {
996 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
997 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000998 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000999 name);
1000 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001001 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001002 }
1003 }
1004 } else {
1005 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1007 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001008 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001009 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001010 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001011 }
1012 xmlFree(name);
1013 }
1014}
1015
/*
 * Macro used to grow the current buffer.
 *
 * Doubles <buffer>_size and reallocates <buffer> accordingly. The result
 * of xmlRealloc() is kept in a temporary so that, on failure, the
 * previous block can still be released before the enclosing function
 * returns NULL (the macro must only be used in functions returning a
 * pointer).
 */
#define growBuffer(buffer) {                                            \
    xmlChar *buffer##_grown;                                            \
    buffer##_size *= 2;                                                 \
    buffer##_grown = (xmlChar *)                                        \
        xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));            \
    if (buffer##_grown == NULL) {                                       \
        xmlGenericError(xmlGenericErrorContext, "realloc failed");      \
        xmlFree(buffer);                                                \
        buffer = NULL;                                                  \
        return(NULL);                                                   \
    }                                                                   \
    buffer = buffer##_grown;                                            \
}
1028
1029/**
1030 * xmlStringDecodeEntities:
1031 * @ctxt: the parser context
1032 * @str: the input string
1033 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1034 * @end: an end marker xmlChar, 0 if none
1035 * @end2: an end marker xmlChar, 0 if none
1036 * @end3: an end marker xmlChar, 0 if none
1037 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001038 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001039 *
1040 * [67] Reference ::= EntityRef | CharRef
1041 *
1042 * [69] PEReference ::= '%' Name ';'
1043 *
1044 * Returns A newly allocated string with the substitution done. The caller
1045 * must deallocate it !
1046 */
1047xmlChar *
1048xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1049 xmlChar end, xmlChar end2, xmlChar end3) {
1050 xmlChar *buffer = NULL;
1051 int buffer_size = 0;
1052
1053 xmlChar *current = NULL;
1054 xmlEntityPtr ent;
1055 int c,l;
1056 int nbchars = 0;
1057
1058 if (str == NULL)
1059 return(NULL);
1060
1061 if (ctxt->depth > 40) {
1062 ctxt->errNo = XML_ERR_ENTITY_LOOP;
1063 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1064 ctxt->sax->error(ctxt->userData,
1065 "Detected entity reference loop\n");
1066 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001067 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001068 return(NULL);
1069 }
1070
1071 /*
1072 * allocate a translation buffer.
1073 */
1074 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1075 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1076 if (buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00001077 xmlGenericError(xmlGenericErrorContext,
1078 "xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00001079 return(NULL);
1080 }
1081
1082 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001083 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001084 * we are operating on already parsed values.
1085 */
1086 c = CUR_SCHAR(str, l);
1087 while ((c != 0) && (c != end) && /* non input consuming loop */
1088 (c != end2) && (c != end3)) {
1089
1090 if (c == 0) break;
1091 if ((c == '&') && (str[1] == '#')) {
1092 int val = xmlParseStringCharRef(ctxt, &str);
1093 if (val != 0) {
1094 COPY_BUF(0,buffer,nbchars,val);
1095 }
1096 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1097 if (xmlParserDebugEntities)
1098 xmlGenericError(xmlGenericErrorContext,
1099 "String decoding Entity Reference: %.30s\n",
1100 str);
1101 ent = xmlParseStringEntityRef(ctxt, &str);
1102 if ((ent != NULL) &&
1103 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1104 if (ent->content != NULL) {
1105 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1106 } else {
1107 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1108 ctxt->sax->error(ctxt->userData,
1109 "internal error entity has no content\n");
1110 }
1111 } else if ((ent != NULL) && (ent->content != NULL)) {
1112 xmlChar *rep;
1113
1114 ctxt->depth++;
1115 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1116 0, 0, 0);
1117 ctxt->depth--;
1118 if (rep != NULL) {
1119 current = rep;
1120 while (*current != 0) { /* non input consuming loop */
1121 buffer[nbchars++] = *current++;
1122 if (nbchars >
1123 buffer_size - XML_PARSER_BUFFER_SIZE) {
1124 growBuffer(buffer);
1125 }
1126 }
1127 xmlFree(rep);
1128 }
1129 } else if (ent != NULL) {
1130 int i = xmlStrlen(ent->name);
1131 const xmlChar *cur = ent->name;
1132
1133 buffer[nbchars++] = '&';
1134 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1135 growBuffer(buffer);
1136 }
1137 for (;i > 0;i--)
1138 buffer[nbchars++] = *cur++;
1139 buffer[nbchars++] = ';';
1140 }
1141 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1142 if (xmlParserDebugEntities)
1143 xmlGenericError(xmlGenericErrorContext,
1144 "String decoding PE Reference: %.30s\n", str);
1145 ent = xmlParseStringPEReference(ctxt, &str);
1146 if (ent != NULL) {
1147 xmlChar *rep;
1148
1149 ctxt->depth++;
1150 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1151 0, 0, 0);
1152 ctxt->depth--;
1153 if (rep != NULL) {
1154 current = rep;
1155 while (*current != 0) { /* non input consuming loop */
1156 buffer[nbchars++] = *current++;
1157 if (nbchars >
1158 buffer_size - XML_PARSER_BUFFER_SIZE) {
1159 growBuffer(buffer);
1160 }
1161 }
1162 xmlFree(rep);
1163 }
1164 }
1165 } else {
1166 COPY_BUF(l,buffer,nbchars,c);
1167 str += l;
1168 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1169 growBuffer(buffer);
1170 }
1171 }
1172 c = CUR_SCHAR(str, l);
1173 }
1174 buffer[nbchars++] = 0;
1175 return(buffer);
1176}
1177
1178
1179/************************************************************************
1180 * *
1181 * Commodity functions to handle xmlChars *
1182 * *
1183 ************************************************************************/
1184
1185/**
1186 * xmlStrndup:
1187 * @cur: the input xmlChar *
1188 * @len: the len of @cur
1189 *
1190 * a strndup for array of xmlChar's
1191 *
1192 * Returns a new xmlChar * or NULL
1193 */
1194xmlChar *
1195xmlStrndup(const xmlChar *cur, int len) {
1196 xmlChar *ret;
1197
1198 if ((cur == NULL) || (len < 0)) return(NULL);
1199 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1200 if (ret == NULL) {
1201 xmlGenericError(xmlGenericErrorContext,
1202 "malloc of %ld byte failed\n",
1203 (len + 1) * (long)sizeof(xmlChar));
1204 return(NULL);
1205 }
1206 memcpy(ret, cur, len * sizeof(xmlChar));
1207 ret[len] = 0;
1208 return(ret);
1209}
1210
1211/**
1212 * xmlStrdup:
1213 * @cur: the input xmlChar *
1214 *
1215 * a strdup for array of xmlChar's. Since they are supposed to be
1216 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1217 * a termination mark of '0'.
1218 *
1219 * Returns a new xmlChar * or NULL
1220 */
1221xmlChar *
1222xmlStrdup(const xmlChar *cur) {
1223 const xmlChar *p = cur;
1224
1225 if (cur == NULL) return(NULL);
1226 while (*p != 0) p++; /* non input consuming */
1227 return(xmlStrndup(cur, p - cur));
1228}
1229
1230/**
1231 * xmlCharStrndup:
1232 * @cur: the input char *
1233 * @len: the len of @cur
1234 *
1235 * a strndup for char's to xmlChar's
1236 *
1237 * Returns a new xmlChar * or NULL
1238 */
1239
1240xmlChar *
1241xmlCharStrndup(const char *cur, int len) {
1242 int i;
1243 xmlChar *ret;
1244
1245 if ((cur == NULL) || (len < 0)) return(NULL);
1246 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1247 if (ret == NULL) {
1248 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1249 (len + 1) * (long)sizeof(xmlChar));
1250 return(NULL);
1251 }
1252 for (i = 0;i < len;i++)
1253 ret[i] = (xmlChar) cur[i];
1254 ret[len] = 0;
1255 return(ret);
1256}
1257
1258/**
1259 * xmlCharStrdup:
1260 * @cur: the input char *
Owen Taylor3473f882001-02-23 17:55:21 +00001261 *
1262 * a strdup for char's to xmlChar's
1263 *
1264 * Returns a new xmlChar * or NULL
1265 */
1266
1267xmlChar *
1268xmlCharStrdup(const char *cur) {
1269 const char *p = cur;
1270
1271 if (cur == NULL) return(NULL);
1272 while (*p != '\0') p++; /* non input consuming */
1273 return(xmlCharStrndup(cur, p - cur));
1274}
1275
1276/**
1277 * xmlStrcmp:
1278 * @str1: the first xmlChar *
1279 * @str2: the second xmlChar *
1280 *
1281 * a strcmp for xmlChar's
1282 *
1283 * Returns the integer result of the comparison
1284 */
1285
1286int
1287xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1288 register int tmp;
1289
1290 if (str1 == str2) return(0);
1291 if (str1 == NULL) return(-1);
1292 if (str2 == NULL) return(1);
1293 do {
1294 tmp = *str1++ - *str2;
1295 if (tmp != 0) return(tmp);
1296 } while (*str2++ != 0);
1297 return 0;
1298}
1299
1300/**
1301 * xmlStrEqual:
1302 * @str1: the first xmlChar *
1303 * @str2: the second xmlChar *
1304 *
1305 * Check if both string are equal of have same content
1306 * Should be a bit more readable and faster than xmlStrEqual()
1307 *
1308 * Returns 1 if they are equal, 0 if they are different
1309 */
1310
1311int
1312xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1313 if (str1 == str2) return(1);
1314 if (str1 == NULL) return(0);
1315 if (str2 == NULL) return(0);
1316 do {
1317 if (*str1++ != *str2) return(0);
1318 } while (*str2++);
1319 return(1);
1320}
1321
1322/**
1323 * xmlStrncmp:
1324 * @str1: the first xmlChar *
1325 * @str2: the second xmlChar *
1326 * @len: the max comparison length
1327 *
1328 * a strncmp for xmlChar's
1329 *
1330 * Returns the integer result of the comparison
1331 */
1332
1333int
1334xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1335 register int tmp;
1336
1337 if (len <= 0) return(0);
1338 if (str1 == str2) return(0);
1339 if (str1 == NULL) return(-1);
1340 if (str2 == NULL) return(1);
1341 do {
1342 tmp = *str1++ - *str2;
1343 if (tmp != 0 || --len == 0) return(tmp);
1344 } while (*str2++ != 0);
1345 return 0;
1346}
1347
Daniel Veillardb44025c2001-10-11 22:55:55 +00001348static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001349 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1350 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1351 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1352 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1353 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1354 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1355 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1356 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1357 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1358 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1359 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1360 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1361 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1362 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1363 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1364 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1365 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1366 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1367 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1368 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1369 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1370 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1371 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1372 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1373 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1374 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1375 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1376 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1377 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1378 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1379 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1380 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1381};
1382
1383/**
1384 * xmlStrcasecmp:
1385 * @str1: the first xmlChar *
1386 * @str2: the second xmlChar *
1387 *
1388 * a strcasecmp for xmlChar's
1389 *
1390 * Returns the integer result of the comparison
1391 */
1392
1393int
1394xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1395 register int tmp;
1396
1397 if (str1 == str2) return(0);
1398 if (str1 == NULL) return(-1);
1399 if (str2 == NULL) return(1);
1400 do {
1401 tmp = casemap[*str1++] - casemap[*str2];
1402 if (tmp != 0) return(tmp);
1403 } while (*str2++ != 0);
1404 return 0;
1405}
1406
1407/**
1408 * xmlStrncasecmp:
1409 * @str1: the first xmlChar *
1410 * @str2: the second xmlChar *
1411 * @len: the max comparison length
1412 *
1413 * a strncasecmp for xmlChar's
1414 *
1415 * Returns the integer result of the comparison
1416 */
1417
1418int
1419xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1420 register int tmp;
1421
1422 if (len <= 0) return(0);
1423 if (str1 == str2) return(0);
1424 if (str1 == NULL) return(-1);
1425 if (str2 == NULL) return(1);
1426 do {
1427 tmp = casemap[*str1++] - casemap[*str2];
1428 if (tmp != 0 || --len == 0) return(tmp);
1429 } while (*str2++ != 0);
1430 return 0;
1431}
1432
1433/**
1434 * xmlStrchr:
1435 * @str: the xmlChar * array
1436 * @val: the xmlChar to search
1437 *
1438 * a strchr for xmlChar's
1439 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001440 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001441 */
1442
1443const xmlChar *
1444xmlStrchr(const xmlChar *str, xmlChar val) {
1445 if (str == NULL) return(NULL);
1446 while (*str != 0) { /* non input consuming */
1447 if (*str == val) return((xmlChar *) str);
1448 str++;
1449 }
1450 return(NULL);
1451}
1452
1453/**
1454 * xmlStrstr:
1455 * @str: the xmlChar * array (haystack)
1456 * @val: the xmlChar to search (needle)
1457 *
1458 * a strstr for xmlChar's
1459 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001460 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001461 */
1462
1463const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001464xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001465 int n;
1466
1467 if (str == NULL) return(NULL);
1468 if (val == NULL) return(NULL);
1469 n = xmlStrlen(val);
1470
1471 if (n == 0) return(str);
1472 while (*str != 0) { /* non input consuming */
1473 if (*str == *val) {
1474 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1475 }
1476 str++;
1477 }
1478 return(NULL);
1479}
1480
1481/**
1482 * xmlStrcasestr:
1483 * @str: the xmlChar * array (haystack)
1484 * @val: the xmlChar to search (needle)
1485 *
1486 * a case-ignoring strstr for xmlChar's
1487 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001488 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001489 */
1490
1491const xmlChar *
1492xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1493 int n;
1494
1495 if (str == NULL) return(NULL);
1496 if (val == NULL) return(NULL);
1497 n = xmlStrlen(val);
1498
1499 if (n == 0) return(str);
1500 while (*str != 0) { /* non input consuming */
1501 if (casemap[*str] == casemap[*val])
1502 if (!xmlStrncasecmp(str, val, n)) return(str);
1503 str++;
1504 }
1505 return(NULL);
1506}
1507
1508/**
1509 * xmlStrsub:
1510 * @str: the xmlChar * array (haystack)
1511 * @start: the index of the first char (zero based)
1512 * @len: the length of the substring
1513 *
1514 * Extract a substring of a given string
1515 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001516 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001517 */
1518
1519xmlChar *
1520xmlStrsub(const xmlChar *str, int start, int len) {
1521 int i;
1522
1523 if (str == NULL) return(NULL);
1524 if (start < 0) return(NULL);
1525 if (len < 0) return(NULL);
1526
1527 for (i = 0;i < start;i++) {
1528 if (*str == 0) return(NULL);
1529 str++;
1530 }
1531 if (*str == 0) return(NULL);
1532 return(xmlStrndup(str, len));
1533}
1534
1535/**
1536 * xmlStrlen:
1537 * @str: the xmlChar * array
1538 *
1539 * length of a xmlChar's string
1540 *
1541 * Returns the number of xmlChar contained in the ARRAY.
1542 */
1543
1544int
1545xmlStrlen(const xmlChar *str) {
1546 int len = 0;
1547
1548 if (str == NULL) return(0);
1549 while (*str != 0) { /* non input consuming */
1550 str++;
1551 len++;
1552 }
1553 return(len);
1554}
1555
1556/**
1557 * xmlStrncat:
1558 * @cur: the original xmlChar * array
1559 * @add: the xmlChar * array added
1560 * @len: the length of @add
1561 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001562 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001563 * first bytes of @add.
1564 *
1565 * Returns a new xmlChar *, the original @cur is reallocated if needed
1566 * and should not be freed
1567 */
1568
1569xmlChar *
1570xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1571 int size;
1572 xmlChar *ret;
1573
1574 if ((add == NULL) || (len == 0))
1575 return(cur);
1576 if (cur == NULL)
1577 return(xmlStrndup(add, len));
1578
1579 size = xmlStrlen(cur);
1580 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1581 if (ret == NULL) {
1582 xmlGenericError(xmlGenericErrorContext,
1583 "xmlStrncat: realloc of %ld byte failed\n",
1584 (size + len + 1) * (long)sizeof(xmlChar));
1585 return(cur);
1586 }
1587 memcpy(&ret[size], add, len * sizeof(xmlChar));
1588 ret[size + len] = 0;
1589 return(ret);
1590}
1591
1592/**
1593 * xmlStrcat:
1594 * @cur: the original xmlChar * array
1595 * @add: the xmlChar * array added
1596 *
1597 * a strcat for array of xmlChar's. Since they are supposed to be
1598 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1599 * a termination mark of '0'.
1600 *
1601 * Returns a new xmlChar * containing the concatenated string.
1602 */
1603xmlChar *
1604xmlStrcat(xmlChar *cur, const xmlChar *add) {
1605 const xmlChar *p = add;
1606
1607 if (add == NULL) return(cur);
1608 if (cur == NULL)
1609 return(xmlStrdup(add));
1610
1611 while (*p != 0) p++; /* non input consuming */
1612 return(xmlStrncat(cur, add, p - add));
1613}
1614
1615/************************************************************************
1616 * *
1617 * Commodity functions, cleanup needed ? *
1618 * *
1619 ************************************************************************/
1620
1621/**
1622 * areBlanks:
1623 * @ctxt: an XML parser context
1624 * @str: a xmlChar *
1625 * @len: the size of @str
1626 *
1627 * Is this a sequence of blank chars that one can ignore ?
1628 *
1629 * Returns 1 if ignorable 0 otherwise.
1630 */
1631
1632static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1633 int i, ret;
1634 xmlNodePtr lastChild;
1635
Daniel Veillard05c13a22001-09-09 08:38:09 +00001636 /*
1637 * Don't spend time trying to differentiate them, the same callback is
1638 * used !
1639 */
1640 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001641 return(0);
1642
Owen Taylor3473f882001-02-23 17:55:21 +00001643 /*
1644 * Check for xml:space value.
1645 */
1646 if (*(ctxt->space) == 1)
1647 return(0);
1648
1649 /*
1650 * Check that the string is made of blanks
1651 */
1652 for (i = 0;i < len;i++)
1653 if (!(IS_BLANK(str[i]))) return(0);
1654
1655 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001656 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001657 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001658 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001659 if (ctxt->myDoc != NULL) {
1660 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1661 if (ret == 0) return(1);
1662 if (ret == 1) return(0);
1663 }
1664
1665 /*
1666 * Otherwise, heuristic :-\
1667 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001668 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001669 if ((ctxt->node->children == NULL) &&
1670 (RAW == '<') && (NXT(1) == '/')) return(0);
1671
1672 lastChild = xmlGetLastChild(ctxt->node);
1673 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001674 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1675 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001676 } else if (xmlNodeIsText(lastChild))
1677 return(0);
1678 else if ((ctxt->node->children != NULL) &&
1679 (xmlNodeIsText(ctxt->node->children)))
1680 return(0);
1681 return(1);
1682}
1683
Owen Taylor3473f882001-02-23 17:55:21 +00001684/************************************************************************
1685 * *
1686 * Extra stuff for namespace support *
1687 * Relates to http://www.w3.org/TR/WD-xml-names *
1688 * *
1689 ************************************************************************/
1690
1691/**
1692 * xmlSplitQName:
1693 * @ctxt: an XML parser context
1694 * @name: an XML parser context
1695 * @prefix: a xmlChar **
1696 *
1697 * parse an UTF8 encoded XML qualified name string
1698 *
1699 * [NS 5] QName ::= (Prefix ':')? LocalPart
1700 *
1701 * [NS 6] Prefix ::= NCName
1702 *
1703 * [NS 7] LocalPart ::= NCName
1704 *
1705 * Returns the local part, and prefix is updated
1706 * to get the Prefix if any.
1707 */
1708
1709xmlChar *
1710xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1711 xmlChar buf[XML_MAX_NAMELEN + 5];
1712 xmlChar *buffer = NULL;
1713 int len = 0;
1714 int max = XML_MAX_NAMELEN;
1715 xmlChar *ret = NULL;
1716 const xmlChar *cur = name;
1717 int c;
1718
1719 *prefix = NULL;
1720
1721#ifndef XML_XML_NAMESPACE
1722 /* xml: prefix is not really a namespace */
1723 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1724 (cur[2] == 'l') && (cur[3] == ':'))
1725 return(xmlStrdup(name));
1726#endif
1727
1728 /* nasty but valid */
1729 if (cur[0] == ':')
1730 return(xmlStrdup(name));
1731
1732 c = *cur++;
1733 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1734 buf[len++] = c;
1735 c = *cur++;
1736 }
1737 if (len >= max) {
1738 /*
1739 * Okay someone managed to make a huge name, so he's ready to pay
1740 * for the processing speed.
1741 */
1742 max = len * 2;
1743
1744 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1745 if (buffer == NULL) {
1746 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1747 ctxt->sax->error(ctxt->userData,
1748 "xmlSplitQName: out of memory\n");
1749 return(NULL);
1750 }
1751 memcpy(buffer, buf, len);
1752 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1753 if (len + 10 > max) {
1754 max *= 2;
1755 buffer = (xmlChar *) xmlRealloc(buffer,
1756 max * sizeof(xmlChar));
1757 if (buffer == NULL) {
1758 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1759 ctxt->sax->error(ctxt->userData,
1760 "xmlSplitQName: out of memory\n");
1761 return(NULL);
1762 }
1763 }
1764 buffer[len++] = c;
1765 c = *cur++;
1766 }
1767 buffer[len] = 0;
1768 }
1769
1770 if (buffer == NULL)
1771 ret = xmlStrndup(buf, len);
1772 else {
1773 ret = buffer;
1774 buffer = NULL;
1775 max = XML_MAX_NAMELEN;
1776 }
1777
1778
1779 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00001780 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001781 if (c == 0) return(ret);
1782 *prefix = ret;
1783 len = 0;
1784
Daniel Veillardbb284f42002-10-16 18:02:47 +00001785 /*
1786 * Check that the first character is proper to start
1787 * a new name
1788 */
1789 if (!(((c >= 0x61) && (c <= 0x7A)) ||
1790 ((c >= 0x41) && (c <= 0x5A)) ||
1791 (c == '_') || (c == ':'))) {
1792 int l;
1793 int first = CUR_SCHAR(cur, l);
1794
1795 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001796 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1797 (ctxt->sax->error != NULL))
Daniel Veillardbb284f42002-10-16 18:02:47 +00001798 ctxt->sax->error(ctxt->userData,
1799 "Name %s is not XML Namespace compliant\n",
1800 name);
1801 }
1802 }
1803 cur++;
1804
Owen Taylor3473f882001-02-23 17:55:21 +00001805 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1806 buf[len++] = c;
1807 c = *cur++;
1808 }
1809 if (len >= max) {
1810 /*
1811 * Okay someone managed to make a huge name, so he's ready to pay
1812 * for the processing speed.
1813 */
1814 max = len * 2;
1815
1816 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1817 if (buffer == NULL) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001818 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1819 (ctxt->sax->error != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00001820 ctxt->sax->error(ctxt->userData,
1821 "xmlSplitQName: out of memory\n");
1822 return(NULL);
1823 }
1824 memcpy(buffer, buf, len);
1825 while (c != 0) { /* tested bigname2.xml */
1826 if (len + 10 > max) {
1827 max *= 2;
1828 buffer = (xmlChar *) xmlRealloc(buffer,
1829 max * sizeof(xmlChar));
1830 if (buffer == NULL) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001831 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1832 (ctxt->sax->error != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00001833 ctxt->sax->error(ctxt->userData,
1834 "xmlSplitQName: out of memory\n");
1835 return(NULL);
1836 }
1837 }
1838 buffer[len++] = c;
1839 c = *cur++;
1840 }
1841 buffer[len] = 0;
1842 }
1843
1844 if (buffer == NULL)
1845 ret = xmlStrndup(buf, len);
1846 else {
1847 ret = buffer;
1848 }
1849 }
1850
1851 return(ret);
1852}
1853
1854/************************************************************************
1855 * *
1856 * The parser itself *
1857 * Relates to http://www.w3.org/TR/REC-xml *
1858 * *
1859 ************************************************************************/
1860
Daniel Veillard76d66f42001-05-16 21:05:17 +00001861static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001862/**
1863 * xmlParseName:
1864 * @ctxt: an XML parser context
1865 *
1866 * parse an XML name.
1867 *
1868 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1869 * CombiningChar | Extender
1870 *
1871 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1872 *
1873 * [6] Names ::= Name (S Name)*
1874 *
1875 * Returns the Name parsed or NULL
1876 */
1877
1878xmlChar *
1879xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001880 const xmlChar *in;
1881 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001882 int count = 0;
1883
1884 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001885
1886 /*
1887 * Accelerator for simple ASCII names
1888 */
1889 in = ctxt->input->cur;
1890 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1891 ((*in >= 0x41) && (*in <= 0x5A)) ||
1892 (*in == '_') || (*in == ':')) {
1893 in++;
1894 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1895 ((*in >= 0x41) && (*in <= 0x5A)) ||
1896 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001897 (*in == '_') || (*in == '-') ||
1898 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001899 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001900 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001901 count = in - ctxt->input->cur;
1902 ret = xmlStrndup(ctxt->input->cur, count);
1903 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00001904 ctxt->nbChars += count;
1905 ctxt->input->col += count;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001906 return(ret);
1907 }
1908 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001909 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001910}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001911
Daniel Veillard46de64e2002-05-29 08:21:33 +00001912/**
1913 * xmlParseNameAndCompare:
1914 * @ctxt: an XML parser context
1915 *
1916 * parse an XML name and compares for match
1917 * (specialized for endtag parsing)
1918 *
1919 *
1920 * Returns NULL for an illegal name, (xmlChar*) 1 for success
1921 * and the name for mismatch
1922 */
1923
Daniel Veillardf4862f02002-09-10 11:13:43 +00001924static xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00001925xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
1926 const xmlChar *cmp = other;
1927 const xmlChar *in;
1928 xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00001929
1930 GROW;
1931
1932 in = ctxt->input->cur;
1933 while (*in != 0 && *in == *cmp) {
1934 ++in;
1935 ++cmp;
1936 }
1937 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
1938 /* success */
1939 ctxt->input->cur = in;
1940 return (xmlChar*) 1;
1941 }
1942 /* failure (or end of input buffer), check with full function */
1943 ret = xmlParseName (ctxt);
1944 if (ret != 0 && xmlStrEqual (ret, other)) {
1945 xmlFree (ret);
1946 return (xmlChar*) 1;
1947 }
1948 return ret;
1949}
1950
Daniel Veillard76d66f42001-05-16 21:05:17 +00001951static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001952xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1953 xmlChar buf[XML_MAX_NAMELEN + 5];
1954 int len = 0, l;
1955 int c;
1956 int count = 0;
1957
1958 /*
1959 * Handler for more complex cases
1960 */
1961 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001962 c = CUR_CHAR(l);
1963 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1964 (!IS_LETTER(c) && (c != '_') &&
1965 (c != ':'))) {
1966 return(NULL);
1967 }
1968
1969 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1970 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1971 (c == '.') || (c == '-') ||
1972 (c == '_') || (c == ':') ||
1973 (IS_COMBINING(c)) ||
1974 (IS_EXTENDER(c)))) {
1975 if (count++ > 100) {
1976 count = 0;
1977 GROW;
1978 }
1979 COPY_BUF(l,buf,len,c);
1980 NEXTL(l);
1981 c = CUR_CHAR(l);
1982 if (len >= XML_MAX_NAMELEN) {
1983 /*
1984 * Okay someone managed to make a huge name, so he's ready to pay
1985 * for the processing speed.
1986 */
1987 xmlChar *buffer;
1988 int max = len * 2;
1989
1990 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1991 if (buffer == NULL) {
1992 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1993 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001994 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001995 return(NULL);
1996 }
1997 memcpy(buffer, buf, len);
1998 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1999 (c == '.') || (c == '-') ||
2000 (c == '_') || (c == ':') ||
2001 (IS_COMBINING(c)) ||
2002 (IS_EXTENDER(c))) {
2003 if (count++ > 100) {
2004 count = 0;
2005 GROW;
2006 }
2007 if (len + 10 > max) {
2008 max *= 2;
2009 buffer = (xmlChar *) xmlRealloc(buffer,
2010 max * sizeof(xmlChar));
2011 if (buffer == NULL) {
2012 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2013 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00002014 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002015 return(NULL);
2016 }
2017 }
2018 COPY_BUF(l,buffer,len,c);
2019 NEXTL(l);
2020 c = CUR_CHAR(l);
2021 }
2022 buffer[len] = 0;
2023 return(buffer);
2024 }
2025 }
2026 return(xmlStrndup(buf, len));
2027}
2028
2029/**
2030 * xmlParseStringName:
2031 * @ctxt: an XML parser context
2032 * @str: a pointer to the string pointer (IN/OUT)
2033 *
2034 * parse an XML name.
2035 *
2036 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2037 * CombiningChar | Extender
2038 *
2039 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2040 *
2041 * [6] Names ::= Name (S Name)*
2042 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002043 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002044 * is updated to the current location in the string.
2045 */
2046
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002047static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002048xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2049 xmlChar buf[XML_MAX_NAMELEN + 5];
2050 const xmlChar *cur = *str;
2051 int len = 0, l;
2052 int c;
2053
2054 c = CUR_SCHAR(cur, l);
2055 if (!IS_LETTER(c) && (c != '_') &&
2056 (c != ':')) {
2057 return(NULL);
2058 }
2059
2060 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2061 (c == '.') || (c == '-') ||
2062 (c == '_') || (c == ':') ||
2063 (IS_COMBINING(c)) ||
2064 (IS_EXTENDER(c))) {
2065 COPY_BUF(l,buf,len,c);
2066 cur += l;
2067 c = CUR_SCHAR(cur, l);
2068 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2069 /*
2070 * Okay someone managed to make a huge name, so he's ready to pay
2071 * for the processing speed.
2072 */
2073 xmlChar *buffer;
2074 int max = len * 2;
2075
2076 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
2077 if (buffer == NULL) {
2078 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2079 ctxt->sax->error(ctxt->userData,
2080 "xmlParseStringName: out of memory\n");
2081 return(NULL);
2082 }
2083 memcpy(buffer, buf, len);
2084 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2085 (c == '.') || (c == '-') ||
2086 (c == '_') || (c == ':') ||
2087 (IS_COMBINING(c)) ||
2088 (IS_EXTENDER(c))) {
2089 if (len + 10 > max) {
2090 max *= 2;
2091 buffer = (xmlChar *) xmlRealloc(buffer,
2092 max * sizeof(xmlChar));
2093 if (buffer == NULL) {
2094 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2095 ctxt->sax->error(ctxt->userData,
2096 "xmlParseStringName: out of memory\n");
2097 return(NULL);
2098 }
2099 }
2100 COPY_BUF(l,buffer,len,c);
2101 cur += l;
2102 c = CUR_SCHAR(cur, l);
2103 }
2104 buffer[len] = 0;
2105 *str = cur;
2106 return(buffer);
2107 }
2108 }
2109 *str = cur;
2110 return(xmlStrndup(buf, len));
2111}
2112
2113/**
2114 * xmlParseNmtoken:
2115 * @ctxt: an XML parser context
2116 *
2117 * parse an XML Nmtoken.
2118 *
2119 * [7] Nmtoken ::= (NameChar)+
2120 *
2121 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2122 *
2123 * Returns the Nmtoken parsed or NULL
2124 */
2125
2126xmlChar *
2127xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2128 xmlChar buf[XML_MAX_NAMELEN + 5];
2129 int len = 0, l;
2130 int c;
2131 int count = 0;
2132
2133 GROW;
2134 c = CUR_CHAR(l);
2135
2136 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2137 (c == '.') || (c == '-') ||
2138 (c == '_') || (c == ':') ||
2139 (IS_COMBINING(c)) ||
2140 (IS_EXTENDER(c))) {
2141 if (count++ > 100) {
2142 count = 0;
2143 GROW;
2144 }
2145 COPY_BUF(l,buf,len,c);
2146 NEXTL(l);
2147 c = CUR_CHAR(l);
2148 if (len >= XML_MAX_NAMELEN) {
2149 /*
2150 * Okay someone managed to make a huge token, so he's ready to pay
2151 * for the processing speed.
2152 */
2153 xmlChar *buffer;
2154 int max = len * 2;
2155
2156 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
2157 if (buffer == NULL) {
2158 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2159 ctxt->sax->error(ctxt->userData,
2160 "xmlParseNmtoken: out of memory\n");
2161 return(NULL);
2162 }
2163 memcpy(buffer, buf, len);
2164 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2165 (c == '.') || (c == '-') ||
2166 (c == '_') || (c == ':') ||
2167 (IS_COMBINING(c)) ||
2168 (IS_EXTENDER(c))) {
2169 if (count++ > 100) {
2170 count = 0;
2171 GROW;
2172 }
2173 if (len + 10 > max) {
2174 max *= 2;
2175 buffer = (xmlChar *) xmlRealloc(buffer,
2176 max * sizeof(xmlChar));
2177 if (buffer == NULL) {
2178 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2179 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002180 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002181 return(NULL);
2182 }
2183 }
2184 COPY_BUF(l,buffer,len,c);
2185 NEXTL(l);
2186 c = CUR_CHAR(l);
2187 }
2188 buffer[len] = 0;
2189 return(buffer);
2190 }
2191 }
2192 if (len == 0)
2193 return(NULL);
2194 return(xmlStrndup(buf, len));
2195}
2196
2197/**
2198 * xmlParseEntityValue:
2199 * @ctxt: an XML parser context
2200 * @orig: if non-NULL store a copy of the original entity value
2201 *
2202 * parse a value for ENTITY declarations
2203 *
2204 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2205 * "'" ([^%&'] | PEReference | Reference)* "'"
2206 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002207 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002208 */
2209
2210xmlChar *
2211xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2212 xmlChar *buf = NULL;
2213 int len = 0;
2214 int size = XML_PARSER_BUFFER_SIZE;
2215 int c, l;
2216 xmlChar stop;
2217 xmlChar *ret = NULL;
2218 const xmlChar *cur = NULL;
2219 xmlParserInputPtr input;
2220
2221 if (RAW == '"') stop = '"';
2222 else if (RAW == '\'') stop = '\'';
2223 else {
2224 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2225 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2226 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2227 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002228 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002229 return(NULL);
2230 }
2231 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2232 if (buf == NULL) {
2233 xmlGenericError(xmlGenericErrorContext,
2234 "malloc of %d byte failed\n", size);
2235 return(NULL);
2236 }
2237
2238 /*
2239 * The content of the entity definition is copied in a buffer.
2240 */
2241
2242 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2243 input = ctxt->input;
2244 GROW;
2245 NEXT;
2246 c = CUR_CHAR(l);
2247 /*
2248 * NOTE: 4.4.5 Included in Literal
2249 * When a parameter entity reference appears in a literal entity
2250 * value, ... a single or double quote character in the replacement
2251 * text is always treated as a normal data character and will not
2252 * terminate the literal.
2253 * In practice it means we stop the loop only when back at parsing
2254 * the initial entity and the quote is found
2255 */
2256 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2257 (ctxt->input != input))) {
2258 if (len + 5 >= size) {
2259 size *= 2;
2260 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2261 if (buf == NULL) {
2262 xmlGenericError(xmlGenericErrorContext,
2263 "realloc of %d byte failed\n", size);
2264 return(NULL);
2265 }
2266 }
2267 COPY_BUF(l,buf,len,c);
2268 NEXTL(l);
2269 /*
2270 * Pop-up of finished entities.
2271 */
2272 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2273 xmlPopInput(ctxt);
2274
2275 GROW;
2276 c = CUR_CHAR(l);
2277 if (c == 0) {
2278 GROW;
2279 c = CUR_CHAR(l);
2280 }
2281 }
2282 buf[len] = 0;
2283
2284 /*
2285 * Raise problem w.r.t. '&' and '%' being used in non-entities
2286 * reference constructs. Note Charref will be handled in
2287 * xmlStringDecodeEntities()
2288 */
2289 cur = buf;
2290 while (*cur != 0) { /* non input consuming */
2291 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2292 xmlChar *name;
2293 xmlChar tmp = *cur;
2294
2295 cur++;
2296 name = xmlParseStringName(ctxt, &cur);
2297 if ((name == NULL) || (*cur != ';')) {
2298 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2299 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2300 ctxt->sax->error(ctxt->userData,
2301 "EntityValue: '%c' forbidden except for entities references\n",
2302 tmp);
2303 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002304 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002305 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002306 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2307 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002308 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2309 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2310 ctxt->sax->error(ctxt->userData,
2311 "EntityValue: PEReferences forbidden in internal subset\n",
2312 tmp);
2313 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002314 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002315 }
2316 if (name != NULL)
2317 xmlFree(name);
2318 }
2319 cur++;
2320 }
2321
2322 /*
2323 * Then PEReference entities are substituted.
2324 */
2325 if (c != stop) {
2326 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2327 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2328 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2329 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002330 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002331 xmlFree(buf);
2332 } else {
2333 NEXT;
2334 /*
2335 * NOTE: 4.4.7 Bypassed
2336 * When a general entity reference appears in the EntityValue in
2337 * an entity declaration, it is bypassed and left as is.
2338 * so XML_SUBSTITUTE_REF is not set here.
2339 */
2340 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2341 0, 0, 0);
2342 if (orig != NULL)
2343 *orig = buf;
2344 else
2345 xmlFree(buf);
2346 }
2347
2348 return(ret);
2349}
2350
2351/**
2352 * xmlParseAttValue:
2353 * @ctxt: an XML parser context
2354 *
2355 * parse a value for an attribute
2356 * Note: the parser won't do substitution of entities here, this
2357 * will be handled later in xmlStringGetNodeList
2358 *
2359 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2360 * "'" ([^<&'] | Reference)* "'"
2361 *
2362 * 3.3.3 Attribute-Value Normalization:
2363 * Before the value of an attribute is passed to the application or
2364 * checked for validity, the XML processor must normalize it as follows:
2365 * - a character reference is processed by appending the referenced
2366 * character to the attribute value
2367 * - an entity reference is processed by recursively processing the
2368 * replacement text of the entity
2369 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2370 * appending #x20 to the normalized value, except that only a single
2371 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2372 * parsed entity or the literal entity value of an internal parsed entity
2373 * - other characters are processed by appending them to the normalized value
2374 * If the declared value is not CDATA, then the XML processor must further
2375 * process the normalized attribute value by discarding any leading and
2376 * trailing space (#x20) characters, and by replacing sequences of space
2377 * (#x20) characters by a single space (#x20) character.
2378 * All attributes for which no declaration has been read should be treated
2379 * by a non-validating parser as if declared CDATA.
2380 *
2381 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2382 */
2383
2384xmlChar *
Daniel Veillarde72c7562002-05-31 09:47:30 +00002385xmlParseAttValueComplex(xmlParserCtxtPtr ctxt);
2386
2387xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002388xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2389 xmlChar limit = 0;
Daniel Veillardf4862f02002-09-10 11:13:43 +00002390 const xmlChar *in = NULL;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002391 xmlChar *ret = NULL;
2392 SHRINK;
2393 GROW;
Daniel Veillarde645e8c2002-10-22 17:35:37 +00002394 in = (xmlChar *) CUR_PTR;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002395 if (*in != '"' && *in != '\'') {
2396 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2397 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2398 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2399 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002400 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002401 return(NULL);
2402 }
2403 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2404 limit = *in;
2405 ++in;
2406
2407 while (*in != limit && *in >= 0x20 && *in <= 0x7f &&
2408 *in != '&' && *in != '<'
2409 ) {
2410 ++in;
2411 }
2412 if (*in != limit) {
2413 return xmlParseAttValueComplex(ctxt);
2414 }
2415 ++in;
2416 ret = xmlStrndup (CUR_PTR + 1, in - CUR_PTR - 2);
2417 CUR_PTR = in;
2418 return ret;
2419}
2420
Daniel Veillard01c13b52002-12-10 15:19:08 +00002421/**
2422 * xmlParseAttValueComplex:
2423 * @ctxt: an XML parser context
2424 *
2425 * parse a value for an attribute, this is the fallback function
2426 * of xmlParseAttValue() when the attribute parsing requires handling
2427 * of non-ASCII characters.
2428 *
2429 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2430 */
Daniel Veillarde72c7562002-05-31 09:47:30 +00002431xmlChar *
2432xmlParseAttValueComplex(xmlParserCtxtPtr ctxt) {
2433 xmlChar limit = 0;
2434 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002435 int len = 0;
2436 int buf_size = 0;
2437 int c, l;
2438 xmlChar *current = NULL;
2439 xmlEntityPtr ent;
2440
2441
2442 SHRINK;
2443 if (NXT(0) == '"') {
2444 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2445 limit = '"';
2446 NEXT;
2447 } else if (NXT(0) == '\'') {
2448 limit = '\'';
2449 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2450 NEXT;
2451 } else {
2452 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2453 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2454 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2455 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002456 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002457 return(NULL);
2458 }
2459
2460 /*
2461 * allocate a translation buffer.
2462 */
2463 buf_size = XML_PARSER_BUFFER_SIZE;
2464 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2465 if (buf == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002466 xmlGenericError(xmlGenericErrorContext,
2467 "xmlParseAttValue: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002468 return(NULL);
2469 }
2470
2471 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002472 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002473 */
2474 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002475 while ((NXT(0) != limit) && /* checked */
2476 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002477 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002478 if (c == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00002479 if (NXT(1) == '#') {
2480 int val = xmlParseCharRef(ctxt);
2481 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002482 if (ctxt->replaceEntities) {
2483 if (len > buf_size - 10) {
2484 growBuffer(buf);
2485 }
2486 buf[len++] = '&';
2487 } else {
2488 /*
2489 * The reparsing will be done in xmlStringGetNodeList()
2490 * called by the attribute() function in SAX.c
2491 */
2492 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002493
Daniel Veillard319a7422001-09-11 09:27:09 +00002494 if (len > buf_size - 10) {
2495 growBuffer(buf);
2496 }
2497 current = &buffer[0];
2498 while (*current != 0) { /* non input consuming */
2499 buf[len++] = *current++;
2500 }
Owen Taylor3473f882001-02-23 17:55:21 +00002501 }
2502 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002503 if (len > buf_size - 10) {
2504 growBuffer(buf);
2505 }
Owen Taylor3473f882001-02-23 17:55:21 +00002506 len += xmlCopyChar(0, &buf[len], val);
2507 }
2508 } else {
2509 ent = xmlParseEntityRef(ctxt);
2510 if ((ent != NULL) &&
2511 (ctxt->replaceEntities != 0)) {
2512 xmlChar *rep;
2513
2514 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2515 rep = xmlStringDecodeEntities(ctxt, ent->content,
2516 XML_SUBSTITUTE_REF, 0, 0, 0);
2517 if (rep != NULL) {
2518 current = rep;
2519 while (*current != 0) { /* non input consuming */
2520 buf[len++] = *current++;
2521 if (len > buf_size - 10) {
2522 growBuffer(buf);
2523 }
2524 }
2525 xmlFree(rep);
2526 }
2527 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002528 if (len > buf_size - 10) {
2529 growBuffer(buf);
2530 }
Owen Taylor3473f882001-02-23 17:55:21 +00002531 if (ent->content != NULL)
2532 buf[len++] = ent->content[0];
2533 }
2534 } else if (ent != NULL) {
2535 int i = xmlStrlen(ent->name);
2536 const xmlChar *cur = ent->name;
2537
2538 /*
2539 * This may look absurd but is needed to detect
2540 * entities problems
2541 */
2542 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2543 (ent->content != NULL)) {
2544 xmlChar *rep;
2545 rep = xmlStringDecodeEntities(ctxt, ent->content,
2546 XML_SUBSTITUTE_REF, 0, 0, 0);
2547 if (rep != NULL)
2548 xmlFree(rep);
2549 }
2550
2551 /*
2552 * Just output the reference
2553 */
2554 buf[len++] = '&';
2555 if (len > buf_size - i - 10) {
2556 growBuffer(buf);
2557 }
2558 for (;i > 0;i--)
2559 buf[len++] = *cur++;
2560 buf[len++] = ';';
2561 }
2562 }
2563 } else {
2564 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2565 COPY_BUF(l,buf,len,0x20);
2566 if (len > buf_size - 10) {
2567 growBuffer(buf);
2568 }
2569 } else {
2570 COPY_BUF(l,buf,len,c);
2571 if (len > buf_size - 10) {
2572 growBuffer(buf);
2573 }
2574 }
2575 NEXTL(l);
2576 }
2577 GROW;
2578 c = CUR_CHAR(l);
2579 }
2580 buf[len++] = 0;
2581 if (RAW == '<') {
2582 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2583 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2584 ctxt->sax->error(ctxt->userData,
2585 "Unescaped '<' not allowed in attributes values\n");
2586 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002587 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002588 } else if (RAW != limit) {
2589 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2590 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2591 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2592 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002593 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002594 } else
2595 NEXT;
2596 return(buf);
2597}
2598
2599/**
2600 * xmlParseSystemLiteral:
2601 * @ctxt: an XML parser context
2602 *
2603 * parse an XML Literal
2604 *
2605 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2606 *
2607 * Returns the SystemLiteral parsed or NULL
2608 */
2609
2610xmlChar *
2611xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2612 xmlChar *buf = NULL;
2613 int len = 0;
2614 int size = XML_PARSER_BUFFER_SIZE;
2615 int cur, l;
2616 xmlChar stop;
2617 int state = ctxt->instate;
2618 int count = 0;
2619
2620 SHRINK;
2621 if (RAW == '"') {
2622 NEXT;
2623 stop = '"';
2624 } else if (RAW == '\'') {
2625 NEXT;
2626 stop = '\'';
2627 } else {
2628 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2629 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2630 ctxt->sax->error(ctxt->userData,
2631 "SystemLiteral \" or ' expected\n");
2632 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002633 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002634 return(NULL);
2635 }
2636
2637 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2638 if (buf == NULL) {
2639 xmlGenericError(xmlGenericErrorContext,
2640 "malloc of %d byte failed\n", size);
2641 return(NULL);
2642 }
2643 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2644 cur = CUR_CHAR(l);
2645 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2646 if (len + 5 >= size) {
2647 size *= 2;
2648 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2649 if (buf == NULL) {
2650 xmlGenericError(xmlGenericErrorContext,
2651 "realloc of %d byte failed\n", size);
2652 ctxt->instate = (xmlParserInputState) state;
2653 return(NULL);
2654 }
2655 }
2656 count++;
2657 if (count > 50) {
2658 GROW;
2659 count = 0;
2660 }
2661 COPY_BUF(l,buf,len,cur);
2662 NEXTL(l);
2663 cur = CUR_CHAR(l);
2664 if (cur == 0) {
2665 GROW;
2666 SHRINK;
2667 cur = CUR_CHAR(l);
2668 }
2669 }
2670 buf[len] = 0;
2671 ctxt->instate = (xmlParserInputState) state;
2672 if (!IS_CHAR(cur)) {
2673 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2674 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2675 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2676 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002677 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002678 } else {
2679 NEXT;
2680 }
2681 return(buf);
2682}
2683
2684/**
2685 * xmlParsePubidLiteral:
2686 * @ctxt: an XML parser context
2687 *
2688 * parse an XML public literal
2689 *
2690 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2691 *
2692 * Returns the PubidLiteral parsed or NULL.
2693 */
2694
2695xmlChar *
2696xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2697 xmlChar *buf = NULL;
2698 int len = 0;
2699 int size = XML_PARSER_BUFFER_SIZE;
2700 xmlChar cur;
2701 xmlChar stop;
2702 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002703 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00002704
2705 SHRINK;
2706 if (RAW == '"') {
2707 NEXT;
2708 stop = '"';
2709 } else if (RAW == '\'') {
2710 NEXT;
2711 stop = '\'';
2712 } else {
2713 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2715 ctxt->sax->error(ctxt->userData,
2716 "SystemLiteral \" or ' expected\n");
2717 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002718 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002719 return(NULL);
2720 }
2721 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2722 if (buf == NULL) {
2723 xmlGenericError(xmlGenericErrorContext,
2724 "malloc of %d byte failed\n", size);
2725 return(NULL);
2726 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002727 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00002728 cur = CUR;
2729 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2730 if (len + 1 >= size) {
2731 size *= 2;
2732 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2733 if (buf == NULL) {
2734 xmlGenericError(xmlGenericErrorContext,
2735 "realloc of %d byte failed\n", size);
2736 return(NULL);
2737 }
2738 }
2739 buf[len++] = cur;
2740 count++;
2741 if (count > 50) {
2742 GROW;
2743 count = 0;
2744 }
2745 NEXT;
2746 cur = CUR;
2747 if (cur == 0) {
2748 GROW;
2749 SHRINK;
2750 cur = CUR;
2751 }
2752 }
2753 buf[len] = 0;
2754 if (cur != stop) {
2755 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2756 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2757 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2758 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002759 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002760 } else {
2761 NEXT;
2762 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002763 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00002764 return(buf);
2765}
2766
Daniel Veillard48b2f892001-02-25 16:11:03 +00002767void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002768/**
2769 * xmlParseCharData:
2770 * @ctxt: an XML parser context
2771 * @cdata: int indicating whether we are within a CDATA section
2772 *
2773 * parse a CharData section.
2774 * if we are within a CDATA section ']]>' marks an end of section.
2775 *
2776 * The right angle bracket (>) may be represented using the string "&gt;",
2777 * and must, for compatibility, be escaped using "&gt;" or a character
2778 * reference when it appears in the string "]]>" in content, when that
2779 * string is not marking the end of a CDATA section.
2780 *
2781 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2782 */
2783
2784void
2785xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00002786 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002787 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002788 int line = ctxt->input->line;
2789 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002790
2791 SHRINK;
2792 GROW;
2793 /*
2794 * Accelerated common case where input don't need to be
2795 * modified before passing it to the handler.
2796 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00002797 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002798 in = ctxt->input->cur;
2799 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002800get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00002801 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
2802 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002803 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002804 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002805 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002806 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002807 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002808 ctxt->input->line++;
2809 in++;
2810 }
2811 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002812 }
2813 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002814 if ((in[1] == ']') && (in[2] == '>')) {
2815 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2816 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2817 ctxt->sax->error(ctxt->userData,
2818 "Sequence ']]>' not allowed in content\n");
2819 ctxt->input->cur = in;
2820 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002821 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002822 return;
2823 }
2824 in++;
2825 goto get_more;
2826 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002827 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002828 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002829 if (IS_BLANK(*ctxt->input->cur)) {
2830 const xmlChar *tmp = ctxt->input->cur;
2831 ctxt->input->cur = in;
2832 if (areBlanks(ctxt, tmp, nbchar)) {
2833 if (ctxt->sax->ignorableWhitespace != NULL)
2834 ctxt->sax->ignorableWhitespace(ctxt->userData,
2835 tmp, nbchar);
2836 } else {
2837 if (ctxt->sax->characters != NULL)
2838 ctxt->sax->characters(ctxt->userData,
2839 tmp, nbchar);
2840 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002841 line = ctxt->input->line;
2842 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002843 } else {
2844 if (ctxt->sax->characters != NULL)
2845 ctxt->sax->characters(ctxt->userData,
2846 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002847 line = ctxt->input->line;
2848 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002849 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002850 }
2851 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002852 if (*in == 0xD) {
2853 in++;
2854 if (*in == 0xA) {
2855 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002856 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002857 ctxt->input->line++;
2858 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002859 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002860 in--;
2861 }
2862 if (*in == '<') {
2863 return;
2864 }
2865 if (*in == '&') {
2866 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002867 }
2868 SHRINK;
2869 GROW;
2870 in = ctxt->input->cur;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002871 } while ((*in >= 0x20) && (*in <= 0x7F));
Daniel Veillard48b2f892001-02-25 16:11:03 +00002872 nbchar = 0;
2873 }
Daniel Veillard50582112001-03-26 22:52:16 +00002874 ctxt->input->line = line;
2875 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002876 xmlParseCharDataComplex(ctxt, cdata);
2877}
2878
Daniel Veillard01c13b52002-12-10 15:19:08 +00002879/**
2880 * xmlParseCharDataComplex:
2881 * @ctxt: an XML parser context
2882 * @cdata: int indicating whether we are within a CDATA section
2883 *
2884 * parse a CharData section.this is the fallback function
2885 * of xmlParseCharData() when the parsing requires handling
2886 * of non-ASCII characters.
2887 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002888void
2889xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002890 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2891 int nbchar = 0;
2892 int cur, l;
2893 int count = 0;
2894
2895 SHRINK;
2896 GROW;
2897 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002898 while ((cur != '<') && /* checked */
2899 (cur != '&') &&
2900 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00002901 if ((cur == ']') && (NXT(1) == ']') &&
2902 (NXT(2) == '>')) {
2903 if (cdata) break;
2904 else {
2905 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2906 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2907 ctxt->sax->error(ctxt->userData,
2908 "Sequence ']]>' not allowed in content\n");
2909 /* Should this be relaxed ??? I see a "must here */
2910 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002911 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002912 }
2913 }
2914 COPY_BUF(l,buf,nbchar,cur);
2915 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2916 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002917 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002918 */
2919 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2920 if (areBlanks(ctxt, buf, nbchar)) {
2921 if (ctxt->sax->ignorableWhitespace != NULL)
2922 ctxt->sax->ignorableWhitespace(ctxt->userData,
2923 buf, nbchar);
2924 } else {
2925 if (ctxt->sax->characters != NULL)
2926 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2927 }
2928 }
2929 nbchar = 0;
2930 }
2931 count++;
2932 if (count > 50) {
2933 GROW;
2934 count = 0;
2935 }
2936 NEXTL(l);
2937 cur = CUR_CHAR(l);
2938 }
2939 if (nbchar != 0) {
2940 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002941 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002942 */
2943 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2944 if (areBlanks(ctxt, buf, nbchar)) {
2945 if (ctxt->sax->ignorableWhitespace != NULL)
2946 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2947 } else {
2948 if (ctxt->sax->characters != NULL)
2949 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2950 }
2951 }
2952 }
2953}
2954
2955/**
2956 * xmlParseExternalID:
2957 * @ctxt: an XML parser context
2958 * @publicID: a xmlChar** receiving PubidLiteral
2959 * @strict: indicate whether we should restrict parsing to only
2960 * production [75], see NOTE below
2961 *
2962 * Parse an External ID or a Public ID
2963 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002964 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002965 * 'PUBLIC' S PubidLiteral S SystemLiteral
2966 *
2967 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2968 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2969 *
2970 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2971 *
2972 * Returns the function returns SystemLiteral and in the second
2973 * case publicID receives PubidLiteral, is strict is off
2974 * it is possible to return NULL and have publicID set.
2975 */
2976
2977xmlChar *
2978xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2979 xmlChar *URI = NULL;
2980
2981 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002982
2983 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002984 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2985 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2986 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2987 SKIP(6);
2988 if (!IS_BLANK(CUR)) {
2989 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2990 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2991 ctxt->sax->error(ctxt->userData,
2992 "Space required after 'SYSTEM'\n");
2993 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002994 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002995 }
2996 SKIP_BLANKS;
2997 URI = xmlParseSystemLiteral(ctxt);
2998 if (URI == NULL) {
2999 ctxt->errNo = XML_ERR_URI_REQUIRED;
3000 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3001 ctxt->sax->error(ctxt->userData,
3002 "xmlParseExternalID: SYSTEM, no URI\n");
3003 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003004 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003005 }
3006 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
3007 (NXT(2) == 'B') && (NXT(3) == 'L') &&
3008 (NXT(4) == 'I') && (NXT(5) == 'C')) {
3009 SKIP(6);
3010 if (!IS_BLANK(CUR)) {
3011 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3012 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3013 ctxt->sax->error(ctxt->userData,
3014 "Space required after 'PUBLIC'\n");
3015 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003016 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003017 }
3018 SKIP_BLANKS;
3019 *publicID = xmlParsePubidLiteral(ctxt);
3020 if (*publicID == NULL) {
3021 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
3022 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3023 ctxt->sax->error(ctxt->userData,
3024 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
3025 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003026 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003027 }
3028 if (strict) {
3029 /*
3030 * We don't handle [83] so "S SystemLiteral" is required.
3031 */
3032 if (!IS_BLANK(CUR)) {
3033 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3034 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3035 ctxt->sax->error(ctxt->userData,
3036 "Space required after the Public Identifier\n");
3037 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003038 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003039 }
3040 } else {
3041 /*
3042 * We handle [83] so we return immediately, if
3043 * "S SystemLiteral" is not detected. From a purely parsing
3044 * point of view that's a nice mess.
3045 */
3046 const xmlChar *ptr;
3047 GROW;
3048
3049 ptr = CUR_PTR;
3050 if (!IS_BLANK(*ptr)) return(NULL);
3051
3052 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3053 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3054 }
3055 SKIP_BLANKS;
3056 URI = xmlParseSystemLiteral(ctxt);
3057 if (URI == NULL) {
3058 ctxt->errNo = XML_ERR_URI_REQUIRED;
3059 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3060 ctxt->sax->error(ctxt->userData,
3061 "xmlParseExternalID: PUBLIC, no URI\n");
3062 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003063 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003064 }
3065 }
3066 return(URI);
3067}
3068
3069/**
3070 * xmlParseComment:
3071 * @ctxt: an XML parser context
3072 *
3073 * Skip an XML (SGML) comment <!-- .... -->
3074 * The spec says that "For compatibility, the string "--" (double-hyphen)
3075 * must not occur within comments. "
3076 *
3077 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3078 */
3079void
3080xmlParseComment(xmlParserCtxtPtr ctxt) {
3081 xmlChar *buf = NULL;
3082 int len;
3083 int size = XML_PARSER_BUFFER_SIZE;
3084 int q, ql;
3085 int r, rl;
3086 int cur, l;
3087 xmlParserInputState state;
3088 xmlParserInputPtr input = ctxt->input;
3089 int count = 0;
3090
3091 /*
3092 * Check that there is a comment right here.
3093 */
3094 if ((RAW != '<') || (NXT(1) != '!') ||
3095 (NXT(2) != '-') || (NXT(3) != '-')) return;
3096
3097 state = ctxt->instate;
3098 ctxt->instate = XML_PARSER_COMMENT;
3099 SHRINK;
3100 SKIP(4);
3101 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3102 if (buf == NULL) {
3103 xmlGenericError(xmlGenericErrorContext,
3104 "malloc of %d byte failed\n", size);
3105 ctxt->instate = state;
3106 return;
3107 }
3108 q = CUR_CHAR(ql);
3109 NEXTL(ql);
3110 r = CUR_CHAR(rl);
3111 NEXTL(rl);
3112 cur = CUR_CHAR(l);
3113 len = 0;
3114 while (IS_CHAR(cur) && /* checked */
3115 ((cur != '>') ||
3116 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003117 if ((r == '-') && (q == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003118 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
3119 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3120 ctxt->sax->error(ctxt->userData,
3121 "Comment must not contain '--' (double-hyphen)`\n");
3122 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003123 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003124 }
3125 if (len + 5 >= size) {
3126 size *= 2;
3127 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3128 if (buf == NULL) {
3129 xmlGenericError(xmlGenericErrorContext,
3130 "realloc of %d byte failed\n", size);
3131 ctxt->instate = state;
3132 return;
3133 }
3134 }
3135 COPY_BUF(ql,buf,len,q);
3136 q = r;
3137 ql = rl;
3138 r = cur;
3139 rl = l;
3140
3141 count++;
3142 if (count > 50) {
3143 GROW;
3144 count = 0;
3145 }
3146 NEXTL(l);
3147 cur = CUR_CHAR(l);
3148 if (cur == 0) {
3149 SHRINK;
3150 GROW;
3151 cur = CUR_CHAR(l);
3152 }
3153 }
3154 buf[len] = 0;
3155 if (!IS_CHAR(cur)) {
3156 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
3157 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3158 ctxt->sax->error(ctxt->userData,
3159 "Comment not terminated \n<!--%.50s\n", buf);
3160 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003161 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003162 xmlFree(buf);
3163 } else {
3164 if (input != ctxt->input) {
3165 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3166 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3167 ctxt->sax->error(ctxt->userData,
3168"Comment doesn't start and stop in the same entity\n");
3169 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003170 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003171 }
3172 NEXT;
3173 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3174 (!ctxt->disableSAX))
3175 ctxt->sax->comment(ctxt->userData, buf);
3176 xmlFree(buf);
3177 }
3178 ctxt->instate = state;
3179}
3180
3181/**
3182 * xmlParsePITarget:
3183 * @ctxt: an XML parser context
3184 *
3185 * parse the name of a PI
3186 *
3187 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3188 *
3189 * Returns the PITarget name or NULL
3190 */
3191
3192xmlChar *
3193xmlParsePITarget(xmlParserCtxtPtr ctxt) {
3194 xmlChar *name;
3195
3196 name = xmlParseName(ctxt);
3197 if ((name != NULL) &&
3198 ((name[0] == 'x') || (name[0] == 'X')) &&
3199 ((name[1] == 'm') || (name[1] == 'M')) &&
3200 ((name[2] == 'l') || (name[2] == 'L'))) {
3201 int i;
3202 if ((name[0] == 'x') && (name[1] == 'm') &&
3203 (name[2] == 'l') && (name[3] == 0)) {
3204 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3205 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3206 ctxt->sax->error(ctxt->userData,
3207 "XML declaration allowed only at the start of the document\n");
3208 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003209 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003210 return(name);
3211 } else if (name[3] == 0) {
3212 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3213 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3214 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
3215 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003216 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003217 return(name);
3218 }
3219 for (i = 0;;i++) {
3220 if (xmlW3CPIs[i] == NULL) break;
3221 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3222 return(name);
3223 }
3224 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3225 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3226 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003227 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003228 }
3229 }
3230 return(name);
3231}
3232
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The value must look like  catalog = "URL"  or  catalog = 'URL'
 * with optional blanks around each piece and nothing but blanks
 * after the closing quote. A syntax error raises the
 * XML_WAR_CATALOG_PI warning, except when the '=' is missing,
 * in which case the function returns silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p, *start;
    xmlChar quote;

    /* the pseudo-attribute name */
    for (p = catalog; IS_BLANK(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto bad_syntax;

    for (p += 7; IS_BLANK(*p); p++)
        ;
    if (*p != '=')
        return;

    /* the opening quote, either kind is accepted */
    for (p++; IS_BLANK(*p); p++)
        ;
    quote = *p;
    if (!((quote == '\'') || (quote == '"')))
        goto bad_syntax;

    /* the value extends up to the matching quote */
    p++;
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto bad_syntax;
    URL = xmlStrndup(start, p - start);

    /* only blanks may follow the closing quote */
    for (p++; IS_BLANK(*p); p++)
        ;
    if (*p == 0) {
        if (URL != NULL) {
            ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
            xmlFree(URL);
        }
        return;
    }

bad_syntax:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3295
Owen Taylor3473f882001-02-23 17:55:21 +00003296/**
3297 * xmlParsePI:
3298 * @ctxt: an XML parser context
3299 *
3300 * parse an XML Processing Instruction.
3301 *
3302 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3303 *
3304 * The processing is transfered to SAX once parsed.
3305 */
3306
3307void
3308xmlParsePI(xmlParserCtxtPtr ctxt) {
3309 xmlChar *buf = NULL;
3310 int len = 0;
3311 int size = XML_PARSER_BUFFER_SIZE;
3312 int cur, l;
3313 xmlChar *target;
3314 xmlParserInputState state;
3315 int count = 0;
3316
3317 if ((RAW == '<') && (NXT(1) == '?')) {
3318 xmlParserInputPtr input = ctxt->input;
3319 state = ctxt->instate;
3320 ctxt->instate = XML_PARSER_PI;
3321 /*
3322 * this is a Processing Instruction.
3323 */
3324 SKIP(2);
3325 SHRINK;
3326
3327 /*
3328 * Parse the target name and check for special support like
3329 * namespace.
3330 */
3331 target = xmlParsePITarget(ctxt);
3332 if (target != NULL) {
3333 if ((RAW == '?') && (NXT(1) == '>')) {
3334 if (input != ctxt->input) {
3335 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3336 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3337 ctxt->sax->error(ctxt->userData,
3338 "PI declaration doesn't start and stop in the same entity\n");
3339 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003340 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003341 }
3342 SKIP(2);
3343
3344 /*
3345 * SAX: PI detected.
3346 */
3347 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3348 (ctxt->sax->processingInstruction != NULL))
3349 ctxt->sax->processingInstruction(ctxt->userData,
3350 target, NULL);
3351 ctxt->instate = state;
3352 xmlFree(target);
3353 return;
3354 }
3355 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3356 if (buf == NULL) {
3357 xmlGenericError(xmlGenericErrorContext,
3358 "malloc of %d byte failed\n", size);
3359 ctxt->instate = state;
3360 return;
3361 }
3362 cur = CUR;
3363 if (!IS_BLANK(cur)) {
3364 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3365 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3366 ctxt->sax->error(ctxt->userData,
3367 "xmlParsePI: PI %s space expected\n", target);
3368 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003369 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003370 }
3371 SKIP_BLANKS;
3372 cur = CUR_CHAR(l);
3373 while (IS_CHAR(cur) && /* checked */
3374 ((cur != '?') || (NXT(1) != '>'))) {
3375 if (len + 5 >= size) {
3376 size *= 2;
3377 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3378 if (buf == NULL) {
3379 xmlGenericError(xmlGenericErrorContext,
3380 "realloc of %d byte failed\n", size);
3381 ctxt->instate = state;
3382 return;
3383 }
3384 }
3385 count++;
3386 if (count > 50) {
3387 GROW;
3388 count = 0;
3389 }
3390 COPY_BUF(l,buf,len,cur);
3391 NEXTL(l);
3392 cur = CUR_CHAR(l);
3393 if (cur == 0) {
3394 SHRINK;
3395 GROW;
3396 cur = CUR_CHAR(l);
3397 }
3398 }
3399 buf[len] = 0;
3400 if (cur != '?') {
3401 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3402 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3403 ctxt->sax->error(ctxt->userData,
3404 "xmlParsePI: PI %s never end ...\n", target);
3405 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003406 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003407 } else {
3408 if (input != ctxt->input) {
3409 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3410 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3411 ctxt->sax->error(ctxt->userData,
3412 "PI declaration doesn't start and stop in the same entity\n");
3413 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003414 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003415 }
3416 SKIP(2);
3417
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003418#ifdef LIBXML_CATALOG_ENABLED
3419 if (((state == XML_PARSER_MISC) ||
3420 (state == XML_PARSER_START)) &&
3421 (xmlStrEqual(target, XML_CATALOG_PI))) {
3422 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3423 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3424 (allow == XML_CATA_ALLOW_ALL))
3425 xmlParseCatalogPI(ctxt, buf);
3426 }
3427#endif
3428
3429
Owen Taylor3473f882001-02-23 17:55:21 +00003430 /*
3431 * SAX: PI detected.
3432 */
3433 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3434 (ctxt->sax->processingInstruction != NULL))
3435 ctxt->sax->processingInstruction(ctxt->userData,
3436 target, buf);
3437 }
3438 xmlFree(buf);
3439 xmlFree(target);
3440 } else {
3441 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3442 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3443 ctxt->sax->error(ctxt->userData,
3444 "xmlParsePI : no target name\n");
3445 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003446 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003447 }
3448 ctxt->instate = state;
3449 }
3450}
3451
3452/**
3453 * xmlParseNotationDecl:
3454 * @ctxt: an XML parser context
3455 *
3456 * parse a notation declaration
3457 *
3458 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3459 *
3460 * Hence there is actually 3 choices:
3461 * 'PUBLIC' S PubidLiteral
3462 * 'PUBLIC' S PubidLiteral S SystemLiteral
3463 * and 'SYSTEM' S SystemLiteral
3464 *
3465 * See the NOTE on xmlParseExternalID().
3466 */
3467
3468void
3469xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3470 xmlChar *name;
3471 xmlChar *Pubid;
3472 xmlChar *Systemid;
3473
3474 if ((RAW == '<') && (NXT(1) == '!') &&
3475 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3476 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3477 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3478 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3479 xmlParserInputPtr input = ctxt->input;
3480 SHRINK;
3481 SKIP(10);
3482 if (!IS_BLANK(CUR)) {
3483 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3484 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3485 ctxt->sax->error(ctxt->userData,
3486 "Space required after '<!NOTATION'\n");
3487 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003488 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003489 return;
3490 }
3491 SKIP_BLANKS;
3492
Daniel Veillard76d66f42001-05-16 21:05:17 +00003493 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003494 if (name == NULL) {
3495 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3496 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3497 ctxt->sax->error(ctxt->userData,
3498 "NOTATION: Name expected here\n");
3499 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003500 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003501 return;
3502 }
3503 if (!IS_BLANK(CUR)) {
3504 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3505 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3506 ctxt->sax->error(ctxt->userData,
3507 "Space required after the NOTATION name'\n");
3508 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003509 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003510 return;
3511 }
3512 SKIP_BLANKS;
3513
3514 /*
3515 * Parse the IDs.
3516 */
3517 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3518 SKIP_BLANKS;
3519
3520 if (RAW == '>') {
3521 if (input != ctxt->input) {
3522 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3523 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3524 ctxt->sax->error(ctxt->userData,
3525"Notation declaration doesn't start and stop in the same entity\n");
3526 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003527 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003528 }
3529 NEXT;
3530 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3531 (ctxt->sax->notationDecl != NULL))
3532 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3533 } else {
3534 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3535 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3536 ctxt->sax->error(ctxt->userData,
3537 "'>' required to close NOTATION declaration\n");
3538 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003539 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003540 }
3541 xmlFree(name);
3542 if (Systemid != NULL) xmlFree(Systemid);
3543 if (Pubid != NULL) xmlFree(Pubid);
3544 }
3545}
3546
3547/**
3548 * xmlParseEntityDecl:
3549 * @ctxt: an XML parser context
3550 *
3551 * parse <!ENTITY declarations
3552 *
3553 * [70] EntityDecl ::= GEDecl | PEDecl
3554 *
3555 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3556 *
3557 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3558 *
3559 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3560 *
3561 * [74] PEDef ::= EntityValue | ExternalID
3562 *
3563 * [76] NDataDecl ::= S 'NDATA' S Name
3564 *
3565 * [ VC: Notation Declared ]
3566 * The Name must match the declared name of a notation.
3567 */
3568
3569void
3570xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3571 xmlChar *name = NULL;
3572 xmlChar *value = NULL;
3573 xmlChar *URI = NULL, *literal = NULL;
3574 xmlChar *ndata = NULL;
3575 int isParameter = 0;
3576 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003577 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003578
3579 GROW;
3580 if ((RAW == '<') && (NXT(1) == '!') &&
3581 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3582 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3583 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3584 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003585 SHRINK;
3586 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003587 skipped = SKIP_BLANKS;
3588 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003589 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3590 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3591 ctxt->sax->error(ctxt->userData,
3592 "Space required after '<!ENTITY'\n");
3593 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003594 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003595 }
Owen Taylor3473f882001-02-23 17:55:21 +00003596
3597 if (RAW == '%') {
3598 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003599 skipped = SKIP_BLANKS;
3600 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003601 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3603 ctxt->sax->error(ctxt->userData,
3604 "Space required after '%'\n");
3605 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003606 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003607 }
Owen Taylor3473f882001-02-23 17:55:21 +00003608 isParameter = 1;
3609 }
3610
Daniel Veillard76d66f42001-05-16 21:05:17 +00003611 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003612 if (name == NULL) {
3613 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3614 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3615 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3616 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003617 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003618 return;
3619 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003620 skipped = SKIP_BLANKS;
3621 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003622 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3623 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3624 ctxt->sax->error(ctxt->userData,
3625 "Space required after the entity name\n");
3626 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003627 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003628 }
Owen Taylor3473f882001-02-23 17:55:21 +00003629
Daniel Veillardf5582f12002-06-11 10:08:16 +00003630 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003631 /*
3632 * handle the various case of definitions...
3633 */
3634 if (isParameter) {
3635 if ((RAW == '"') || (RAW == '\'')) {
3636 value = xmlParseEntityValue(ctxt, &orig);
3637 if (value) {
3638 if ((ctxt->sax != NULL) &&
3639 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3640 ctxt->sax->entityDecl(ctxt->userData, name,
3641 XML_INTERNAL_PARAMETER_ENTITY,
3642 NULL, NULL, value);
3643 }
3644 } else {
3645 URI = xmlParseExternalID(ctxt, &literal, 1);
3646 if ((URI == NULL) && (literal == NULL)) {
3647 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3648 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3649 ctxt->sax->error(ctxt->userData,
3650 "Entity value required\n");
3651 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003652 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003653 }
3654 if (URI) {
3655 xmlURIPtr uri;
3656
3657 uri = xmlParseURI((const char *) URI);
3658 if (uri == NULL) {
3659 ctxt->errNo = XML_ERR_INVALID_URI;
3660 if ((ctxt->sax != NULL) &&
3661 (!ctxt->disableSAX) &&
3662 (ctxt->sax->error != NULL))
3663 ctxt->sax->error(ctxt->userData,
3664 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003665 /*
3666 * This really ought to be a well formedness error
3667 * but the XML Core WG decided otherwise c.f. issue
3668 * E26 of the XML erratas.
3669 */
Owen Taylor3473f882001-02-23 17:55:21 +00003670 } else {
3671 if (uri->fragment != NULL) {
3672 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3673 if ((ctxt->sax != NULL) &&
3674 (!ctxt->disableSAX) &&
3675 (ctxt->sax->error != NULL))
3676 ctxt->sax->error(ctxt->userData,
3677 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003678 /*
3679 * Okay this is foolish to block those but not
3680 * invalid URIs.
3681 */
Owen Taylor3473f882001-02-23 17:55:21 +00003682 ctxt->wellFormed = 0;
3683 } else {
3684 if ((ctxt->sax != NULL) &&
3685 (!ctxt->disableSAX) &&
3686 (ctxt->sax->entityDecl != NULL))
3687 ctxt->sax->entityDecl(ctxt->userData, name,
3688 XML_EXTERNAL_PARAMETER_ENTITY,
3689 literal, URI, NULL);
3690 }
3691 xmlFreeURI(uri);
3692 }
3693 }
3694 }
3695 } else {
3696 if ((RAW == '"') || (RAW == '\'')) {
3697 value = xmlParseEntityValue(ctxt, &orig);
3698 if ((ctxt->sax != NULL) &&
3699 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3700 ctxt->sax->entityDecl(ctxt->userData, name,
3701 XML_INTERNAL_GENERAL_ENTITY,
3702 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003703 /*
3704 * For expat compatibility in SAX mode.
3705 */
3706 if ((ctxt->myDoc == NULL) ||
3707 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3708 if (ctxt->myDoc == NULL) {
3709 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3710 }
3711 if (ctxt->myDoc->intSubset == NULL)
3712 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3713 BAD_CAST "fake", NULL, NULL);
3714
3715 entityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3716 NULL, NULL, value);
3717 }
Owen Taylor3473f882001-02-23 17:55:21 +00003718 } else {
3719 URI = xmlParseExternalID(ctxt, &literal, 1);
3720 if ((URI == NULL) && (literal == NULL)) {
3721 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3722 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3723 ctxt->sax->error(ctxt->userData,
3724 "Entity value required\n");
3725 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003726 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003727 }
3728 if (URI) {
3729 xmlURIPtr uri;
3730
3731 uri = xmlParseURI((const char *)URI);
3732 if (uri == NULL) {
3733 ctxt->errNo = XML_ERR_INVALID_URI;
3734 if ((ctxt->sax != NULL) &&
3735 (!ctxt->disableSAX) &&
3736 (ctxt->sax->error != NULL))
3737 ctxt->sax->error(ctxt->userData,
3738 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003739 /*
3740 * This really ought to be a well formedness error
3741 * but the XML Core WG decided otherwise c.f. issue
3742 * E26 of the XML erratas.
3743 */
Owen Taylor3473f882001-02-23 17:55:21 +00003744 } else {
3745 if (uri->fragment != NULL) {
3746 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3747 if ((ctxt->sax != NULL) &&
3748 (!ctxt->disableSAX) &&
3749 (ctxt->sax->error != NULL))
3750 ctxt->sax->error(ctxt->userData,
3751 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003752 /*
3753 * Okay this is foolish to block those but not
3754 * invalid URIs.
3755 */
Owen Taylor3473f882001-02-23 17:55:21 +00003756 ctxt->wellFormed = 0;
3757 }
3758 xmlFreeURI(uri);
3759 }
3760 }
3761 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3762 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3763 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3764 ctxt->sax->error(ctxt->userData,
3765 "Space required before 'NDATA'\n");
3766 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003767 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003768 }
3769 SKIP_BLANKS;
3770 if ((RAW == 'N') && (NXT(1) == 'D') &&
3771 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3772 (NXT(4) == 'A')) {
3773 SKIP(5);
3774 if (!IS_BLANK(CUR)) {
3775 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3776 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3777 ctxt->sax->error(ctxt->userData,
3778 "Space required after 'NDATA'\n");
3779 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003780 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003781 }
3782 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003783 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003784 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3785 (ctxt->sax->unparsedEntityDecl != NULL))
3786 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3787 literal, URI, ndata);
3788 } else {
3789 if ((ctxt->sax != NULL) &&
3790 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3791 ctxt->sax->entityDecl(ctxt->userData, name,
3792 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3793 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003794 /*
3795 * For expat compatibility in SAX mode.
3796 * assuming the entity repalcement was asked for
3797 */
3798 if ((ctxt->replaceEntities != 0) &&
3799 ((ctxt->myDoc == NULL) ||
3800 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
3801 if (ctxt->myDoc == NULL) {
3802 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3803 }
3804
3805 if (ctxt->myDoc->intSubset == NULL)
3806 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3807 BAD_CAST "fake", NULL, NULL);
3808 entityDecl(ctxt, name,
3809 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3810 literal, URI, NULL);
3811 }
Owen Taylor3473f882001-02-23 17:55:21 +00003812 }
3813 }
3814 }
3815 SKIP_BLANKS;
3816 if (RAW != '>') {
3817 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3818 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3819 ctxt->sax->error(ctxt->userData,
3820 "xmlParseEntityDecl: entity %s not terminated\n", name);
3821 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003822 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003823 } else {
3824 if (input != ctxt->input) {
3825 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3826 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3827 ctxt->sax->error(ctxt->userData,
3828"Entity declaration doesn't start and stop in the same entity\n");
3829 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003830 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003831 }
3832 NEXT;
3833 }
3834 if (orig != NULL) {
3835 /*
3836 * Ugly mechanism to save the raw entity value.
3837 */
3838 xmlEntityPtr cur = NULL;
3839
3840 if (isParameter) {
3841 if ((ctxt->sax != NULL) &&
3842 (ctxt->sax->getParameterEntity != NULL))
3843 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3844 } else {
3845 if ((ctxt->sax != NULL) &&
3846 (ctxt->sax->getEntity != NULL))
3847 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003848 if ((cur == NULL) && (ctxt->userData==ctxt)) {
3849 cur = getEntity(ctxt, name);
3850 }
Owen Taylor3473f882001-02-23 17:55:21 +00003851 }
3852 if (cur != NULL) {
3853 if (cur->orig != NULL)
3854 xmlFree(orig);
3855 else
3856 cur->orig = orig;
3857 } else
3858 xmlFree(orig);
3859 }
3860 if (name != NULL) xmlFree(name);
3861 if (value != NULL) xmlFree(value);
3862 if (URI != NULL) xmlFree(URI);
3863 if (literal != NULL) xmlFree(literal);
3864 if (ndata != NULL) xmlFree(ndata);
3865 }
3866}
3867
3868/**
3869 * xmlParseDefaultDecl:
3870 * @ctxt: an XML parser context
3871 * @value: Receive a possible fixed default value for the attribute
3872 *
3873 * Parse an attribute default declaration
3874 *
3875 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3876 *
3877 * [ VC: Required Attribute ]
3878 * if the default declaration is the keyword #REQUIRED, then the
3879 * attribute must be specified for all elements of the type in the
3880 * attribute-list declaration.
3881 *
3882 * [ VC: Attribute Default Legal ]
3883 * The declared default value must meet the lexical constraints of
3884 * the declared attribute type c.f. xmlValidateAttributeDecl()
3885 *
3886 * [ VC: Fixed Attribute Default ]
3887 * if an attribute has a default value declared with the #FIXED
3888 * keyword, instances of that attribute must match the default value.
3889 *
3890 * [ WFC: No < in Attribute Values ]
3891 * handled in xmlParseAttValue()
3892 *
3893 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3894 * or XML_ATTRIBUTE_FIXED.
3895 */
3896
3897int
3898xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3899 int val;
3900 xmlChar *ret;
3901
3902 *value = NULL;
3903 if ((RAW == '#') && (NXT(1) == 'R') &&
3904 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3905 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3906 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3907 (NXT(8) == 'D')) {
3908 SKIP(9);
3909 return(XML_ATTRIBUTE_REQUIRED);
3910 }
3911 if ((RAW == '#') && (NXT(1) == 'I') &&
3912 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3913 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3914 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3915 SKIP(8);
3916 return(XML_ATTRIBUTE_IMPLIED);
3917 }
3918 val = XML_ATTRIBUTE_NONE;
3919 if ((RAW == '#') && (NXT(1) == 'F') &&
3920 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3921 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3922 SKIP(6);
3923 val = XML_ATTRIBUTE_FIXED;
3924 if (!IS_BLANK(CUR)) {
3925 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3926 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3927 ctxt->sax->error(ctxt->userData,
3928 "Space required after '#FIXED'\n");
3929 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003930 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003931 }
3932 SKIP_BLANKS;
3933 }
3934 ret = xmlParseAttValue(ctxt);
3935 ctxt->instate = XML_PARSER_DTD;
3936 if (ret == NULL) {
3937 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3938 ctxt->sax->error(ctxt->userData,
3939 "Attribute default value declaration error\n");
3940 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003941 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003942 } else
3943 *value = ret;
3944 return(val);
3945}
3946
3947/**
3948 * xmlParseNotationType:
3949 * @ctxt: an XML parser context
3950 *
3951 * parse an Notation attribute type.
3952 *
3953 * Note: the leading 'NOTATION' S part has already being parsed...
3954 *
3955 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3956 *
3957 * [ VC: Notation Attributes ]
3958 * Values of this type must match one of the notation names included
3959 * in the declaration; all notation names in the declaration must be declared.
3960 *
3961 * Returns: the notation attribute tree built while parsing
3962 */
3963
3964xmlEnumerationPtr
3965xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3966 xmlChar *name;
3967 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3968
3969 if (RAW != '(') {
3970 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3971 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3972 ctxt->sax->error(ctxt->userData,
3973 "'(' required to start 'NOTATION'\n");
3974 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003975 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003976 return(NULL);
3977 }
3978 SHRINK;
3979 do {
3980 NEXT;
3981 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003982 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003983 if (name == NULL) {
3984 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3985 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3986 ctxt->sax->error(ctxt->userData,
3987 "Name expected in NOTATION declaration\n");
3988 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003989 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003990 return(ret);
3991 }
3992 cur = xmlCreateEnumeration(name);
3993 xmlFree(name);
3994 if (cur == NULL) return(ret);
3995 if (last == NULL) ret = last = cur;
3996 else {
3997 last->next = cur;
3998 last = cur;
3999 }
4000 SKIP_BLANKS;
4001 } while (RAW == '|');
4002 if (RAW != ')') {
4003 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
4004 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4005 ctxt->sax->error(ctxt->userData,
4006 "')' required to finish NOTATION declaration\n");
4007 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004008 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004009 if ((last != NULL) && (last != ret))
4010 xmlFreeEnumeration(last);
4011 return(ret);
4012 }
4013 NEXT;
4014 return(ret);
4015}
4016
4017/**
4018 * xmlParseEnumerationType:
4019 * @ctxt: an XML parser context
4020 *
4021 * parse an Enumeration attribute type.
4022 *
4023 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4024 *
4025 * [ VC: Enumeration ]
4026 * Values of this type must match one of the Nmtoken tokens in
4027 * the declaration
4028 *
4029 * Returns: the enumeration attribute tree built while parsing
4030 */
4031
4032xmlEnumerationPtr
4033xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4034 xmlChar *name;
4035 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4036
4037 if (RAW != '(') {
4038 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
4039 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4040 ctxt->sax->error(ctxt->userData,
4041 "'(' required to start ATTLIST enumeration\n");
4042 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004043 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004044 return(NULL);
4045 }
4046 SHRINK;
4047 do {
4048 NEXT;
4049 SKIP_BLANKS;
4050 name = xmlParseNmtoken(ctxt);
4051 if (name == NULL) {
4052 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
4053 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4054 ctxt->sax->error(ctxt->userData,
4055 "NmToken expected in ATTLIST enumeration\n");
4056 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004057 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004058 return(ret);
4059 }
4060 cur = xmlCreateEnumeration(name);
4061 xmlFree(name);
4062 if (cur == NULL) return(ret);
4063 if (last == NULL) ret = last = cur;
4064 else {
4065 last->next = cur;
4066 last = cur;
4067 }
4068 SKIP_BLANKS;
4069 } while (RAW == '|');
4070 if (RAW != ')') {
4071 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
4072 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4073 ctxt->sax->error(ctxt->userData,
4074 "')' required to finish ATTLIST enumeration\n");
4075 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004076 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004077 return(ret);
4078 }
4079 NEXT;
4080 return(ret);
4081}
4082
4083/**
4084 * xmlParseEnumeratedType:
4085 * @ctxt: an XML parser context
4086 * @tree: the enumeration tree built while parsing
4087 *
4088 * parse an Enumerated attribute type.
4089 *
4090 * [57] EnumeratedType ::= NotationType | Enumeration
4091 *
4092 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4093 *
4094 *
4095 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4096 */
4097
4098int
4099xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4100 if ((RAW == 'N') && (NXT(1) == 'O') &&
4101 (NXT(2) == 'T') && (NXT(3) == 'A') &&
4102 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4103 (NXT(6) == 'O') && (NXT(7) == 'N')) {
4104 SKIP(8);
4105 if (!IS_BLANK(CUR)) {
4106 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4107 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4108 ctxt->sax->error(ctxt->userData,
4109 "Space required after 'NOTATION'\n");
4110 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004111 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004112 return(0);
4113 }
4114 SKIP_BLANKS;
4115 *tree = xmlParseNotationType(ctxt);
4116 if (*tree == NULL) return(0);
4117 return(XML_ATTRIBUTE_NOTATION);
4118 }
4119 *tree = xmlParseEnumerationType(ctxt);
4120 if (*tree == NULL) return(0);
4121 return(XML_ATTRIBUTE_ENUMERATION);
4122}
4123
4124/**
4125 * xmlParseAttributeType:
4126 * @ctxt: an XML parser context
4127 * @tree: the enumeration tree built while parsing
4128 *
4129 * parse the Attribute list def for an element
4130 *
4131 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4132 *
4133 * [55] StringType ::= 'CDATA'
4134 *
4135 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4136 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4137 *
4138 * Validity constraints for attribute values syntax are checked in
4139 * xmlValidateAttributeValue()
4140 *
4141 * [ VC: ID ]
4142 * Values of type ID must match the Name production. A name must not
4143 * appear more than once in an XML document as a value of this type;
4144 * i.e., ID values must uniquely identify the elements which bear them.
4145 *
4146 * [ VC: One ID per Element Type ]
4147 * No element type may have more than one ID attribute specified.
4148 *
4149 * [ VC: ID Attribute Default ]
4150 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4151 *
4152 * [ VC: IDREF ]
4153 * Values of type IDREF must match the Name production, and values
4154 * of type IDREFS must match Names; each IDREF Name must match the value
4155 * of an ID attribute on some element in the XML document; i.e. IDREF
4156 * values must match the value of some ID attribute.
4157 *
4158 * [ VC: Entity Name ]
4159 * Values of type ENTITY must match the Name production, values
4160 * of type ENTITIES must match Names; each Entity Name must match the
4161 * name of an unparsed entity declared in the DTD.
4162 *
4163 * [ VC: Name Token ]
4164 * Values of type NMTOKEN must match the Nmtoken production; values
4165 * of type NMTOKENS must match Nmtokens.
4166 *
4167 * Returns the attribute type
4168 */
4169int
4170xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4171 SHRINK;
4172 if ((RAW == 'C') && (NXT(1) == 'D') &&
4173 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4174 (NXT(4) == 'A')) {
4175 SKIP(5);
4176 return(XML_ATTRIBUTE_CDATA);
4177 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4178 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4179 (NXT(4) == 'F') && (NXT(5) == 'S')) {
4180 SKIP(6);
4181 return(XML_ATTRIBUTE_IDREFS);
4182 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4183 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4184 (NXT(4) == 'F')) {
4185 SKIP(5);
4186 return(XML_ATTRIBUTE_IDREF);
4187 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4188 SKIP(2);
4189 return(XML_ATTRIBUTE_ID);
4190 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4191 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4192 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4193 SKIP(6);
4194 return(XML_ATTRIBUTE_ENTITY);
4195 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4196 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4197 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4198 (NXT(6) == 'E') && (NXT(7) == 'S')) {
4199 SKIP(8);
4200 return(XML_ATTRIBUTE_ENTITIES);
4201 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4202 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4203 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4204 (NXT(6) == 'N') && (NXT(7) == 'S')) {
4205 SKIP(8);
4206 return(XML_ATTRIBUTE_NMTOKENS);
4207 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4208 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4209 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4210 (NXT(6) == 'N')) {
4211 SKIP(7);
4212 return(XML_ATTRIBUTE_NMTOKEN);
4213 }
4214 return(xmlParseEnumeratedType(ctxt, tree));
4215}
4216
4217/**
4218 * xmlParseAttributeListDecl:
4219 * @ctxt: an XML parser context
4220 *
4221 * : parse the Attribute list def for an element
4222 *
4223 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4224 *
4225 * [53] AttDef ::= S Name S AttType S DefaultDecl
4226 *
4227 */
4228void
4229xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
4230 xmlChar *elemName;
4231 xmlChar *attrName;
4232 xmlEnumerationPtr tree;
4233
4234 if ((RAW == '<') && (NXT(1) == '!') &&
4235 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4236 (NXT(4) == 'T') && (NXT(5) == 'L') &&
4237 (NXT(6) == 'I') && (NXT(7) == 'S') &&
4238 (NXT(8) == 'T')) {
4239 xmlParserInputPtr input = ctxt->input;
4240
4241 SKIP(9);
4242 if (!IS_BLANK(CUR)) {
4243 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4244 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4245 ctxt->sax->error(ctxt->userData,
4246 "Space required after '<!ATTLIST'\n");
4247 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004248 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004249 }
4250 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004251 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004252 if (elemName == NULL) {
4253 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4254 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4255 ctxt->sax->error(ctxt->userData,
4256 "ATTLIST: no name for Element\n");
4257 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004258 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004259 return;
4260 }
4261 SKIP_BLANKS;
4262 GROW;
4263 while (RAW != '>') {
4264 const xmlChar *check = CUR_PTR;
4265 int type;
4266 int def;
4267 xmlChar *defaultValue = NULL;
4268
4269 GROW;
4270 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004271 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004272 if (attrName == NULL) {
4273 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4274 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4275 ctxt->sax->error(ctxt->userData,
4276 "ATTLIST: no name for Attribute\n");
4277 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004278 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004279 break;
4280 }
4281 GROW;
4282 if (!IS_BLANK(CUR)) {
4283 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4284 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4285 ctxt->sax->error(ctxt->userData,
4286 "Space required after the attribute name\n");
4287 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004288 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004289 if (attrName != NULL)
4290 xmlFree(attrName);
4291 if (defaultValue != NULL)
4292 xmlFree(defaultValue);
4293 break;
4294 }
4295 SKIP_BLANKS;
4296
4297 type = xmlParseAttributeType(ctxt, &tree);
4298 if (type <= 0) {
4299 if (attrName != NULL)
4300 xmlFree(attrName);
4301 if (defaultValue != NULL)
4302 xmlFree(defaultValue);
4303 break;
4304 }
4305
4306 GROW;
4307 if (!IS_BLANK(CUR)) {
4308 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4309 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4310 ctxt->sax->error(ctxt->userData,
4311 "Space required after the attribute type\n");
4312 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004313 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004314 if (attrName != NULL)
4315 xmlFree(attrName);
4316 if (defaultValue != NULL)
4317 xmlFree(defaultValue);
4318 if (tree != NULL)
4319 xmlFreeEnumeration(tree);
4320 break;
4321 }
4322 SKIP_BLANKS;
4323
4324 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4325 if (def <= 0) {
4326 if (attrName != NULL)
4327 xmlFree(attrName);
4328 if (defaultValue != NULL)
4329 xmlFree(defaultValue);
4330 if (tree != NULL)
4331 xmlFreeEnumeration(tree);
4332 break;
4333 }
4334
4335 GROW;
4336 if (RAW != '>') {
4337 if (!IS_BLANK(CUR)) {
4338 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4339 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4340 ctxt->sax->error(ctxt->userData,
4341 "Space required after the attribute default value\n");
4342 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004343 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004344 if (attrName != NULL)
4345 xmlFree(attrName);
4346 if (defaultValue != NULL)
4347 xmlFree(defaultValue);
4348 if (tree != NULL)
4349 xmlFreeEnumeration(tree);
4350 break;
4351 }
4352 SKIP_BLANKS;
4353 }
4354 if (check == CUR_PTR) {
4355 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4356 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4357 ctxt->sax->error(ctxt->userData,
4358 "xmlParseAttributeListDecl: detected internal error\n");
4359 if (attrName != NULL)
4360 xmlFree(attrName);
4361 if (defaultValue != NULL)
4362 xmlFree(defaultValue);
4363 if (tree != NULL)
4364 xmlFreeEnumeration(tree);
4365 break;
4366 }
4367 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4368 (ctxt->sax->attributeDecl != NULL))
4369 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4370 type, def, defaultValue, tree);
4371 if (attrName != NULL)
4372 xmlFree(attrName);
4373 if (defaultValue != NULL)
4374 xmlFree(defaultValue);
4375 GROW;
4376 }
4377 if (RAW == '>') {
4378 if (input != ctxt->input) {
4379 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4380 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4381 ctxt->sax->error(ctxt->userData,
4382"Attribute list declaration doesn't start and stop in the same entity\n");
4383 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004384 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004385 }
4386 NEXT;
4387 }
4388
4389 xmlFree(elemName);
4390 }
4391}
4392
4393/**
4394 * xmlParseElementMixedContentDecl:
4395 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004396 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004397 *
4398 * parse the declaration for a Mixed Element content
4399 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4400 *
4401 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4402 * '(' S? '#PCDATA' S? ')'
4403 *
4404 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4405 *
4406 * [ VC: No Duplicate Types ]
4407 * The same name must not appear more than once in a single
4408 * mixed-content declaration.
4409 *
4410 * returns: the list of the xmlElementContentPtr describing the element choices
4411 */
4412xmlElementContentPtr
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004413xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004414 xmlElementContentPtr ret = NULL, cur = NULL, n;
4415 xmlChar *elem = NULL;
4416
4417 GROW;
4418 if ((RAW == '#') && (NXT(1) == 'P') &&
4419 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4420 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4421 (NXT(6) == 'A')) {
4422 SKIP(7);
4423 SKIP_BLANKS;
4424 SHRINK;
4425 if (RAW == ')') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004426 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4427 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4428 if (ctxt->vctxt.error != NULL)
4429 ctxt->vctxt.error(ctxt->vctxt.userData,
4430"Element content declaration doesn't start and stop in the same entity\n");
4431 ctxt->valid = 0;
4432 }
Owen Taylor3473f882001-02-23 17:55:21 +00004433 NEXT;
4434 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4435 if (RAW == '*') {
4436 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4437 NEXT;
4438 }
4439 return(ret);
4440 }
4441 if ((RAW == '(') || (RAW == '|')) {
4442 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4443 if (ret == NULL) return(NULL);
4444 }
4445 while (RAW == '|') {
4446 NEXT;
4447 if (elem == NULL) {
4448 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4449 if (ret == NULL) return(NULL);
4450 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004451 if (cur != NULL)
4452 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004453 cur = ret;
4454 } else {
4455 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4456 if (n == NULL) return(NULL);
4457 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004458 if (n->c1 != NULL)
4459 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004460 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004461 if (n != NULL)
4462 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004463 cur = n;
4464 xmlFree(elem);
4465 }
4466 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004467 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004468 if (elem == NULL) {
4469 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4470 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4471 ctxt->sax->error(ctxt->userData,
4472 "xmlParseElementMixedContentDecl : Name expected\n");
4473 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004474 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004475 xmlFreeElementContent(cur);
4476 return(NULL);
4477 }
4478 SKIP_BLANKS;
4479 GROW;
4480 }
4481 if ((RAW == ')') && (NXT(1) == '*')) {
4482 if (elem != NULL) {
4483 cur->c2 = xmlNewElementContent(elem,
4484 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004485 if (cur->c2 != NULL)
4486 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004487 xmlFree(elem);
4488 }
4489 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004490 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4491 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4492 if (ctxt->vctxt.error != NULL)
4493 ctxt->vctxt.error(ctxt->vctxt.userData,
4494"Element content declaration doesn't start and stop in the same entity\n");
4495 ctxt->valid = 0;
4496 }
Owen Taylor3473f882001-02-23 17:55:21 +00004497 SKIP(2);
4498 } else {
4499 if (elem != NULL) xmlFree(elem);
4500 xmlFreeElementContent(ret);
4501 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4502 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4503 ctxt->sax->error(ctxt->userData,
4504 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4505 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004506 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004507 return(NULL);
4508 }
4509
4510 } else {
4511 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4512 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4513 ctxt->sax->error(ctxt->userData,
4514 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4515 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004516 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004517 }
4518 return(ret);
4519}
4520
4521/**
4522 * xmlParseElementChildrenContentDecl:
4523 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004524 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004525 *
4526 * parse the declaration for an Element children content
4527 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4528 *
4529 *
4530 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4531 *
4532 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4533 *
4534 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4535 *
4536 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4537 *
4538 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4539 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004540 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004541 * opening or closing parentheses in a choice, seq, or Mixed
4542 * construct is contained in the replacement text for a parameter
4543 * entity, both must be contained in the same replacement text. For
4544 * interoperability, if a parameter-entity reference appears in a
4545 * choice, seq, or Mixed construct, its replacement text should not
4546 * be empty, and neither the first nor last non-blank character of
4547 * the replacement text should be a connector (| or ,).
4548 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004549 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004550 * hierarchy.
4551 */
4552xmlElementContentPtr
Owen Taylor3473f882001-02-23 17:55:21 +00004553xmlParseElementChildrenContentDecl
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004554(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004555 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4556 xmlChar *elem;
4557 xmlChar type = 0;
4558
4559 SKIP_BLANKS;
4560 GROW;
4561 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004562 xmlParserInputPtr input = ctxt->input;
4563
Owen Taylor3473f882001-02-23 17:55:21 +00004564 /* Recurse on first child */
4565 NEXT;
4566 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004567 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004568 SKIP_BLANKS;
4569 GROW;
4570 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004571 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004572 if (elem == NULL) {
4573 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4574 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4575 ctxt->sax->error(ctxt->userData,
4576 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4577 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004578 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004579 return(NULL);
4580 }
4581 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4582 GROW;
4583 if (RAW == '?') {
4584 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4585 NEXT;
4586 } else if (RAW == '*') {
4587 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4588 NEXT;
4589 } else if (RAW == '+') {
4590 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4591 NEXT;
4592 } else {
4593 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4594 }
4595 xmlFree(elem);
4596 GROW;
4597 }
4598 SKIP_BLANKS;
4599 SHRINK;
4600 while (RAW != ')') {
4601 /*
4602 * Each loop we parse one separator and one element.
4603 */
4604 if (RAW == ',') {
4605 if (type == 0) type = CUR;
4606
4607 /*
4608 * Detect "Name | Name , Name" error
4609 */
4610 else if (type != CUR) {
4611 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4612 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4613 ctxt->sax->error(ctxt->userData,
4614 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4615 type);
4616 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004617 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004618 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004619 xmlFreeElementContent(last);
4620 if (ret != NULL)
4621 xmlFreeElementContent(ret);
4622 return(NULL);
4623 }
4624 NEXT;
4625
4626 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4627 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004628 if ((last != NULL) && (last != ret))
4629 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004630 xmlFreeElementContent(ret);
4631 return(NULL);
4632 }
4633 if (last == NULL) {
4634 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004635 if (ret != NULL)
4636 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004637 ret = cur = op;
4638 } else {
4639 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004640 if (op != NULL)
4641 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004642 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004643 if (last != NULL)
4644 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004645 cur =op;
4646 last = NULL;
4647 }
4648 } else if (RAW == '|') {
4649 if (type == 0) type = CUR;
4650
4651 /*
4652 * Detect "Name , Name | Name" error
4653 */
4654 else if (type != CUR) {
4655 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4656 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4657 ctxt->sax->error(ctxt->userData,
4658 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4659 type);
4660 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004661 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004662 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004663 xmlFreeElementContent(last);
4664 if (ret != NULL)
4665 xmlFreeElementContent(ret);
4666 return(NULL);
4667 }
4668 NEXT;
4669
4670 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4671 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004672 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004673 xmlFreeElementContent(last);
4674 if (ret != NULL)
4675 xmlFreeElementContent(ret);
4676 return(NULL);
4677 }
4678 if (last == NULL) {
4679 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004680 if (ret != NULL)
4681 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004682 ret = cur = op;
4683 } else {
4684 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004685 if (op != NULL)
4686 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004687 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004688 if (last != NULL)
4689 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004690 cur =op;
4691 last = NULL;
4692 }
4693 } else {
4694 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4695 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4696 ctxt->sax->error(ctxt->userData,
4697 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4698 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004699 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004700 if (ret != NULL)
4701 xmlFreeElementContent(ret);
4702 return(NULL);
4703 }
4704 GROW;
4705 SKIP_BLANKS;
4706 GROW;
4707 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004708 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004709 /* Recurse on second child */
4710 NEXT;
4711 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004712 last = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004713 SKIP_BLANKS;
4714 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004715 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004716 if (elem == NULL) {
4717 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4718 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4719 ctxt->sax->error(ctxt->userData,
4720 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4721 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004722 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004723 if (ret != NULL)
4724 xmlFreeElementContent(ret);
4725 return(NULL);
4726 }
4727 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4728 xmlFree(elem);
4729 if (RAW == '?') {
4730 last->ocur = XML_ELEMENT_CONTENT_OPT;
4731 NEXT;
4732 } else if (RAW == '*') {
4733 last->ocur = XML_ELEMENT_CONTENT_MULT;
4734 NEXT;
4735 } else if (RAW == '+') {
4736 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4737 NEXT;
4738 } else {
4739 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4740 }
4741 }
4742 SKIP_BLANKS;
4743 GROW;
4744 }
4745 if ((cur != NULL) && (last != NULL)) {
4746 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004747 if (last != NULL)
4748 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004749 }
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004750 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4751 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4752 if (ctxt->vctxt.error != NULL)
4753 ctxt->vctxt.error(ctxt->vctxt.userData,
4754"Element content declaration doesn't start and stop in the same entity\n");
4755 ctxt->valid = 0;
4756 }
Owen Taylor3473f882001-02-23 17:55:21 +00004757 NEXT;
4758 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004759 if (ret != NULL)
4760 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004761 NEXT;
4762 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004763 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004764 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004765 cur = ret;
4766 /*
4767 * Some normalization:
4768 * (a | b* | c?)* == (a | b | c)*
4769 */
4770 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4771 if ((cur->c1 != NULL) &&
4772 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4773 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4774 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4775 if ((cur->c2 != NULL) &&
4776 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4777 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4778 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4779 cur = cur->c2;
4780 }
4781 }
Owen Taylor3473f882001-02-23 17:55:21 +00004782 NEXT;
4783 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004784 if (ret != NULL) {
4785 int found = 0;
4786
Daniel Veillarde470df72001-04-18 21:41:07 +00004787 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004788 /*
4789 * Some normalization:
4790 * (a | b*)+ == (a | b)*
4791 * (a | b?)+ == (a | b)*
4792 */
4793 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4794 if ((cur->c1 != NULL) &&
4795 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4796 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4797 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4798 found = 1;
4799 }
4800 if ((cur->c2 != NULL) &&
4801 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4802 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4803 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4804 found = 1;
4805 }
4806 cur = cur->c2;
4807 }
4808 if (found)
4809 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4810 }
Owen Taylor3473f882001-02-23 17:55:21 +00004811 NEXT;
4812 }
4813 return(ret);
4814}
4815
4816/**
4817 * xmlParseElementContentDecl:
4818 * @ctxt: an XML parser context
4819 * @name: the name of the element being defined.
4820 * @result: the Element Content pointer will be stored here if any
4821 *
4822 * parse the declaration for an Element content either Mixed or Children,
4823 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4824 *
4825 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4826 *
4827 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4828 */
4829
4830int
4831xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4832 xmlElementContentPtr *result) {
4833
4834 xmlElementContentPtr tree = NULL;
4835 xmlParserInputPtr input = ctxt->input;
4836 int res;
4837
4838 *result = NULL;
4839
4840 if (RAW != '(') {
4841 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4842 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4843 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004844 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004845 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004846 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004847 return(-1);
4848 }
4849 NEXT;
4850 GROW;
4851 SKIP_BLANKS;
4852 if ((RAW == '#') && (NXT(1) == 'P') &&
4853 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4854 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4855 (NXT(6) == 'A')) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004856 tree = xmlParseElementMixedContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004857 res = XML_ELEMENT_TYPE_MIXED;
4858 } else {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004859 tree = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004860 res = XML_ELEMENT_TYPE_ELEMENT;
4861 }
Owen Taylor3473f882001-02-23 17:55:21 +00004862 SKIP_BLANKS;
4863 *result = tree;
4864 return(res);
4865}
4866
4867/**
4868 * xmlParseElementDecl:
4869 * @ctxt: an XML parser context
4870 *
4871 * parse an Element declaration.
4872 *
4873 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4874 *
4875 * [ VC: Unique Element Type Declaration ]
4876 * No element type may be declared more than once
4877 *
4878 * Returns the type of the element, or -1 in case of error
4879 */
4880int
4881xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4882 xmlChar *name;
4883 int ret = -1;
4884 xmlElementContentPtr content = NULL;
4885
4886 GROW;
4887 if ((RAW == '<') && (NXT(1) == '!') &&
4888 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4889 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4890 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4891 (NXT(8) == 'T')) {
4892 xmlParserInputPtr input = ctxt->input;
4893
4894 SKIP(9);
4895 if (!IS_BLANK(CUR)) {
4896 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4897 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4898 ctxt->sax->error(ctxt->userData,
4899 "Space required after 'ELEMENT'\n");
4900 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004901 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004902 }
4903 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004904 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004905 if (name == NULL) {
4906 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4907 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4908 ctxt->sax->error(ctxt->userData,
4909 "xmlParseElementDecl: no name for Element\n");
4910 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004911 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004912 return(-1);
4913 }
4914 while ((RAW == 0) && (ctxt->inputNr > 1))
4915 xmlPopInput(ctxt);
4916 if (!IS_BLANK(CUR)) {
4917 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4918 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4919 ctxt->sax->error(ctxt->userData,
4920 "Space required after the element name\n");
4921 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004922 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004923 }
4924 SKIP_BLANKS;
4925 if ((RAW == 'E') && (NXT(1) == 'M') &&
4926 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4927 (NXT(4) == 'Y')) {
4928 SKIP(5);
4929 /*
4930 * Element must always be empty.
4931 */
4932 ret = XML_ELEMENT_TYPE_EMPTY;
4933 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4934 (NXT(2) == 'Y')) {
4935 SKIP(3);
4936 /*
4937 * Element is a generic container.
4938 */
4939 ret = XML_ELEMENT_TYPE_ANY;
4940 } else if (RAW == '(') {
4941 ret = xmlParseElementContentDecl(ctxt, name, &content);
4942 } else {
4943 /*
4944 * [ WFC: PEs in Internal Subset ] error handling.
4945 */
4946 if ((RAW == '%') && (ctxt->external == 0) &&
4947 (ctxt->inputNr == 1)) {
4948 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4949 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4950 ctxt->sax->error(ctxt->userData,
4951 "PEReference: forbidden within markup decl in internal subset\n");
4952 } else {
4953 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4954 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4955 ctxt->sax->error(ctxt->userData,
4956 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4957 }
4958 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004959 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004960 if (name != NULL) xmlFree(name);
4961 return(-1);
4962 }
4963
4964 SKIP_BLANKS;
4965 /*
4966 * Pop-up of finished entities.
4967 */
4968 while ((RAW == 0) && (ctxt->inputNr > 1))
4969 xmlPopInput(ctxt);
4970 SKIP_BLANKS;
4971
4972 if (RAW != '>') {
4973 ctxt->errNo = XML_ERR_GT_REQUIRED;
4974 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4975 ctxt->sax->error(ctxt->userData,
4976 "xmlParseElementDecl: expected '>' at the end\n");
4977 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004978 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004979 } else {
4980 if (input != ctxt->input) {
4981 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4982 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4983 ctxt->sax->error(ctxt->userData,
4984"Element declaration doesn't start and stop in the same entity\n");
4985 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004986 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004987 }
4988
4989 NEXT;
4990 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4991 (ctxt->sax->elementDecl != NULL))
4992 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4993 content);
4994 }
4995 if (content != NULL) {
4996 xmlFreeElementContent(content);
4997 }
4998 if (name != NULL) {
4999 xmlFree(name);
5000 }
5001 }
5002 return(ret);
5003}
5004
5005/**
Owen Taylor3473f882001-02-23 17:55:21 +00005006 * xmlParseConditionalSections
5007 * @ctxt: an XML parser context
5008 *
5009 * [61] conditionalSect ::= includeSect | ignoreSect
5010 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5011 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5012 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5013 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5014 */
5015
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005016static void
Owen Taylor3473f882001-02-23 17:55:21 +00005017xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5018 SKIP(3);
5019 SKIP_BLANKS;
5020 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
5021 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
5022 (NXT(6) == 'E')) {
5023 SKIP(7);
5024 SKIP_BLANKS;
5025 if (RAW != '[') {
5026 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5027 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5028 ctxt->sax->error(ctxt->userData,
5029 "XML conditional section '[' expected\n");
5030 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005031 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005032 } else {
5033 NEXT;
5034 }
5035 if (xmlParserDebugEntities) {
5036 if ((ctxt->input != NULL) && (ctxt->input->filename))
5037 xmlGenericError(xmlGenericErrorContext,
5038 "%s(%d): ", ctxt->input->filename,
5039 ctxt->input->line);
5040 xmlGenericError(xmlGenericErrorContext,
5041 "Entering INCLUDE Conditional Section\n");
5042 }
5043
5044 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5045 (NXT(2) != '>'))) {
5046 const xmlChar *check = CUR_PTR;
5047 int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005048
5049 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5050 xmlParseConditionalSections(ctxt);
5051 } else if (IS_BLANK(CUR)) {
5052 NEXT;
5053 } else if (RAW == '%') {
5054 xmlParsePEReference(ctxt);
5055 } else
5056 xmlParseMarkupDecl(ctxt);
5057
5058 /*
5059 * Pop-up of finished entities.
5060 */
5061 while ((RAW == 0) && (ctxt->inputNr > 1))
5062 xmlPopInput(ctxt);
5063
Daniel Veillardfdc91562002-07-01 21:52:03 +00005064 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005065 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5066 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5067 ctxt->sax->error(ctxt->userData,
5068 "Content error in the external subset\n");
5069 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005070 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005071 break;
5072 }
5073 }
5074 if (xmlParserDebugEntities) {
5075 if ((ctxt->input != NULL) && (ctxt->input->filename))
5076 xmlGenericError(xmlGenericErrorContext,
5077 "%s(%d): ", ctxt->input->filename,
5078 ctxt->input->line);
5079 xmlGenericError(xmlGenericErrorContext,
5080 "Leaving INCLUDE Conditional Section\n");
5081 }
5082
5083 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
5084 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
5085 int state;
5086 int instate;
5087 int depth = 0;
5088
5089 SKIP(6);
5090 SKIP_BLANKS;
5091 if (RAW != '[') {
5092 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5093 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5094 ctxt->sax->error(ctxt->userData,
5095 "XML conditional section '[' expected\n");
5096 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005097 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005098 } else {
5099 NEXT;
5100 }
5101 if (xmlParserDebugEntities) {
5102 if ((ctxt->input != NULL) && (ctxt->input->filename))
5103 xmlGenericError(xmlGenericErrorContext,
5104 "%s(%d): ", ctxt->input->filename,
5105 ctxt->input->line);
5106 xmlGenericError(xmlGenericErrorContext,
5107 "Entering IGNORE Conditional Section\n");
5108 }
5109
5110 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005111 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005112 * But disable SAX event generating DTD building in the meantime
5113 */
5114 state = ctxt->disableSAX;
5115 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005116 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005117 ctxt->instate = XML_PARSER_IGNORE;
5118
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005119 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005120 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5121 depth++;
5122 SKIP(3);
5123 continue;
5124 }
5125 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5126 if (--depth >= 0) SKIP(3);
5127 continue;
5128 }
5129 NEXT;
5130 continue;
5131 }
5132
5133 ctxt->disableSAX = state;
5134 ctxt->instate = instate;
5135
5136 if (xmlParserDebugEntities) {
5137 if ((ctxt->input != NULL) && (ctxt->input->filename))
5138 xmlGenericError(xmlGenericErrorContext,
5139 "%s(%d): ", ctxt->input->filename,
5140 ctxt->input->line);
5141 xmlGenericError(xmlGenericErrorContext,
5142 "Leaving IGNORE Conditional Section\n");
5143 }
5144
5145 } else {
5146 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5147 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5148 ctxt->sax->error(ctxt->userData,
5149 "XML conditional section INCLUDE or IGNORE keyword expected\n");
5150 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005151 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005152 }
5153
5154 if (RAW == 0)
5155 SHRINK;
5156
5157 if (RAW == 0) {
5158 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
5159 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5160 ctxt->sax->error(ctxt->userData,
5161 "XML conditional section not closed\n");
5162 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005163 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005164 } else {
5165 SKIP(3);
5166 }
5167}
5168
5169/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005170 * xmlParseMarkupDecl:
5171 * @ctxt: an XML parser context
5172 *
5173 * parse Markup declarations
5174 *
5175 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5176 * NotationDecl | PI | Comment
5177 *
5178 * [ VC: Proper Declaration/PE Nesting ]
5179 * Parameter-entity replacement text must be properly nested with
5180 * markup declarations. That is to say, if either the first character
5181 * or the last character of a markup declaration (markupdecl above) is
5182 * contained in the replacement text for a parameter-entity reference,
5183 * both must be contained in the same replacement text.
5184 *
5185 * [ WFC: PEs in Internal Subset ]
5186 * In the internal DTD subset, parameter-entity references can occur
5187 * only where markup declarations can occur, not within markup declarations.
5188 * (This does not apply to references that occur in external parameter
5189 * entities or to the external subset.)
5190 */
5191void
5192xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5193 GROW;
5194 xmlParseElementDecl(ctxt);
5195 xmlParseAttributeListDecl(ctxt);
5196 xmlParseEntityDecl(ctxt);
5197 xmlParseNotationDecl(ctxt);
5198 xmlParsePI(ctxt);
5199 xmlParseComment(ctxt);
5200 /*
5201 * This is only for internal subset. On external entities,
5202 * the replacement is done before parsing stage
5203 */
5204 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5205 xmlParsePEReference(ctxt);
5206
5207 /*
5208 * Conditional sections are allowed from entities included
5209 * by PE References in the internal subset.
5210 */
5211 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5212 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5213 xmlParseConditionalSections(ctxt);
5214 }
5215 }
5216
5217 ctxt->instate = XML_PARSER_DTD;
5218}
5219
5220/**
5221 * xmlParseTextDecl:
5222 * @ctxt: an XML parser context
5223 *
5224 * parse an XML declaration header for external entities
5225 *
5226 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5227 *
5228 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5229 */
5230
5231void
5232xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5233 xmlChar *version;
5234
5235 /*
5236 * We know that '<?xml' is here.
5237 */
5238 if ((RAW == '<') && (NXT(1) == '?') &&
5239 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5240 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5241 SKIP(5);
5242 } else {
5243 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
5244 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5245 ctxt->sax->error(ctxt->userData,
5246 "Text declaration '<?xml' required\n");
5247 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005248 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005249
5250 return;
5251 }
5252
5253 if (!IS_BLANK(CUR)) {
5254 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5255 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5256 ctxt->sax->error(ctxt->userData,
5257 "Space needed after '<?xml'\n");
5258 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005259 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005260 }
5261 SKIP_BLANKS;
5262
5263 /*
5264 * We may have the VersionInfo here.
5265 */
5266 version = xmlParseVersionInfo(ctxt);
5267 if (version == NULL)
5268 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005269 else {
5270 if (!IS_BLANK(CUR)) {
5271 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5272 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5273 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5274 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005275 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard401c2112002-01-07 16:54:10 +00005276 }
5277 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005278 ctxt->input->version = version;
5279
5280 /*
5281 * We must have the encoding declaration
5282 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005283 xmlParseEncodingDecl(ctxt);
5284 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5285 /*
5286 * The XML REC instructs us to stop parsing right here
5287 */
5288 return;
5289 }
5290
5291 SKIP_BLANKS;
5292 if ((RAW == '?') && (NXT(1) == '>')) {
5293 SKIP(2);
5294 } else if (RAW == '>') {
5295 /* Deprecated old WD ... */
5296 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5297 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5298 ctxt->sax->error(ctxt->userData,
5299 "XML declaration must end-up with '?>'\n");
5300 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005301 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005302 NEXT;
5303 } else {
5304 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5305 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5306 ctxt->sax->error(ctxt->userData,
5307 "parsing XML declaration: '?>' expected\n");
5308 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005309 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005310 MOVETO_ENDTAG(CUR_PTR);
5311 NEXT;
5312 }
5313}
5314
5315/**
Owen Taylor3473f882001-02-23 17:55:21 +00005316 * xmlParseExternalSubset:
5317 * @ctxt: an XML parser context
5318 * @ExternalID: the external identifier
5319 * @SystemID: the system identifier (or URL)
5320 *
5321 * parse Markup declarations from an external subset
5322 *
5323 * [30] extSubset ::= textDecl? extSubsetDecl
5324 *
5325 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5326 */
5327void
5328xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5329 const xmlChar *SystemID) {
5330 GROW;
5331 if ((RAW == '<') && (NXT(1) == '?') &&
5332 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5333 (NXT(4) == 'l')) {
5334 xmlParseTextDecl(ctxt);
5335 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5336 /*
5337 * The XML REC instructs us to stop parsing right here
5338 */
5339 ctxt->instate = XML_PARSER_EOF;
5340 return;
5341 }
5342 }
5343 if (ctxt->myDoc == NULL) {
5344 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5345 }
5346 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5347 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5348
5349 ctxt->instate = XML_PARSER_DTD;
5350 ctxt->external = 1;
5351 while (((RAW == '<') && (NXT(1) == '?')) ||
5352 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005353 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005354 const xmlChar *check = CUR_PTR;
5355 int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005356
5357 GROW;
5358 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5359 xmlParseConditionalSections(ctxt);
5360 } else if (IS_BLANK(CUR)) {
5361 NEXT;
5362 } else if (RAW == '%') {
5363 xmlParsePEReference(ctxt);
5364 } else
5365 xmlParseMarkupDecl(ctxt);
5366
5367 /*
5368 * Pop-up of finished entities.
5369 */
5370 while ((RAW == 0) && (ctxt->inputNr > 1))
5371 xmlPopInput(ctxt);
5372
Daniel Veillardfdc91562002-07-01 21:52:03 +00005373 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005374 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5375 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5376 ctxt->sax->error(ctxt->userData,
5377 "Content error in the external subset\n");
5378 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005379 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005380 break;
5381 }
5382 }
5383
5384 if (RAW != 0) {
5385 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5386 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5387 ctxt->sax->error(ctxt->userData,
5388 "Extra content at the end of the document\n");
5389 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005390 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005391 }
5392
5393}
5394
/**
 * xmlParseReference:
 * @ctxt:  an XML parser context
 *
 * parse and handle entity references in content. Depending on the SAX
 * interface, this may end up in a call to characters() if this is a
 * CharRef or a predefined entity, if there is no reference() callback,
 * or if the parser was asked to switch to that mode.
 *
 * [67] Reference ::= EntityRef | CharRef
 */
5406void
5407xmlParseReference(xmlParserCtxtPtr ctxt) {
5408 xmlEntityPtr ent;
5409 xmlChar *val;
5410 if (RAW != '&') return;
5411
5412 if (NXT(1) == '#') {
5413 int i = 0;
5414 xmlChar out[10];
5415 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005416 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005417
5418 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5419 /*
5420 * So we are using non-UTF-8 buffers
5421 * Check that the char fit on 8bits, if not
5422 * generate a CharRef.
5423 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005424 if (value <= 0xFF) {
5425 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005426 out[1] = 0;
5427 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5428 (!ctxt->disableSAX))
5429 ctxt->sax->characters(ctxt->userData, out, 1);
5430 } else {
5431 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005432 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005433 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005434 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005435 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5436 (!ctxt->disableSAX))
5437 ctxt->sax->reference(ctxt->userData, out);
5438 }
5439 } else {
5440 /*
5441 * Just encode the value in UTF-8
5442 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005443 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005444 out[i] = 0;
5445 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5446 (!ctxt->disableSAX))
5447 ctxt->sax->characters(ctxt->userData, out, i);
5448 }
5449 } else {
5450 ent = xmlParseEntityRef(ctxt);
5451 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005452 if (!ctxt->wellFormed)
5453 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005454 if ((ent->name != NULL) &&
5455 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5456 xmlNodePtr list = NULL;
5457 int ret;
5458
5459
5460 /*
5461 * The first reference to the entity trigger a parsing phase
5462 * where the ent->children is filled with the result from
5463 * the parsing.
5464 */
5465 if (ent->children == NULL) {
5466 xmlChar *value;
5467 value = ent->content;
5468
5469 /*
5470 * Check that this entity is well formed
5471 */
5472 if ((value != NULL) &&
5473 (value[1] == 0) && (value[0] == '<') &&
5474 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5475 /*
5476 * DONE: get definite answer on this !!!
5477 * Lots of entity decls are used to declare a single
5478 * char
5479 * <!ENTITY lt "<">
5480 * Which seems to be valid since
5481 * 2.4: The ampersand character (&) and the left angle
5482 * bracket (<) may appear in their literal form only
5483 * when used ... They are also legal within the literal
5484 * entity value of an internal entity declaration;i
5485 * see "4.3.2 Well-Formed Parsed Entities".
5486 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5487 * Looking at the OASIS test suite and James Clark
5488 * tests, this is broken. However the XML REC uses
5489 * it. Is the XML REC not well-formed ????
5490 * This is a hack to avoid this problem
5491 *
5492 * ANSWER: since lt gt amp .. are already defined,
5493 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005494 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005495 * is lousy but acceptable.
5496 */
5497 list = xmlNewDocText(ctxt->myDoc, value);
5498 if (list != NULL) {
5499 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5500 (ent->children == NULL)) {
5501 ent->children = list;
5502 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005503 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005504 list->parent = (xmlNodePtr) ent;
5505 } else {
5506 xmlFreeNodeList(list);
5507 }
5508 } else if (list != NULL) {
5509 xmlFreeNodeList(list);
5510 }
5511 } else {
5512 /*
5513 * 4.3.2: An internal general parsed entity is well-formed
5514 * if its replacement text matches the production labeled
5515 * content.
5516 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005517
5518 void *user_data;
5519 /*
5520 * This is a bit hackish but this seems the best
5521 * way to make sure both SAX and DOM entity support
5522 * behaves okay.
5523 */
5524 if (ctxt->userData == ctxt)
5525 user_data = NULL;
5526 else
5527 user_data = ctxt->userData;
5528
Owen Taylor3473f882001-02-23 17:55:21 +00005529 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5530 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005531 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5532 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005533 ctxt->depth--;
5534 } else if (ent->etype ==
5535 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5536 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005537 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005538 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005539 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005540 ctxt->depth--;
5541 } else {
5542 ret = -1;
5543 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5544 ctxt->sax->error(ctxt->userData,
5545 "Internal: invalid entity type\n");
5546 }
5547 if (ret == XML_ERR_ENTITY_LOOP) {
5548 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5549 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5550 ctxt->sax->error(ctxt->userData,
5551 "Detected entity reference loop\n");
5552 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005553 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005554 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005555 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005556 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5557 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005558 (ent->children == NULL)) {
5559 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005560 if (ctxt->replaceEntities) {
5561 /*
5562 * Prune it directly in the generated document
5563 * except for single text nodes.
5564 */
5565 if ((list->type == XML_TEXT_NODE) &&
5566 (list->next == NULL)) {
5567 list->parent = (xmlNodePtr) ent;
5568 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005569 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005570 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005571 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005572 while (list != NULL) {
5573 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005574 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005575 if (list->next == NULL)
5576 ent->last = list;
5577 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005578 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005579 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005580 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5581 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005582 }
5583 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005584 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005585 while (list != NULL) {
5586 list->parent = (xmlNodePtr) ent;
5587 if (list->next == NULL)
5588 ent->last = list;
5589 list = list->next;
5590 }
Owen Taylor3473f882001-02-23 17:55:21 +00005591 }
5592 } else {
5593 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005594 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005595 }
5596 } else if (ret > 0) {
5597 ctxt->errNo = ret;
5598 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5599 ctxt->sax->error(ctxt->userData,
5600 "Entity value required\n");
5601 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005602 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005603 } else if (list != NULL) {
5604 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005605 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005606 }
5607 }
5608 }
5609 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5610 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5611 /*
5612 * Create a node.
5613 */
5614 ctxt->sax->reference(ctxt->userData, ent->name);
5615 return;
5616 } else if (ctxt->replaceEntities) {
5617 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5618 /*
5619 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005620 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005621 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005622 */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005623 if ((list == NULL) && (ent->owner == 0)) {
5624 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005625 cur = ent->children;
5626 while (cur != NULL) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005627 nw = xmlCopyNode(cur, 1);
5628 if (nw != NULL) {
5629 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005630 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005631 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005632 }
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005633 xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005634 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005635 if (cur == ent->last)
5636 break;
5637 cur = cur->next;
5638 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005639 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005640 xmlAddEntityReference(ent, firstChild, nw);
5641 } else if (list == NULL) {
5642 xmlNodePtr nw = NULL, cur, next, last,
5643 firstChild = NULL;
5644 /*
5645 * Copy the entity child list and make it the new
5646 * entity child list. The goal is to make sure any
5647 * ID or REF referenced will be the one from the
5648 * document content and not the entity copy.
5649 */
5650 cur = ent->children;
5651 ent->children = NULL;
5652 last = ent->last;
5653 ent->last = NULL;
5654 while (cur != NULL) {
5655 next = cur->next;
5656 cur->next = NULL;
5657 cur->parent = NULL;
5658 nw = xmlCopyNode(cur, 1);
5659 if (nw != NULL) {
5660 nw->_private = cur->_private;
5661 if (firstChild == NULL){
5662 firstChild = cur;
5663 }
5664 xmlAddChild((xmlNodePtr) ent, nw);
5665 xmlAddChild(ctxt->node, cur);
5666 }
5667 if (cur == last)
5668 break;
5669 cur = next;
5670 }
5671 ent->owner = 1;
5672 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5673 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005674 } else {
5675 /*
5676 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005677 * node with a possible previous text one which
5678 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005679 */
5680 if (ent->children->type == XML_TEXT_NODE)
5681 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5682 if ((ent->last != ent->children) &&
5683 (ent->last->type == XML_TEXT_NODE))
5684 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5685 xmlAddChildList(ctxt->node, ent->children);
5686 }
5687
Owen Taylor3473f882001-02-23 17:55:21 +00005688 /*
5689 * This is to avoid a nasty side effect, see
5690 * characters() in SAX.c
5691 */
5692 ctxt->nodemem = 0;
5693 ctxt->nodelen = 0;
5694 return;
5695 } else {
5696 /*
5697 * Probably running in SAX mode
5698 */
5699 xmlParserInputPtr input;
5700
5701 input = xmlNewEntityInputStream(ctxt, ent);
5702 xmlPushInput(ctxt, input);
5703 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5704 (RAW == '<') && (NXT(1) == '?') &&
5705 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5706 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5707 xmlParseTextDecl(ctxt);
5708 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5709 /*
5710 * The XML REC instructs us to stop parsing right here
5711 */
5712 ctxt->instate = XML_PARSER_EOF;
5713 return;
5714 }
5715 if (input->standalone == 1) {
5716 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5717 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5718 ctxt->sax->error(ctxt->userData,
5719 "external parsed entities cannot be standalone\n");
5720 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005721 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005722 }
5723 }
5724 return;
5725 }
5726 }
5727 } else {
5728 val = ent->content;
5729 if (val == NULL) return;
5730 /*
5731 * inline the entity.
5732 */
5733 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5734 (!ctxt->disableSAX))
5735 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5736 }
5737 }
5738}
5739
5740/**
5741 * xmlParseEntityRef:
5742 * @ctxt: an XML parser context
5743 *
5744 * parse ENTITY references declarations
5745 *
5746 * [68] EntityRef ::= '&' Name ';'
5747 *
5748 * [ WFC: Entity Declared ]
5749 * In a document without any DTD, a document with only an internal DTD
5750 * subset which contains no parameter entity references, or a document
5751 * with "standalone='yes'", the Name given in the entity reference
5752 * must match that in an entity declaration, except that well-formed
5753 * documents need not declare any of the following entities: amp, lt,
5754 * gt, apos, quot. The declaration of a parameter entity must precede
5755 * any reference to it. Similarly, the declaration of a general entity
5756 * must precede any reference to it which appears in a default value in an
5757 * attribute-list declaration. Note that if entities are declared in the
5758 * external subset or in external parameter entities, a non-validating
5759 * processor is not obligated to read and process their declarations;
5760 * for such documents, the rule that an entity must be declared is a
5761 * well-formedness constraint only if standalone='yes'.
5762 *
5763 * [ WFC: Parsed Entity ]
5764 * An entity reference must not contain the name of an unparsed entity
5765 *
5766 * Returns the xmlEntityPtr if found, or NULL otherwise.
5767 */
5768xmlEntityPtr
5769xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5770 xmlChar *name;
5771 xmlEntityPtr ent = NULL;
5772
5773 GROW;
5774
5775 if (RAW == '&') {
5776 NEXT;
5777 name = xmlParseName(ctxt);
5778 if (name == NULL) {
5779 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5780 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5781 ctxt->sax->error(ctxt->userData,
5782 "xmlParseEntityRef: no name\n");
5783 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005784 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005785 } else {
5786 if (RAW == ';') {
5787 NEXT;
5788 /*
5789 * Ask first SAX for entity resolution, otherwise try the
5790 * predefined set.
5791 */
5792 if (ctxt->sax != NULL) {
5793 if (ctxt->sax->getEntity != NULL)
5794 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005795 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00005796 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005797 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
5798 (ctxt->userData==ctxt)) {
Daniel Veillard5997aca2002-03-18 18:36:20 +00005799 ent = getEntity(ctxt, name);
5800 }
Owen Taylor3473f882001-02-23 17:55:21 +00005801 }
5802 /*
5803 * [ WFC: Entity Declared ]
5804 * In a document without any DTD, a document with only an
5805 * internal DTD subset which contains no parameter entity
5806 * references, or a document with "standalone='yes'", the
5807 * Name given in the entity reference must match that in an
5808 * entity declaration, except that well-formed documents
5809 * need not declare any of the following entities: amp, lt,
5810 * gt, apos, quot.
5811 * The declaration of a parameter entity must precede any
5812 * reference to it.
5813 * Similarly, the declaration of a general entity must
5814 * precede any reference to it which appears in a default
5815 * value in an attribute-list declaration. Note that if
5816 * entities are declared in the external subset or in
5817 * external parameter entities, a non-validating processor
5818 * is not obligated to read and process their declarations;
5819 * for such documents, the rule that an entity must be
5820 * declared is a well-formedness constraint only if
5821 * standalone='yes'.
5822 */
5823 if (ent == NULL) {
5824 if ((ctxt->standalone == 1) ||
5825 ((ctxt->hasExternalSubset == 0) &&
5826 (ctxt->hasPErefs == 0))) {
5827 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5828 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5829 ctxt->sax->error(ctxt->userData,
5830 "Entity '%s' not defined\n", name);
5831 ctxt->wellFormed = 0;
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005832 ctxt->valid = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005833 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005834 } else {
5835 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005836 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005837 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005838 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005839 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005840 }
5841 }
5842
5843 /*
5844 * [ WFC: Parsed Entity ]
5845 * An entity reference must not contain the name of an
5846 * unparsed entity
5847 */
5848 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5849 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5850 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5851 ctxt->sax->error(ctxt->userData,
5852 "Entity reference to unparsed entity %s\n", name);
5853 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005854 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005855 }
5856
5857 /*
5858 * [ WFC: No External Entity References ]
5859 * Attribute values cannot contain direct or indirect
5860 * entity references to external entities.
5861 */
5862 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5863 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5864 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5865 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5866 ctxt->sax->error(ctxt->userData,
5867 "Attribute references external entity '%s'\n", name);
5868 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005869 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005870 }
5871 /*
5872 * [ WFC: No < in Attribute Values ]
5873 * The replacement text of any entity referred to directly or
5874 * indirectly in an attribute value (other than "&lt;") must
5875 * not contain a <.
5876 */
5877 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5878 (ent != NULL) &&
5879 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5880 (ent->content != NULL) &&
5881 (xmlStrchr(ent->content, '<'))) {
5882 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5883 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5884 ctxt->sax->error(ctxt->userData,
5885 "'<' in entity '%s' is not allowed in attributes values\n", name);
5886 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005887 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005888 }
5889
5890 /*
5891 * Internal check, no parameter entities here ...
5892 */
5893 else {
5894 switch (ent->etype) {
5895 case XML_INTERNAL_PARAMETER_ENTITY:
5896 case XML_EXTERNAL_PARAMETER_ENTITY:
5897 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5898 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5899 ctxt->sax->error(ctxt->userData,
5900 "Attempt to reference the parameter entity '%s'\n", name);
5901 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005902 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005903 break;
5904 default:
5905 break;
5906 }
5907 }
5908
5909 /*
5910 * [ WFC: No Recursion ]
5911 * A parsed entity must not contain a recursive reference
5912 * to itself, either directly or indirectly.
5913 * Done somewhere else
5914 */
5915
5916 } else {
5917 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5918 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5919 ctxt->sax->error(ctxt->userData,
5920 "xmlParseEntityRef: expecting ';'\n");
5921 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005922 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005923 }
5924 xmlFree(name);
5925 }
5926 }
5927 return(ent);
5928}
5929
5930/**
5931 * xmlParseStringEntityRef:
5932 * @ctxt: an XML parser context
5933 * @str: a pointer to an index in the string
5934 *
5935 * parse ENTITY references declarations, but this version parses it from
5936 * a string value.
5937 *
5938 * [68] EntityRef ::= '&' Name ';'
5939 *
5940 * [ WFC: Entity Declared ]
5941 * In a document without any DTD, a document with only an internal DTD
5942 * subset which contains no parameter entity references, or a document
5943 * with "standalone='yes'", the Name given in the entity reference
5944 * must match that in an entity declaration, except that well-formed
5945 * documents need not declare any of the following entities: amp, lt,
5946 * gt, apos, quot. The declaration of a parameter entity must precede
5947 * any reference to it. Similarly, the declaration of a general entity
5948 * must precede any reference to it which appears in a default value in an
5949 * attribute-list declaration. Note that if entities are declared in the
5950 * external subset or in external parameter entities, a non-validating
5951 * processor is not obligated to read and process their declarations;
5952 * for such documents, the rule that an entity must be declared is a
5953 * well-formedness constraint only if standalone='yes'.
5954 *
5955 * [ WFC: Parsed Entity ]
5956 * An entity reference must not contain the name of an unparsed entity
5957 *
5958 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5959 * is updated to the current location in the string.
5960 */
5961xmlEntityPtr
5962xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5963 xmlChar *name;
5964 const xmlChar *ptr;
5965 xmlChar cur;
5966 xmlEntityPtr ent = NULL;
5967
5968 if ((str == NULL) || (*str == NULL))
5969 return(NULL);
5970 ptr = *str;
5971 cur = *ptr;
5972 if (cur == '&') {
5973 ptr++;
5974 cur = *ptr;
5975 name = xmlParseStringName(ctxt, &ptr);
5976 if (name == NULL) {
5977 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5978 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5979 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005980 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005981 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005982 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005983 } else {
5984 if (*ptr == ';') {
5985 ptr++;
5986 /*
5987 * Ask first SAX for entity resolution, otherwise try the
5988 * predefined set.
5989 */
5990 if (ctxt->sax != NULL) {
5991 if (ctxt->sax->getEntity != NULL)
5992 ent = ctxt->sax->getEntity(ctxt->userData, name);
5993 if (ent == NULL)
5994 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005995 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5996 ent = getEntity(ctxt, name);
5997 }
Owen Taylor3473f882001-02-23 17:55:21 +00005998 }
5999 /*
6000 * [ WFC: Entity Declared ]
6001 * In a document without any DTD, a document with only an
6002 * internal DTD subset which contains no parameter entity
6003 * references, or a document with "standalone='yes'", the
6004 * Name given in the entity reference must match that in an
6005 * entity declaration, except that well-formed documents
6006 * need not declare any of the following entities: amp, lt,
6007 * gt, apos, quot.
6008 * The declaration of a parameter entity must precede any
6009 * reference to it.
6010 * Similarly, the declaration of a general entity must
6011 * precede any reference to it which appears in a default
6012 * value in an attribute-list declaration. Note that if
6013 * entities are declared in the external subset or in
6014 * external parameter entities, a non-validating processor
6015 * is not obligated to read and process their declarations;
6016 * for such documents, the rule that an entity must be
6017 * declared is a well-formedness constraint only if
6018 * standalone='yes'.
6019 */
6020 if (ent == NULL) {
6021 if ((ctxt->standalone == 1) ||
6022 ((ctxt->hasExternalSubset == 0) &&
6023 (ctxt->hasPErefs == 0))) {
6024 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6025 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6026 ctxt->sax->error(ctxt->userData,
6027 "Entity '%s' not defined\n", name);
6028 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006029 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006030 } else {
6031 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
6032 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6033 ctxt->sax->warning(ctxt->userData,
6034 "Entity '%s' not defined\n", name);
6035 }
6036 }
6037
6038 /*
6039 * [ WFC: Parsed Entity ]
6040 * An entity reference must not contain the name of an
6041 * unparsed entity
6042 */
6043 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
6044 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
6045 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6046 ctxt->sax->error(ctxt->userData,
6047 "Entity reference to unparsed entity %s\n", name);
6048 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006049 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006050 }
6051
6052 /*
6053 * [ WFC: No External Entity References ]
6054 * Attribute values cannot contain direct or indirect
6055 * entity references to external entities.
6056 */
6057 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6058 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
6059 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
6060 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6061 ctxt->sax->error(ctxt->userData,
6062 "Attribute references external entity '%s'\n", name);
6063 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006064 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006065 }
6066 /*
6067 * [ WFC: No < in Attribute Values ]
6068 * The replacement text of any entity referred to directly or
6069 * indirectly in an attribute value (other than "&lt;") must
6070 * not contain a <.
6071 */
6072 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6073 (ent != NULL) &&
6074 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6075 (ent->content != NULL) &&
6076 (xmlStrchr(ent->content, '<'))) {
6077 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6078 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6079 ctxt->sax->error(ctxt->userData,
6080 "'<' in entity '%s' is not allowed in attributes values\n", name);
6081 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006082 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006083 }
6084
6085 /*
6086 * Internal check, no parameter entities here ...
6087 */
6088 else {
6089 switch (ent->etype) {
6090 case XML_INTERNAL_PARAMETER_ENTITY:
6091 case XML_EXTERNAL_PARAMETER_ENTITY:
6092 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6093 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6094 ctxt->sax->error(ctxt->userData,
6095 "Attempt to reference the parameter entity '%s'\n", name);
6096 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006097 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006098 break;
6099 default:
6100 break;
6101 }
6102 }
6103
6104 /*
6105 * [ WFC: No Recursion ]
6106 * A parsed entity must not contain a recursive reference
6107 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006108 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006109 */
6110
6111 } else {
6112 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6113 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6114 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00006115 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006116 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006117 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006118 }
6119 xmlFree(name);
6120 }
6121 }
6122 *str = ptr;
6123 return(ent);
6124}
6125
6126/**
6127 * xmlParsePEReference:
6128 * @ctxt: an XML parser context
6129 *
6130 * parse PEReference declarations
6131 * The entity content is handled directly by pushing it's content as
6132 * a new input stream.
6133 *
6134 * [69] PEReference ::= '%' Name ';'
6135 *
6136 * [ WFC: No Recursion ]
6137 * A parsed entity must not contain a recursive
6138 * reference to itself, either directly or indirectly.
6139 *
6140 * [ WFC: Entity Declared ]
6141 * In a document without any DTD, a document with only an internal DTD
6142 * subset which contains no parameter entity references, or a document
6143 * with "standalone='yes'", ... ... The declaration of a parameter
6144 * entity must precede any reference to it...
6145 *
6146 * [ VC: Entity Declared ]
6147 * In a document with an external subset or external parameter entities
6148 * with "standalone='no'", ... ... The declaration of a parameter entity
6149 * must precede any reference to it...
6150 *
6151 * [ WFC: In DTD ]
6152 * Parameter-entity references may only appear in the DTD.
6153 * NOTE: misleading but this is handled.
6154 */
6155void
6156xmlParsePEReference(xmlParserCtxtPtr ctxt) {
6157 xmlChar *name;
6158 xmlEntityPtr entity = NULL;
6159 xmlParserInputPtr input;
6160
6161 if (RAW == '%') {
6162 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006163 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006164 if (name == NULL) {
6165 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6166 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6167 ctxt->sax->error(ctxt->userData,
6168 "xmlParsePEReference: no name\n");
6169 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006170 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006171 } else {
6172 if (RAW == ';') {
6173 NEXT;
6174 if ((ctxt->sax != NULL) &&
6175 (ctxt->sax->getParameterEntity != NULL))
6176 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6177 name);
6178 if (entity == NULL) {
6179 /*
6180 * [ WFC: Entity Declared ]
6181 * In a document without any DTD, a document with only an
6182 * internal DTD subset which contains no parameter entity
6183 * references, or a document with "standalone='yes'", ...
6184 * ... The declaration of a parameter entity must precede
6185 * any reference to it...
6186 */
6187 if ((ctxt->standalone == 1) ||
6188 ((ctxt->hasExternalSubset == 0) &&
6189 (ctxt->hasPErefs == 0))) {
6190 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6191 if ((!ctxt->disableSAX) &&
6192 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6193 ctxt->sax->error(ctxt->userData,
6194 "PEReference: %%%s; not found\n", name);
6195 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006196 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006197 } else {
6198 /*
6199 * [ VC: Entity Declared ]
6200 * In a document with an external subset or external
6201 * parameter entities with "standalone='no'", ...
6202 * ... The declaration of a parameter entity must precede
6203 * any reference to it...
6204 */
6205 if ((!ctxt->disableSAX) &&
6206 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6207 ctxt->sax->warning(ctxt->userData,
6208 "PEReference: %%%s; not found\n", name);
6209 ctxt->valid = 0;
6210 }
6211 } else {
6212 /*
6213 * Internal checking in case the entity quest barfed
6214 */
6215 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6216 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6217 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6218 ctxt->sax->warning(ctxt->userData,
6219 "Internal: %%%s; is not a parameter entity\n", name);
Daniel Veillardf5582f12002-06-11 10:08:16 +00006220 } else if (ctxt->input->free != deallocblankswrapper) {
6221 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
6222 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00006223 } else {
6224 /*
6225 * TODO !!!
6226 * handle the extra spaces added before and after
6227 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6228 */
6229 input = xmlNewEntityInputStream(ctxt, entity);
6230 xmlPushInput(ctxt, input);
6231 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6232 (RAW == '<') && (NXT(1) == '?') &&
6233 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6234 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6235 xmlParseTextDecl(ctxt);
6236 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6237 /*
6238 * The XML REC instructs us to stop parsing
6239 * right here
6240 */
6241 ctxt->instate = XML_PARSER_EOF;
6242 xmlFree(name);
6243 return;
6244 }
6245 }
Owen Taylor3473f882001-02-23 17:55:21 +00006246 }
6247 }
6248 ctxt->hasPErefs = 1;
6249 } else {
6250 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6251 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6252 ctxt->sax->error(ctxt->userData,
6253 "xmlParsePEReference: expecting ';'\n");
6254 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006255 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006256 }
6257 xmlFree(name);
6258 }
6259 }
6260}
6261
6262/**
6263 * xmlParseStringPEReference:
6264 * @ctxt: an XML parser context
6265 * @str: a pointer to an index in the string
6266 *
6267 * parse PEReference declarations
6268 *
6269 * [69] PEReference ::= '%' Name ';'
6270 *
6271 * [ WFC: No Recursion ]
6272 * A parsed entity must not contain a recursive
6273 * reference to itself, either directly or indirectly.
6274 *
6275 * [ WFC: Entity Declared ]
6276 * In a document without any DTD, a document with only an internal DTD
6277 * subset which contains no parameter entity references, or a document
6278 * with "standalone='yes'", ... ... The declaration of a parameter
6279 * entity must precede any reference to it...
6280 *
6281 * [ VC: Entity Declared ]
6282 * In a document with an external subset or external parameter entities
6283 * with "standalone='no'", ... ... The declaration of a parameter entity
6284 * must precede any reference to it...
6285 *
6286 * [ WFC: In DTD ]
6287 * Parameter-entity references may only appear in the DTD.
6288 * NOTE: misleading but this is handled.
6289 *
6290 * Returns the string of the entity content.
6291 * str is updated to the current value of the index
6292 */
6293xmlEntityPtr
6294xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6295 const xmlChar *ptr;
6296 xmlChar cur;
6297 xmlChar *name;
6298 xmlEntityPtr entity = NULL;
6299
6300 if ((str == NULL) || (*str == NULL)) return(NULL);
6301 ptr = *str;
6302 cur = *ptr;
6303 if (cur == '%') {
6304 ptr++;
6305 cur = *ptr;
6306 name = xmlParseStringName(ctxt, &ptr);
6307 if (name == NULL) {
6308 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6309 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6310 ctxt->sax->error(ctxt->userData,
6311 "xmlParseStringPEReference: no name\n");
6312 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006313 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006314 } else {
6315 cur = *ptr;
6316 if (cur == ';') {
6317 ptr++;
6318 cur = *ptr;
6319 if ((ctxt->sax != NULL) &&
6320 (ctxt->sax->getParameterEntity != NULL))
6321 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6322 name);
6323 if (entity == NULL) {
6324 /*
6325 * [ WFC: Entity Declared ]
6326 * In a document without any DTD, a document with only an
6327 * internal DTD subset which contains no parameter entity
6328 * references, or a document with "standalone='yes'", ...
6329 * ... The declaration of a parameter entity must precede
6330 * any reference to it...
6331 */
6332 if ((ctxt->standalone == 1) ||
6333 ((ctxt->hasExternalSubset == 0) &&
6334 (ctxt->hasPErefs == 0))) {
6335 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6336 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6337 ctxt->sax->error(ctxt->userData,
6338 "PEReference: %%%s; not found\n", name);
6339 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006340 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006341 } else {
6342 /*
6343 * [ VC: Entity Declared ]
6344 * In a document with an external subset or external
6345 * parameter entities with "standalone='no'", ...
6346 * ... The declaration of a parameter entity must
6347 * precede any reference to it...
6348 */
6349 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6350 ctxt->sax->warning(ctxt->userData,
6351 "PEReference: %%%s; not found\n", name);
6352 ctxt->valid = 0;
6353 }
6354 } else {
6355 /*
6356 * Internal checking in case the entity quest barfed
6357 */
6358 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6359 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6360 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6361 ctxt->sax->warning(ctxt->userData,
6362 "Internal: %%%s; is not a parameter entity\n", name);
6363 }
6364 }
6365 ctxt->hasPErefs = 1;
6366 } else {
6367 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6368 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6369 ctxt->sax->error(ctxt->userData,
6370 "xmlParseStringPEReference: expecting ';'\n");
6371 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006372 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006373 }
6374 xmlFree(name);
6375 }
6376 }
6377 *str = ptr;
6378 return(entity);
6379}
6380
6381/**
6382 * xmlParseDocTypeDecl:
6383 * @ctxt: an XML parser context
6384 *
6385 * parse a DOCTYPE declaration
6386 *
6387 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6388 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6389 *
6390 * [ VC: Root Element Type ]
6391 * The Name in the document type declaration must match the element
6392 * type of the root element.
6393 */
6394
6395void
6396xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6397 xmlChar *name = NULL;
6398 xmlChar *ExternalID = NULL;
6399 xmlChar *URI = NULL;
6400
6401 /*
6402 * We know that '<!DOCTYPE' has been detected.
6403 */
6404 SKIP(9);
6405
6406 SKIP_BLANKS;
6407
6408 /*
6409 * Parse the DOCTYPE name.
6410 */
6411 name = xmlParseName(ctxt);
6412 if (name == NULL) {
6413 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6414 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6415 ctxt->sax->error(ctxt->userData,
6416 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6417 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006418 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006419 }
6420 ctxt->intSubName = name;
6421
6422 SKIP_BLANKS;
6423
6424 /*
6425 * Check for SystemID and ExternalID
6426 */
6427 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6428
6429 if ((URI != NULL) || (ExternalID != NULL)) {
6430 ctxt->hasExternalSubset = 1;
6431 }
6432 ctxt->extSubURI = URI;
6433 ctxt->extSubSystem = ExternalID;
6434
6435 SKIP_BLANKS;
6436
6437 /*
6438 * Create and update the internal subset.
6439 */
6440 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6441 (!ctxt->disableSAX))
6442 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6443
6444 /*
6445 * Is there any internal subset declarations ?
6446 * they are handled separately in xmlParseInternalSubset()
6447 */
6448 if (RAW == '[')
6449 return;
6450
6451 /*
6452 * We should be at the end of the DOCTYPE declaration.
6453 */
6454 if (RAW != '>') {
6455 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6456 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006457 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006458 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006459 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006460 }
6461 NEXT;
6462}
6463
6464/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006465 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006466 * @ctxt: an XML parser context
6467 *
6468 * parse the internal subset declaration
6469 *
6470 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6471 */
6472
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006473static void
Owen Taylor3473f882001-02-23 17:55:21 +00006474xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6475 /*
6476 * Is there any DTD definition ?
6477 */
6478 if (RAW == '[') {
6479 ctxt->instate = XML_PARSER_DTD;
6480 NEXT;
6481 /*
6482 * Parse the succession of Markup declarations and
6483 * PEReferences.
6484 * Subsequence (markupdecl | PEReference | S)*
6485 */
6486 while (RAW != ']') {
6487 const xmlChar *check = CUR_PTR;
6488 int cons = ctxt->input->consumed;
6489
6490 SKIP_BLANKS;
6491 xmlParseMarkupDecl(ctxt);
6492 xmlParsePEReference(ctxt);
6493
6494 /*
6495 * Pop-up of finished entities.
6496 */
6497 while ((RAW == 0) && (ctxt->inputNr > 1))
6498 xmlPopInput(ctxt);
6499
6500 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6501 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6502 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6503 ctxt->sax->error(ctxt->userData,
6504 "xmlParseInternalSubset: error detected in Markup declaration\n");
6505 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006506 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006507 break;
6508 }
6509 }
6510 if (RAW == ']') {
6511 NEXT;
6512 SKIP_BLANKS;
6513 }
6514 }
6515
6516 /*
6517 * We should be at the end of the DOCTYPE declaration.
6518 */
6519 if (RAW != '>') {
6520 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6521 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006522 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006523 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006524 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006525 }
6526 NEXT;
6527}
6528
6529/**
6530 * xmlParseAttribute:
6531 * @ctxt: an XML parser context
6532 * @value: a xmlChar ** used to store the value of the attribute
6533 *
6534 * parse an attribute
6535 *
6536 * [41] Attribute ::= Name Eq AttValue
6537 *
6538 * [ WFC: No External Entity References ]
6539 * Attribute values cannot contain direct or indirect entity references
6540 * to external entities.
6541 *
6542 * [ WFC: No < in Attribute Values ]
6543 * The replacement text of any entity referred to directly or indirectly in
6544 * an attribute value (other than "&lt;") must not contain a <.
6545 *
6546 * [ VC: Attribute Value Type ]
6547 * The attribute must have been declared; the value must be of the type
6548 * declared for it.
6549 *
6550 * [25] Eq ::= S? '=' S?
6551 *
6552 * With namespace:
6553 *
6554 * [NS 11] Attribute ::= QName Eq AttValue
6555 *
6556 * Also the case QName == xmlns:??? is handled independently as a namespace
6557 * definition.
6558 *
6559 * Returns the attribute name, and the value in *value.
6560 */
6561
6562xmlChar *
6563xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6564 xmlChar *name, *val;
6565
6566 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006567 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006568 name = xmlParseName(ctxt);
6569 if (name == NULL) {
6570 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6571 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6572 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6573 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006574 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006575 return(NULL);
6576 }
6577
6578 /*
6579 * read the value
6580 */
6581 SKIP_BLANKS;
6582 if (RAW == '=') {
6583 NEXT;
6584 SKIP_BLANKS;
6585 val = xmlParseAttValue(ctxt);
6586 ctxt->instate = XML_PARSER_CONTENT;
6587 } else {
6588 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6589 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6590 ctxt->sax->error(ctxt->userData,
6591 "Specification mandate value for attribute %s\n", name);
6592 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006593 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006594 xmlFree(name);
6595 return(NULL);
6596 }
6597
6598 /*
6599 * Check that xml:lang conforms to the specification
6600 * No more registered as an error, just generate a warning now
6601 * since this was deprecated in XML second edition
6602 */
6603 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6604 if (!xmlCheckLanguageID(val)) {
6605 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6606 ctxt->sax->warning(ctxt->userData,
6607 "Malformed value for xml:lang : %s\n", val);
6608 }
6609 }
6610
6611 /*
6612 * Check that xml:space conforms to the specification
6613 */
6614 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6615 if (xmlStrEqual(val, BAD_CAST "default"))
6616 *(ctxt->space) = 0;
6617 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6618 *(ctxt->space) = 1;
6619 else {
6620 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6621 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6622 ctxt->sax->error(ctxt->userData,
6623"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6624 val);
6625 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006626 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006627 }
6628 }
6629
6630 *value = val;
6631 return(name);
6632}
6633
6634/**
6635 * xmlParseStartTag:
6636 * @ctxt: an XML parser context
6637 *
6638 * parse a start of tag either for rule element or
6639 * EmptyElement. In both case we don't parse the tag closing chars.
6640 *
6641 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6642 *
6643 * [ WFC: Unique Att Spec ]
6644 * No attribute name may appear more than once in the same start-tag or
6645 * empty-element tag.
6646 *
6647 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6648 *
6649 * [ WFC: Unique Att Spec ]
6650 * No attribute name may appear more than once in the same start-tag or
6651 * empty-element tag.
6652 *
6653 * With namespace:
6654 *
6655 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6656 *
6657 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6658 *
6659 * Returns the element name parsed
6660 */
6661
6662xmlChar *
6663xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6664 xmlChar *name;
6665 xmlChar *attname;
6666 xmlChar *attvalue;
6667 const xmlChar **atts = NULL;
6668 int nbatts = 0;
6669 int maxatts = 0;
6670 int i;
6671
6672 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006673 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006674
6675 name = xmlParseName(ctxt);
6676 if (name == NULL) {
6677 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6678 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6679 ctxt->sax->error(ctxt->userData,
6680 "xmlParseStartTag: invalid element name\n");
6681 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006682 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006683 return(NULL);
6684 }
6685
6686 /*
6687 * Now parse the attributes, it ends up with the ending
6688 *
6689 * (S Attribute)* S?
6690 */
6691 SKIP_BLANKS;
6692 GROW;
6693
Daniel Veillard21a0f912001-02-25 19:54:14 +00006694 while ((RAW != '>') &&
6695 ((RAW != '/') || (NXT(1) != '>')) &&
6696 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006697 const xmlChar *q = CUR_PTR;
6698 int cons = ctxt->input->consumed;
6699
6700 attname = xmlParseAttribute(ctxt, &attvalue);
6701 if ((attname != NULL) && (attvalue != NULL)) {
6702 /*
6703 * [ WFC: Unique Att Spec ]
6704 * No attribute name may appear more than once in the same
6705 * start-tag or empty-element tag.
6706 */
6707 for (i = 0; i < nbatts;i += 2) {
6708 if (xmlStrEqual(atts[i], attname)) {
6709 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6710 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6711 ctxt->sax->error(ctxt->userData,
6712 "Attribute %s redefined\n",
6713 attname);
6714 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006715 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006716 xmlFree(attname);
6717 xmlFree(attvalue);
6718 goto failed;
6719 }
6720 }
6721
6722 /*
6723 * Add the pair to atts
6724 */
6725 if (atts == NULL) {
6726 maxatts = 10;
6727 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6728 if (atts == NULL) {
6729 xmlGenericError(xmlGenericErrorContext,
6730 "malloc of %ld byte failed\n",
6731 maxatts * (long)sizeof(xmlChar *));
6732 return(NULL);
6733 }
6734 } else if (nbatts + 4 > maxatts) {
6735 maxatts *= 2;
6736 atts = (const xmlChar **) xmlRealloc((void *) atts,
6737 maxatts * sizeof(xmlChar *));
6738 if (atts == NULL) {
6739 xmlGenericError(xmlGenericErrorContext,
6740 "realloc of %ld byte failed\n",
6741 maxatts * (long)sizeof(xmlChar *));
6742 return(NULL);
6743 }
6744 }
6745 atts[nbatts++] = attname;
6746 atts[nbatts++] = attvalue;
6747 atts[nbatts] = NULL;
6748 atts[nbatts + 1] = NULL;
6749 } else {
6750 if (attname != NULL)
6751 xmlFree(attname);
6752 if (attvalue != NULL)
6753 xmlFree(attvalue);
6754 }
6755
6756failed:
6757
Daniel Veillard3772de32002-12-17 10:31:45 +00006758 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006759 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6760 break;
6761 if (!IS_BLANK(RAW)) {
6762 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6763 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6764 ctxt->sax->error(ctxt->userData,
6765 "attributes construct error\n");
6766 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006767 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006768 }
6769 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006770 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6771 (attname == NULL) && (attvalue == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006772 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6773 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6774 ctxt->sax->error(ctxt->userData,
6775 "xmlParseStartTag: problem parsing attributes\n");
6776 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006777 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006778 break;
6779 }
6780 GROW;
6781 }
6782
6783 /*
6784 * SAX: Start of Element !
6785 */
6786 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6787 (!ctxt->disableSAX))
6788 ctxt->sax->startElement(ctxt->userData, name, atts);
6789
6790 if (atts != NULL) {
6791 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6792 xmlFree((void *) atts);
6793 }
6794 return(name);
6795}
6796
6797/**
6798 * xmlParseEndTag:
6799 * @ctxt: an XML parser context
6800 *
6801 * parse an end of tag
6802 *
6803 * [42] ETag ::= '</' Name S? '>'
6804 *
6805 * With namespace
6806 *
6807 * [NS 9] ETag ::= '</' QName S? '>'
6808 */
6809
6810void
6811xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6812 xmlChar *name;
6813 xmlChar *oldname;
6814
6815 GROW;
6816 if ((RAW != '<') || (NXT(1) != '/')) {
6817 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6818 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6819 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6820 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006821 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006822 return;
6823 }
6824 SKIP(2);
6825
Daniel Veillard46de64e2002-05-29 08:21:33 +00006826 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006827
6828 /*
6829 * We should definitely be at the ending "S? '>'" part
6830 */
6831 GROW;
6832 SKIP_BLANKS;
6833 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6834 ctxt->errNo = XML_ERR_GT_REQUIRED;
6835 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6836 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6837 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006838 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006839 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006840 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006841
6842 /*
6843 * [ WFC: Element Type Match ]
6844 * The Name in an element's end-tag must match the element type in the
6845 * start-tag.
6846 *
6847 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006848 if (name != (xmlChar*)1) {
Owen Taylor3473f882001-02-23 17:55:21 +00006849 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6850 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00006851 if (name != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00006852 ctxt->sax->error(ctxt->userData,
6853 "Opening and ending tag mismatch: %s and %s\n",
6854 ctxt->name, name);
Daniel Veillard46de64e2002-05-29 08:21:33 +00006855 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006856 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006857 "Ending tag error for: %s\n", ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006858 }
6859
6860 }
6861 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006862 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6863#if 0
6864 else {
6865 /*
6866 * Recover in case of one missing close
6867 */
6868 if ((ctxt->nameNr > 2) &&
6869 (xmlStrEqual(ctxt->nameTab[ctxt->nameNr -2], name))) {
6870 namePop(ctxt);
6871 spacePop(ctxt);
6872 }
6873 }
6874#endif
6875 if (name != NULL)
6876 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00006877 }
6878
6879 /*
6880 * SAX: End of Tag
6881 */
6882 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6883 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006884 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006885
Owen Taylor3473f882001-02-23 17:55:21 +00006886 oldname = namePop(ctxt);
6887 spacePop(ctxt);
6888 if (oldname != NULL) {
6889#ifdef DEBUG_STACK
6890 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6891#endif
6892 xmlFree(oldname);
6893 }
6894 return;
6895}
6896
6897/**
6898 * xmlParseCDSect:
6899 * @ctxt: an XML parser context
6900 *
6901 * Parse escaped pure raw content.
6902 *
6903 * [18] CDSect ::= CDStart CData CDEnd
6904 *
6905 * [19] CDStart ::= '<![CDATA['
6906 *
6907 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6908 *
6909 * [21] CDEnd ::= ']]>'
6910 */
6911void
6912xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6913 xmlChar *buf = NULL;
6914 int len = 0;
6915 int size = XML_PARSER_BUFFER_SIZE;
6916 int r, rl;
6917 int s, sl;
6918 int cur, l;
6919 int count = 0;
6920
6921 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6922 (NXT(2) == '[') && (NXT(3) == 'C') &&
6923 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6924 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6925 (NXT(8) == '[')) {
6926 SKIP(9);
6927 } else
6928 return;
6929
6930 ctxt->instate = XML_PARSER_CDATA_SECTION;
6931 r = CUR_CHAR(rl);
6932 if (!IS_CHAR(r)) {
6933 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6934 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6935 ctxt->sax->error(ctxt->userData,
6936 "CData section not finished\n");
6937 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006938 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006939 ctxt->instate = XML_PARSER_CONTENT;
6940 return;
6941 }
6942 NEXTL(rl);
6943 s = CUR_CHAR(sl);
6944 if (!IS_CHAR(s)) {
6945 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6946 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6947 ctxt->sax->error(ctxt->userData,
6948 "CData section not finished\n");
6949 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006950 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006951 ctxt->instate = XML_PARSER_CONTENT;
6952 return;
6953 }
6954 NEXTL(sl);
6955 cur = CUR_CHAR(l);
6956 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6957 if (buf == NULL) {
6958 xmlGenericError(xmlGenericErrorContext,
6959 "malloc of %d byte failed\n", size);
6960 return;
6961 }
6962 while (IS_CHAR(cur) &&
6963 ((r != ']') || (s != ']') || (cur != '>'))) {
6964 if (len + 5 >= size) {
6965 size *= 2;
6966 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6967 if (buf == NULL) {
6968 xmlGenericError(xmlGenericErrorContext,
6969 "realloc of %d byte failed\n", size);
6970 return;
6971 }
6972 }
6973 COPY_BUF(rl,buf,len,r);
6974 r = s;
6975 rl = sl;
6976 s = cur;
6977 sl = l;
6978 count++;
6979 if (count > 50) {
6980 GROW;
6981 count = 0;
6982 }
6983 NEXTL(l);
6984 cur = CUR_CHAR(l);
6985 }
6986 buf[len] = 0;
6987 ctxt->instate = XML_PARSER_CONTENT;
6988 if (cur != '>') {
6989 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6990 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6991 ctxt->sax->error(ctxt->userData,
6992 "CData section not finished\n%.50s\n", buf);
6993 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006994 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006995 xmlFree(buf);
6996 return;
6997 }
6998 NEXTL(l);
6999
7000 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007001 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00007002 */
7003 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
7004 if (ctxt->sax->cdataBlock != NULL)
7005 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00007006 else if (ctxt->sax->characters != NULL)
7007 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00007008 }
7009 xmlFree(buf);
7010}
7011
7012/**
7013 * xmlParseContent:
7014 * @ctxt: an XML parser context
7015 *
7016 * Parse a content:
7017 *
7018 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
7019 */
7020
7021void
7022xmlParseContent(xmlParserCtxtPtr ctxt) {
7023 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00007024 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00007025 ((RAW != '<') || (NXT(1) != '/'))) {
7026 const xmlChar *test = CUR_PTR;
7027 int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00007028 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00007029
7030 /*
Owen Taylor3473f882001-02-23 17:55:21 +00007031 * First case : a Processing Instruction.
7032 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00007033 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007034 xmlParsePI(ctxt);
7035 }
7036
7037 /*
7038 * Second case : a CDSection
7039 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007040 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00007041 (NXT(2) == '[') && (NXT(3) == 'C') &&
7042 (NXT(4) == 'D') && (NXT(5) == 'A') &&
7043 (NXT(6) == 'T') && (NXT(7) == 'A') &&
7044 (NXT(8) == '[')) {
7045 xmlParseCDSect(ctxt);
7046 }
7047
7048 /*
7049 * Third case : a comment
7050 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007051 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00007052 (NXT(2) == '-') && (NXT(3) == '-')) {
7053 xmlParseComment(ctxt);
7054 ctxt->instate = XML_PARSER_CONTENT;
7055 }
7056
7057 /*
7058 * Fourth case : a sub-element.
7059 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007060 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00007061 xmlParseElement(ctxt);
7062 }
7063
7064 /*
7065 * Fifth case : a reference. If if has not been resolved,
7066 * parsing returns it's Name, create the node
7067 */
7068
Daniel Veillard21a0f912001-02-25 19:54:14 +00007069 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00007070 xmlParseReference(ctxt);
7071 }
7072
7073 /*
7074 * Last case, text. Note that References are handled directly.
7075 */
7076 else {
7077 xmlParseCharData(ctxt, 0);
7078 }
7079
7080 GROW;
7081 /*
7082 * Pop-up of finished entities.
7083 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00007084 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00007085 xmlPopInput(ctxt);
7086 SHRINK;
7087
Daniel Veillardfdc91562002-07-01 21:52:03 +00007088 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007089 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
7090 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7091 ctxt->sax->error(ctxt->userData,
7092 "detected an error in element content\n");
7093 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007094 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007095 ctxt->instate = XML_PARSER_EOF;
7096 break;
7097 }
7098 }
7099}
7100
7101/**
7102 * xmlParseElement:
7103 * @ctxt: an XML parser context
7104 *
7105 * parse an XML element, this is highly recursive
7106 *
7107 * [39] element ::= EmptyElemTag | STag content ETag
7108 *
7109 * [ WFC: Element Type Match ]
7110 * The Name in an element's end-tag must match the element type in the
7111 * start-tag.
7112 *
7113 * [ VC: Element Valid ]
7114 * An element is valid if there is a declaration matching elementdecl
7115 * where the Name matches the element type and one of the following holds:
7116 * - The declaration matches EMPTY and the element has no content.
7117 * - The declaration matches children and the sequence of child elements
7118 * belongs to the language generated by the regular expression in the
7119 * content model, with optional white space (characters matching the
7120 * nonterminal S) between each pair of child elements.
7121 * - The declaration matches Mixed and the content consists of character
7122 * data and child elements whose types match names in the content model.
7123 * - The declaration matches ANY, and the types of any child elements have
7124 * been declared.
7125 */
7126
7127void
7128xmlParseElement(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00007129 xmlChar *name;
7130 xmlChar *oldname;
7131 xmlParserNodeInfo node_info;
7132 xmlNodePtr ret;
7133
7134 /* Capture start position */
7135 if (ctxt->record_info) {
7136 node_info.begin_pos = ctxt->input->consumed +
7137 (CUR_PTR - ctxt->input->base);
7138 node_info.begin_line = ctxt->input->line;
7139 }
7140
7141 if (ctxt->spaceNr == 0)
7142 spacePush(ctxt, -1);
7143 else
7144 spacePush(ctxt, *ctxt->space);
7145
7146 name = xmlParseStartTag(ctxt);
7147 if (name == NULL) {
7148 spacePop(ctxt);
7149 return;
7150 }
7151 namePush(ctxt, name);
7152 ret = ctxt->node;
7153
7154 /*
7155 * [ VC: Root Element Type ]
7156 * The Name in the document type declaration must match the element
7157 * type of the root element.
7158 */
7159 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7160 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7161 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7162
7163 /*
7164 * Check for an Empty Element.
7165 */
7166 if ((RAW == '/') && (NXT(1) == '>')) {
7167 SKIP(2);
7168 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7169 (!ctxt->disableSAX))
7170 ctxt->sax->endElement(ctxt->userData, name);
7171 oldname = namePop(ctxt);
7172 spacePop(ctxt);
7173 if (oldname != NULL) {
7174#ifdef DEBUG_STACK
7175 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7176#endif
7177 xmlFree(oldname);
7178 }
7179 if ( ret != NULL && ctxt->record_info ) {
7180 node_info.end_pos = ctxt->input->consumed +
7181 (CUR_PTR - ctxt->input->base);
7182 node_info.end_line = ctxt->input->line;
7183 node_info.node = ret;
7184 xmlParserAddNodeInfo(ctxt, &node_info);
7185 }
7186 return;
7187 }
7188 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00007189 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007190 } else {
7191 ctxt->errNo = XML_ERR_GT_REQUIRED;
7192 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7193 ctxt->sax->error(ctxt->userData,
Daniel Veillard38d80e22002-06-11 07:24:56 +00007194 "Couldn't find end of Start Tag %s\n",
7195 name);
Owen Taylor3473f882001-02-23 17:55:21 +00007196 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007197 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007198
7199 /*
7200 * end of parsing of this node.
7201 */
7202 nodePop(ctxt);
7203 oldname = namePop(ctxt);
7204 spacePop(ctxt);
7205 if (oldname != NULL) {
7206#ifdef DEBUG_STACK
7207 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7208#endif
7209 xmlFree(oldname);
7210 }
7211
7212 /*
7213 * Capture end position and add node
7214 */
7215 if ( ret != NULL && ctxt->record_info ) {
7216 node_info.end_pos = ctxt->input->consumed +
7217 (CUR_PTR - ctxt->input->base);
7218 node_info.end_line = ctxt->input->line;
7219 node_info.node = ret;
7220 xmlParserAddNodeInfo(ctxt, &node_info);
7221 }
7222 return;
7223 }
7224
7225 /*
7226 * Parse the content of the element:
7227 */
7228 xmlParseContent(ctxt);
7229 if (!IS_CHAR(RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00007230 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00007231 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7232 ctxt->sax->error(ctxt->userData,
Daniel Veillard38d80e22002-06-11 07:24:56 +00007233 "Premature end of data in tag %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007234 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007235 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007236
7237 /*
7238 * end of parsing of this node.
7239 */
7240 nodePop(ctxt);
7241 oldname = namePop(ctxt);
7242 spacePop(ctxt);
7243 if (oldname != NULL) {
7244#ifdef DEBUG_STACK
7245 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7246#endif
7247 xmlFree(oldname);
7248 }
7249 return;
7250 }
7251
7252 /*
7253 * parse the end of tag: '</' should be here.
7254 */
7255 xmlParseEndTag(ctxt);
7256
7257 /*
7258 * Capture end position and add node
7259 */
7260 if ( ret != NULL && ctxt->record_info ) {
7261 node_info.end_pos = ctxt->input->consumed +
7262 (CUR_PTR - ctxt->input->base);
7263 node_info.end_line = ctxt->input->line;
7264 node_info.node = ret;
7265 xmlParserAddNodeInfo(ctxt, &node_info);
7266 }
7267}
7268
7269/**
7270 * xmlParseVersionNum:
7271 * @ctxt: an XML parser context
7272 *
7273 * parse the XML version value.
7274 *
7275 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7276 *
7277 * Returns the string giving the XML version number, or NULL
7278 */
7279xmlChar *
7280xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7281 xmlChar *buf = NULL;
7282 int len = 0;
7283 int size = 10;
7284 xmlChar cur;
7285
7286 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7287 if (buf == NULL) {
7288 xmlGenericError(xmlGenericErrorContext,
7289 "malloc of %d byte failed\n", size);
7290 return(NULL);
7291 }
7292 cur = CUR;
7293 while (((cur >= 'a') && (cur <= 'z')) ||
7294 ((cur >= 'A') && (cur <= 'Z')) ||
7295 ((cur >= '0') && (cur <= '9')) ||
7296 (cur == '_') || (cur == '.') ||
7297 (cur == ':') || (cur == '-')) {
7298 if (len + 1 >= size) {
7299 size *= 2;
7300 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7301 if (buf == NULL) {
7302 xmlGenericError(xmlGenericErrorContext,
7303 "realloc of %d byte failed\n", size);
7304 return(NULL);
7305 }
7306 }
7307 buf[len++] = cur;
7308 NEXT;
7309 cur=CUR;
7310 }
7311 buf[len] = 0;
7312 return(buf);
7313}
7314
7315/**
7316 * xmlParseVersionInfo:
7317 * @ctxt: an XML parser context
7318 *
7319 * parse the XML version.
7320 *
7321 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7322 *
7323 * [25] Eq ::= S? '=' S?
7324 *
7325 * Returns the version string, e.g. "1.0"
7326 */
7327
7328xmlChar *
7329xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7330 xmlChar *version = NULL;
7331 const xmlChar *q;
7332
7333 if ((RAW == 'v') && (NXT(1) == 'e') &&
7334 (NXT(2) == 'r') && (NXT(3) == 's') &&
7335 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7336 (NXT(6) == 'n')) {
7337 SKIP(7);
7338 SKIP_BLANKS;
7339 if (RAW != '=') {
7340 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7341 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7342 ctxt->sax->error(ctxt->userData,
7343 "xmlParseVersionInfo : expected '='\n");
7344 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007345 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007346 return(NULL);
7347 }
7348 NEXT;
7349 SKIP_BLANKS;
7350 if (RAW == '"') {
7351 NEXT;
7352 q = CUR_PTR;
7353 version = xmlParseVersionNum(ctxt);
7354 if (RAW != '"') {
7355 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7356 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7357 ctxt->sax->error(ctxt->userData,
7358 "String not closed\n%.50s\n", q);
7359 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007360 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007361 } else
7362 NEXT;
7363 } else if (RAW == '\''){
7364 NEXT;
7365 q = CUR_PTR;
7366 version = xmlParseVersionNum(ctxt);
7367 if (RAW != '\'') {
7368 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7369 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7370 ctxt->sax->error(ctxt->userData,
7371 "String not closed\n%.50s\n", q);
7372 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007373 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007374 } else
7375 NEXT;
7376 } else {
7377 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7378 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7379 ctxt->sax->error(ctxt->userData,
7380 "xmlParseVersionInfo : expected ' or \"\n");
7381 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007382 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007383 }
7384 }
7385 return(version);
7386}
7387
7388/**
7389 * xmlParseEncName:
7390 * @ctxt: an XML parser context
7391 *
7392 * parse the XML encoding name
7393 *
7394 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7395 *
7396 * Returns the encoding name value or NULL
7397 */
7398xmlChar *
7399xmlParseEncName(xmlParserCtxtPtr ctxt) {
7400 xmlChar *buf = NULL;
7401 int len = 0;
7402 int size = 10;
7403 xmlChar cur;
7404
7405 cur = CUR;
7406 if (((cur >= 'a') && (cur <= 'z')) ||
7407 ((cur >= 'A') && (cur <= 'Z'))) {
7408 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7409 if (buf == NULL) {
7410 xmlGenericError(xmlGenericErrorContext,
7411 "malloc of %d byte failed\n", size);
7412 return(NULL);
7413 }
7414
7415 buf[len++] = cur;
7416 NEXT;
7417 cur = CUR;
7418 while (((cur >= 'a') && (cur <= 'z')) ||
7419 ((cur >= 'A') && (cur <= 'Z')) ||
7420 ((cur >= '0') && (cur <= '9')) ||
7421 (cur == '.') || (cur == '_') ||
7422 (cur == '-')) {
7423 if (len + 1 >= size) {
7424 size *= 2;
7425 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7426 if (buf == NULL) {
7427 xmlGenericError(xmlGenericErrorContext,
7428 "realloc of %d byte failed\n", size);
7429 return(NULL);
7430 }
7431 }
7432 buf[len++] = cur;
7433 NEXT;
7434 cur = CUR;
7435 if (cur == 0) {
7436 SHRINK;
7437 GROW;
7438 cur = CUR;
7439 }
7440 }
7441 buf[len] = 0;
7442 } else {
7443 ctxt->errNo = XML_ERR_ENCODING_NAME;
7444 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7445 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7446 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007447 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007448 }
7449 return(buf);
7450}
7451
7452/**
7453 * xmlParseEncodingDecl:
7454 * @ctxt: an XML parser context
7455 *
7456 * parse the XML encoding declaration
7457 *
7458 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7459 *
7460 * this setups the conversion filters.
7461 *
7462 * Returns the encoding value or NULL
7463 */
7464
7465xmlChar *
7466xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7467 xmlChar *encoding = NULL;
7468 const xmlChar *q;
7469
7470 SKIP_BLANKS;
7471 if ((RAW == 'e') && (NXT(1) == 'n') &&
7472 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7473 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7474 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7475 SKIP(8);
7476 SKIP_BLANKS;
7477 if (RAW != '=') {
7478 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7479 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7480 ctxt->sax->error(ctxt->userData,
7481 "xmlParseEncodingDecl : expected '='\n");
7482 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007483 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007484 return(NULL);
7485 }
7486 NEXT;
7487 SKIP_BLANKS;
7488 if (RAW == '"') {
7489 NEXT;
7490 q = CUR_PTR;
7491 encoding = xmlParseEncName(ctxt);
7492 if (RAW != '"') {
7493 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7494 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7495 ctxt->sax->error(ctxt->userData,
7496 "String not closed\n%.50s\n", q);
7497 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007498 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007499 } else
7500 NEXT;
7501 } else if (RAW == '\''){
7502 NEXT;
7503 q = CUR_PTR;
7504 encoding = xmlParseEncName(ctxt);
7505 if (RAW != '\'') {
7506 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7507 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7508 ctxt->sax->error(ctxt->userData,
7509 "String not closed\n%.50s\n", q);
7510 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007511 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007512 } else
7513 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00007514 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007515 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7516 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7517 ctxt->sax->error(ctxt->userData,
7518 "xmlParseEncodingDecl : expected ' or \"\n");
7519 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007520 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007521 }
7522 if (encoding != NULL) {
7523 xmlCharEncoding enc;
7524 xmlCharEncodingHandlerPtr handler;
7525
7526 if (ctxt->input->encoding != NULL)
7527 xmlFree((xmlChar *) ctxt->input->encoding);
7528 ctxt->input->encoding = encoding;
7529
7530 enc = xmlParseCharEncoding((const char *) encoding);
7531 /*
7532 * registered set of known encodings
7533 */
7534 if (enc != XML_CHAR_ENCODING_ERROR) {
7535 xmlSwitchEncoding(ctxt, enc);
7536 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
Daniel Veillard46d6c442002-04-09 16:10:39 +00007537 ctxt->input->encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007538 xmlFree(encoding);
7539 return(NULL);
7540 }
7541 } else {
7542 /*
7543 * fallback for unknown encodings
7544 */
7545 handler = xmlFindCharEncodingHandler((const char *) encoding);
7546 if (handler != NULL) {
7547 xmlSwitchToEncoding(ctxt, handler);
7548 } else {
7549 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7550 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7551 ctxt->sax->error(ctxt->userData,
7552 "Unsupported encoding %s\n", encoding);
7553 return(NULL);
7554 }
7555 }
7556 }
7557 }
7558 return(encoding);
7559}
7560
7561/**
7562 * xmlParseSDDecl:
7563 * @ctxt: an XML parser context
7564 *
7565 * parse the XML standalone declaration
7566 *
7567 * [32] SDDecl ::= S 'standalone' Eq
7568 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7569 *
7570 * [ VC: Standalone Document Declaration ]
7571 * TODO The standalone document declaration must have the value "no"
7572 * if any external markup declarations contain declarations of:
7573 * - attributes with default values, if elements to which these
7574 * attributes apply appear in the document without specifications
7575 * of values for these attributes, or
7576 * - entities (other than amp, lt, gt, apos, quot), if references
7577 * to those entities appear in the document, or
7578 * - attributes with values subject to normalization, where the
7579 * attribute appears in the document with a value which will change
7580 * as a result of normalization, or
7581 * - element types with element content, if white space occurs directly
7582 * within any instance of those types.
7583 *
7584 * Returns 1 if standalone, 0 otherwise
7585 */
7586
7587int
7588xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7589 int standalone = -1;
7590
7591 SKIP_BLANKS;
7592 if ((RAW == 's') && (NXT(1) == 't') &&
7593 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7594 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7595 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7596 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7597 SKIP(10);
7598 SKIP_BLANKS;
7599 if (RAW != '=') {
7600 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7602 ctxt->sax->error(ctxt->userData,
7603 "XML standalone declaration : expected '='\n");
7604 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007605 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007606 return(standalone);
7607 }
7608 NEXT;
7609 SKIP_BLANKS;
7610 if (RAW == '\''){
7611 NEXT;
7612 if ((RAW == 'n') && (NXT(1) == 'o')) {
7613 standalone = 0;
7614 SKIP(2);
7615 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7616 (NXT(2) == 's')) {
7617 standalone = 1;
7618 SKIP(3);
7619 } else {
7620 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7621 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7622 ctxt->sax->error(ctxt->userData,
7623 "standalone accepts only 'yes' or 'no'\n");
7624 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007625 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007626 }
7627 if (RAW != '\'') {
7628 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7629 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7630 ctxt->sax->error(ctxt->userData, "String not closed\n");
7631 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007632 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007633 } else
7634 NEXT;
7635 } else if (RAW == '"'){
7636 NEXT;
7637 if ((RAW == 'n') && (NXT(1) == 'o')) {
7638 standalone = 0;
7639 SKIP(2);
7640 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7641 (NXT(2) == 's')) {
7642 standalone = 1;
7643 SKIP(3);
7644 } else {
7645 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7646 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7647 ctxt->sax->error(ctxt->userData,
7648 "standalone accepts only 'yes' or 'no'\n");
7649 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007650 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007651 }
7652 if (RAW != '"') {
7653 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7654 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7655 ctxt->sax->error(ctxt->userData, "String not closed\n");
7656 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007657 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007658 } else
7659 NEXT;
7660 } else {
7661 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7662 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7663 ctxt->sax->error(ctxt->userData,
7664 "Standalone value not found\n");
7665 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007666 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007667 }
7668 }
7669 return(standalone);
7670}
7671
7672/**
7673 * xmlParseXMLDecl:
7674 * @ctxt: an XML parser context
7675 *
7676 * parse an XML declaration header
7677 *
7678 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7679 */
7680
7681void
7682xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7683 xmlChar *version;
7684
7685 /*
7686 * We know that '<?xml' is here.
7687 */
7688 SKIP(5);
7689
7690 if (!IS_BLANK(RAW)) {
7691 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7692 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7693 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7694 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007695 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007696 }
7697 SKIP_BLANKS;
7698
7699 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007700 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007701 */
7702 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007703 if (version == NULL) {
7704 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7705 ctxt->sax->error(ctxt->userData,
7706 "Malformed declaration expecting version\n");
7707 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007708 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard19840942001-11-29 16:11:38 +00007709 } else {
7710 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7711 /*
7712 * TODO: Blueberry should be detected here
7713 */
7714 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7715 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7716 version);
7717 }
7718 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007719 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007720 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007721 }
Owen Taylor3473f882001-02-23 17:55:21 +00007722
7723 /*
7724 * We may have the encoding declaration
7725 */
7726 if (!IS_BLANK(RAW)) {
7727 if ((RAW == '?') && (NXT(1) == '>')) {
7728 SKIP(2);
7729 return;
7730 }
7731 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7732 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7733 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7734 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007735 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007736 }
7737 xmlParseEncodingDecl(ctxt);
7738 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7739 /*
7740 * The XML REC instructs us to stop parsing right here
7741 */
7742 return;
7743 }
7744
7745 /*
7746 * We may have the standalone status.
7747 */
7748 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7749 if ((RAW == '?') && (NXT(1) == '>')) {
7750 SKIP(2);
7751 return;
7752 }
7753 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7754 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7755 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7756 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007757 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007758 }
7759 SKIP_BLANKS;
7760 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7761
7762 SKIP_BLANKS;
7763 if ((RAW == '?') && (NXT(1) == '>')) {
7764 SKIP(2);
7765 } else if (RAW == '>') {
7766 /* Deprecated old WD ... */
7767 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7768 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7769 ctxt->sax->error(ctxt->userData,
7770 "XML declaration must end-up with '?>'\n");
7771 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007772 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007773 NEXT;
7774 } else {
7775 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7776 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7777 ctxt->sax->error(ctxt->userData,
7778 "parsing XML declaration: '?>' expected\n");
7779 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007780 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007781 MOVETO_ENDTAG(CUR_PTR);
7782 NEXT;
7783 }
7784}
7785
7786/**
7787 * xmlParseMisc:
7788 * @ctxt: an XML parser context
7789 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007790 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007791 *
7792 * [27] Misc ::= Comment | PI | S
7793 */
7794
7795void
7796xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00007797 while (((RAW == '<') && (NXT(1) == '?')) ||
7798 ((RAW == '<') && (NXT(1) == '!') &&
7799 (NXT(2) == '-') && (NXT(3) == '-')) ||
7800 IS_BLANK(CUR)) {
7801 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007802 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00007803 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007804 NEXT;
7805 } else
7806 xmlParseComment(ctxt);
7807 }
7808}
7809
7810/**
7811 * xmlParseDocument:
7812 * @ctxt: an XML parser context
7813 *
7814 * parse an XML document (and build a tree if using the standard SAX
7815 * interface).
7816 *
7817 * [1] document ::= prolog element Misc*
7818 *
7819 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7820 *
7821 * Returns 0, -1 in case of error. the parser context is augmented
7822 * as a result of the parsing.
7823 */
7824
7825int
7826xmlParseDocument(xmlParserCtxtPtr ctxt) {
7827 xmlChar start[4];
7828 xmlCharEncoding enc;
7829
7830 xmlInitParser();
7831
7832 GROW;
7833
7834 /*
7835 * SAX: beginning of the document processing.
7836 */
7837 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7838 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7839
Daniel Veillard50f34372001-08-03 12:06:36 +00007840 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007841 /*
7842 * Get the 4 first bytes and decode the charset
7843 * if enc != XML_CHAR_ENCODING_NONE
7844 * plug some encoding conversion routines.
7845 */
7846 start[0] = RAW;
7847 start[1] = NXT(1);
7848 start[2] = NXT(2);
7849 start[3] = NXT(3);
7850 enc = xmlDetectCharEncoding(start, 4);
7851 if (enc != XML_CHAR_ENCODING_NONE) {
7852 xmlSwitchEncoding(ctxt, enc);
7853 }
Owen Taylor3473f882001-02-23 17:55:21 +00007854 }
7855
7856
7857 if (CUR == 0) {
7858 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7859 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7860 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7861 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007862 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007863 }
7864
7865 /*
7866 * Check for the XMLDecl in the Prolog.
7867 */
7868 GROW;
7869 if ((RAW == '<') && (NXT(1) == '?') &&
7870 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7871 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7872
7873 /*
7874 * Note that we will switch encoding on the fly.
7875 */
7876 xmlParseXMLDecl(ctxt);
7877 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7878 /*
7879 * The XML REC instructs us to stop parsing right here
7880 */
7881 return(-1);
7882 }
7883 ctxt->standalone = ctxt->input->standalone;
7884 SKIP_BLANKS;
7885 } else {
7886 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7887 }
7888 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7889 ctxt->sax->startDocument(ctxt->userData);
7890
7891 /*
7892 * The Misc part of the Prolog
7893 */
7894 GROW;
7895 xmlParseMisc(ctxt);
7896
7897 /*
7898 * Then possibly doc type declaration(s) and more Misc
7899 * (doctypedecl Misc*)?
7900 */
7901 GROW;
7902 if ((RAW == '<') && (NXT(1) == '!') &&
7903 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7904 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7905 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7906 (NXT(8) == 'E')) {
7907
7908 ctxt->inSubset = 1;
7909 xmlParseDocTypeDecl(ctxt);
7910 if (RAW == '[') {
7911 ctxt->instate = XML_PARSER_DTD;
7912 xmlParseInternalSubset(ctxt);
7913 }
7914
7915 /*
7916 * Create and update the external subset.
7917 */
7918 ctxt->inSubset = 2;
7919 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7920 (!ctxt->disableSAX))
7921 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7922 ctxt->extSubSystem, ctxt->extSubURI);
7923 ctxt->inSubset = 0;
7924
7925
7926 ctxt->instate = XML_PARSER_PROLOG;
7927 xmlParseMisc(ctxt);
7928 }
7929
7930 /*
7931 * Time to start parsing the tree itself
7932 */
7933 GROW;
7934 if (RAW != '<') {
7935 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7936 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7937 ctxt->sax->error(ctxt->userData,
7938 "Start tag expected, '<' not found\n");
7939 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007940 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007941 ctxt->instate = XML_PARSER_EOF;
7942 } else {
7943 ctxt->instate = XML_PARSER_CONTENT;
7944 xmlParseElement(ctxt);
7945 ctxt->instate = XML_PARSER_EPILOG;
7946
7947
7948 /*
7949 * The Misc part at the end
7950 */
7951 xmlParseMisc(ctxt);
7952
Daniel Veillard561b7f82002-03-20 21:55:57 +00007953 if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00007954 ctxt->errNo = XML_ERR_DOCUMENT_END;
7955 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7956 ctxt->sax->error(ctxt->userData,
7957 "Extra content at the end of the document\n");
7958 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007959 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007960 }
7961 ctxt->instate = XML_PARSER_EOF;
7962 }
7963
7964 /*
7965 * SAX: end of the document processing.
7966 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007967 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007968 ctxt->sax->endDocument(ctxt->userData);
7969
Daniel Veillard5997aca2002-03-18 18:36:20 +00007970 /*
7971 * Remove locally kept entity definitions if the tree was not built
7972 */
7973 if ((ctxt->myDoc != NULL) &&
7974 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
7975 xmlFreeDoc(ctxt->myDoc);
7976 ctxt->myDoc = NULL;
7977 }
7978
Daniel Veillardc7612992002-02-17 22:47:37 +00007979 if (! ctxt->wellFormed) {
7980 ctxt->valid = 0;
7981 return(-1);
7982 }
Owen Taylor3473f882001-02-23 17:55:21 +00007983 return(0);
7984}
7985
7986/**
7987 * xmlParseExtParsedEnt:
7988 * @ctxt: an XML parser context
7989 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007990 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00007991 * An external general parsed entity is well-formed if it matches the
7992 * production labeled extParsedEnt.
7993 *
7994 * [78] extParsedEnt ::= TextDecl? content
7995 *
7996 * Returns 0, -1 in case of error. the parser context is augmented
7997 * as a result of the parsing.
7998 */
7999
8000int
8001xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
8002 xmlChar start[4];
8003 xmlCharEncoding enc;
8004
8005 xmlDefaultSAXHandlerInit();
8006
8007 GROW;
8008
8009 /*
8010 * SAX: beginning of the document processing.
8011 */
8012 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8013 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8014
8015 /*
8016 * Get the 4 first bytes and decode the charset
8017 * if enc != XML_CHAR_ENCODING_NONE
8018 * plug some encoding conversion routines.
8019 */
8020 start[0] = RAW;
8021 start[1] = NXT(1);
8022 start[2] = NXT(2);
8023 start[3] = NXT(3);
8024 enc = xmlDetectCharEncoding(start, 4);
8025 if (enc != XML_CHAR_ENCODING_NONE) {
8026 xmlSwitchEncoding(ctxt, enc);
8027 }
8028
8029
8030 if (CUR == 0) {
8031 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8032 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8033 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8034 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008035 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008036 }
8037
8038 /*
8039 * Check for the XMLDecl in the Prolog.
8040 */
8041 GROW;
8042 if ((RAW == '<') && (NXT(1) == '?') &&
8043 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8044 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8045
8046 /*
8047 * Note that we will switch encoding on the fly.
8048 */
8049 xmlParseXMLDecl(ctxt);
8050 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8051 /*
8052 * The XML REC instructs us to stop parsing right here
8053 */
8054 return(-1);
8055 }
8056 SKIP_BLANKS;
8057 } else {
8058 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8059 }
8060 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8061 ctxt->sax->startDocument(ctxt->userData);
8062
8063 /*
8064 * Doing validity checking on chunk doesn't make sense
8065 */
8066 ctxt->instate = XML_PARSER_CONTENT;
8067 ctxt->validate = 0;
8068 ctxt->loadsubset = 0;
8069 ctxt->depth = 0;
8070
8071 xmlParseContent(ctxt);
8072
8073 if ((RAW == '<') && (NXT(1) == '/')) {
8074 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8075 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8076 ctxt->sax->error(ctxt->userData,
8077 "chunk is not well balanced\n");
8078 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008079 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008080 } else if (RAW != 0) {
8081 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8082 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8083 ctxt->sax->error(ctxt->userData,
8084 "extra content at the end of well balanced chunk\n");
8085 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008086 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008087 }
8088
8089 /*
8090 * SAX: end of the document processing.
8091 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008092 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008093 ctxt->sax->endDocument(ctxt->userData);
8094
8095 if (! ctxt->wellFormed) return(-1);
8096 return(0);
8097}
8098
8099/************************************************************************
8100 * *
8101 * Progressive parsing interfaces *
8102 * *
8103 ************************************************************************/
8104
8105/**
8106 * xmlParseLookupSequence:
8107 * @ctxt: an XML parser context
8108 * @first: the first char to lookup
8109 * @next: the next char to lookup or zero
8110 * @third: the next char to lookup or zero
8111 *
8112 * Try to find if a sequence (first, next, third) or just (first next) or
8113 * (first) is available in the input stream.
8114 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8115 * to avoid rescanning sequences of bytes, it DOES change the state of the
8116 * parser, do not use liberally.
8117 *
8118 * Returns the index to the current parsing point if the full sequence
8119 * is available, -1 otherwise.
8120 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008121static int
Owen Taylor3473f882001-02-23 17:55:21 +00008122xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8123 xmlChar next, xmlChar third) {
8124 int base, len;
8125 xmlParserInputPtr in;
8126 const xmlChar *buf;
8127
8128 in = ctxt->input;
8129 if (in == NULL) return(-1);
8130 base = in->cur - in->base;
8131 if (base < 0) return(-1);
8132 if (ctxt->checkIndex > base)
8133 base = ctxt->checkIndex;
8134 if (in->buf == NULL) {
8135 buf = in->base;
8136 len = in->length;
8137 } else {
8138 buf = in->buf->buffer->content;
8139 len = in->buf->buffer->use;
8140 }
8141 /* take into account the sequence length */
8142 if (third) len -= 2;
8143 else if (next) len --;
8144 for (;base < len;base++) {
8145 if (buf[base] == first) {
8146 if (third != 0) {
8147 if ((buf[base + 1] != next) ||
8148 (buf[base + 2] != third)) continue;
8149 } else if (next != 0) {
8150 if (buf[base + 1] != next) continue;
8151 }
8152 ctxt->checkIndex = 0;
8153#ifdef DEBUG_PUSH
8154 if (next == 0)
8155 xmlGenericError(xmlGenericErrorContext,
8156 "PP: lookup '%c' found at %d\n",
8157 first, base);
8158 else if (third == 0)
8159 xmlGenericError(xmlGenericErrorContext,
8160 "PP: lookup '%c%c' found at %d\n",
8161 first, next, base);
8162 else
8163 xmlGenericError(xmlGenericErrorContext,
8164 "PP: lookup '%c%c%c' found at %d\n",
8165 first, next, third, base);
8166#endif
8167 return(base - (in->cur - in->base));
8168 }
8169 }
8170 ctxt->checkIndex = base;
8171#ifdef DEBUG_PUSH
8172 if (next == 0)
8173 xmlGenericError(xmlGenericErrorContext,
8174 "PP: lookup '%c' failed\n", first);
8175 else if (third == 0)
8176 xmlGenericError(xmlGenericErrorContext,
8177 "PP: lookup '%c%c' failed\n", first, next);
8178 else
8179 xmlGenericError(xmlGenericErrorContext,
8180 "PP: lookup '%c%c%c' failed\n", first, next, third);
8181#endif
8182 return(-1);
8183}
8184
8185/**
 * xmlParseTryOrFinish:
 * @ctxt: an XML parser context
 * @terminate: last chunk indicator
 *
 * Try to progress on parsing. This is the push-parser state machine: it
 * loops, dispatching on ctxt->instate, and parses whatever can be parsed
 * from the data currently buffered in ctxt->input. For most constructs,
 * when @terminate is zero, xmlParseLookupSequence() is first used to check
 * that the closing delimiter is already buffered; if it is not, the
 * function leaves the loop so that the caller can push more data.
 * When @terminate is non-zero those lookups are skipped and the construct
 * is parsed from whatever is available.
 *
 * Returns zero if no parsing was possible
 * NOTE(review): in this function the returned counter is only ever
 * incremented (by 5) when an XML declaration is parsed, and 0 is returned
 * directly on XML_ERR_UNSUPPORTED_ENCODING; it does not measure progress
 * made in the other states.
 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008194static int
Owen Taylor3473f882001-02-23 17:55:21 +00008195xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8196 int ret = 0;
8197 int avail;
8198 xmlChar cur, next;
8199
    /* Debug-only trace of the state this call starts in. */
8200#ifdef DEBUG_PUSH
8201 switch (ctxt->instate) {
8202 case XML_PARSER_EOF:
8203 xmlGenericError(xmlGenericErrorContext,
8204 "PP: try EOF\n"); break;
8205 case XML_PARSER_START:
8206 xmlGenericError(xmlGenericErrorContext,
8207 "PP: try START\n"); break;
8208 case XML_PARSER_MISC:
8209 xmlGenericError(xmlGenericErrorContext,
8210 "PP: try MISC\n");break;
8211 case XML_PARSER_COMMENT:
8212 xmlGenericError(xmlGenericErrorContext,
8213 "PP: try COMMENT\n");break;
8214 case XML_PARSER_PROLOG:
8215 xmlGenericError(xmlGenericErrorContext,
8216 "PP: try PROLOG\n");break;
8217 case XML_PARSER_START_TAG:
8218 xmlGenericError(xmlGenericErrorContext,
8219 "PP: try START_TAG\n");break;
8220 case XML_PARSER_CONTENT:
8221 xmlGenericError(xmlGenericErrorContext,
8222 "PP: try CONTENT\n");break;
8223 case XML_PARSER_CDATA_SECTION:
8224 xmlGenericError(xmlGenericErrorContext,
8225 "PP: try CDATA_SECTION\n");break;
8226 case XML_PARSER_END_TAG:
8227 xmlGenericError(xmlGenericErrorContext,
8228 "PP: try END_TAG\n");break;
8229 case XML_PARSER_ENTITY_DECL:
8230 xmlGenericError(xmlGenericErrorContext,
8231 "PP: try ENTITY_DECL\n");break;
8232 case XML_PARSER_ENTITY_VALUE:
8233 xmlGenericError(xmlGenericErrorContext,
8234 "PP: try ENTITY_VALUE\n");break;
8235 case XML_PARSER_ATTRIBUTE_VALUE:
8236 xmlGenericError(xmlGenericErrorContext,
8237 "PP: try ATTRIBUTE_VALUE\n");break;
8238 case XML_PARSER_DTD:
8239 xmlGenericError(xmlGenericErrorContext,
8240 "PP: try DTD\n");break;
8241 case XML_PARSER_EPILOG:
8242 xmlGenericError(xmlGenericErrorContext,
8243 "PP: try EPILOG\n");break;
8244 case XML_PARSER_PI:
8245 xmlGenericError(xmlGenericErrorContext,
8246 "PP: try PI\n");break;
8247 case XML_PARSER_IGNORE:
8248 xmlGenericError(xmlGenericErrorContext,
8249 "PP: try IGNORE\n");break;
8250 }
8251#endif
8252
    /* One iteration per state-machine step; left through "done" or return. */
8253 while (1) {
Aleksey Sanine48a3182002-05-09 18:20:01 +00008254 SHRINK;
8255
Owen Taylor3473f882001-02-23 17:55:21 +00008256 /*
8257 * Pop-up of finished entities.
8258 */
8259 while ((RAW == 0) && (ctxt->inputNr > 1))
8260 xmlPopInput(ctxt);
8261
    /* Compute how many bytes remain between the read cursor and the end of
     * the buffered data; the formula depends on whether the input has an
     * I/O buffer attached. */
8262 if (ctxt->input ==NULL) break;
8263 if (ctxt->input->buf == NULL)
8264 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00008265 else {
8266 /*
8267 * If we are operating on converted input, try to flush
8268 * remaining chars to avoid them stalling in the non-converted
8269 * buffer.
8270 */
8271 if ((ctxt->input->buf->raw != NULL) &&
8272 (ctxt->input->buf->raw->use > 0)) {
    /* Offsets are saved and the pointers rebuilt after the push,
     * presumably because the push can reallocate buffer->content. */
8273 int base = ctxt->input->base -
8274 ctxt->input->buf->buffer->content;
8275 int current = ctxt->input->cur - ctxt->input->base;
8276
8277 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8278 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8279 ctxt->input->cur = ctxt->input->base + current;
8280 ctxt->input->end =
8281 &ctxt->input->buf->buffer->content[
8282 ctxt->input->buf->buffer->use];
8283 }
8284 avail = ctxt->input->buf->buffer->use -
8285 (ctxt->input->cur - ctxt->input->base);
8286 }
    /* Nothing buffered: stop and wait for the next chunk. */
Owen Taylor3473f882001-02-23 17:55:21 +00008287 if (avail < 1)
8288 goto done;
8289 switch (ctxt->instate) {
8290 case XML_PARSER_EOF:
8291 /*
8292 * Document parsing is done !
8293 */
8294 goto done;
8295 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008296 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8297 xmlChar start[4];
8298 xmlCharEncoding enc;
8299
8300 /*
8301 * Very first chars read from the document flow.
8302 */
8303 if (avail < 4)
8304 goto done;
8305
8306 /*
8307 * Get the 4 first bytes and decode the charset
8308 * if enc != XML_CHAR_ENCODING_NONE
8309 * plug some encoding conversion routines.
8310 */
8311 start[0] = RAW;
8312 start[1] = NXT(1);
8313 start[2] = NXT(2);
8314 start[3] = NXT(3);
8315 enc = xmlDetectCharEncoding(start, 4);
8316 if (enc != XML_CHAR_ENCODING_NONE) {
8317 xmlSwitchEncoding(ctxt, enc);
8318 }
    /* Restart the loop so that avail is recomputed before going on. */
8319 break;
8320 }
Owen Taylor3473f882001-02-23 17:55:21 +00008321
8322 cur = ctxt->input->cur[0];
8323 next = ctxt->input->cur[1];
    /* A zero first byte is reported as an empty document and ends parsing. */
8324 if (cur == 0) {
8325 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8326 ctxt->sax->setDocumentLocator(ctxt->userData,
8327 &xmlDefaultSAXLocator);
8328 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8329 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8330 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8331 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008332 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008333 ctxt->instate = XML_PARSER_EOF;
8334#ifdef DEBUG_PUSH
8335 xmlGenericError(xmlGenericErrorContext,
8336 "PP: entering EOF\n");
8337#endif
8338 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8339 ctxt->sax->endDocument(ctxt->userData);
8340 goto done;
8341 }
8342 if ((cur == '<') && (next == '?')) {
8343 /* PI or XML decl */
    /* Note: these two early exits use return(ret) rather than "goto done",
     * so the DEBUG_PUSH "done" trace is not emitted for them. */
8344 if (avail < 5) return(ret);
8345 if ((!terminate) &&
8346 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8347 return(ret);
8348 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8349 ctxt->sax->setDocumentLocator(ctxt->userData,
8350 &xmlDefaultSAXLocator);
    /* "<?xml" followed by a blank is the XML declaration; any other
     * "<?..." is left for the MISC state to parse as a PI. */
8351 if ((ctxt->input->cur[2] == 'x') &&
8352 (ctxt->input->cur[3] == 'm') &&
8353 (ctxt->input->cur[4] == 'l') &&
8354 (IS_BLANK(ctxt->input->cur[5]))) {
8355 ret += 5;
8356#ifdef DEBUG_PUSH
8357 xmlGenericError(xmlGenericErrorContext,
8358 "PP: Parsing XML Decl\n");
8359#endif
8360 xmlParseXMLDecl(ctxt);
8361 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8362 /*
8363 * The XML REC instructs us to stop parsing right
8364 * here
8365 */
8366 ctxt->instate = XML_PARSER_EOF;
8367 return(0);
8368 }
8369 ctxt->standalone = ctxt->input->standalone;
8370 if ((ctxt->encoding == NULL) &&
8371 (ctxt->input->encoding != NULL))
8372 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8373 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8374 (!ctxt->disableSAX))
8375 ctxt->sax->startDocument(ctxt->userData);
8376 ctxt->instate = XML_PARSER_MISC;
8377#ifdef DEBUG_PUSH
8378 xmlGenericError(xmlGenericErrorContext,
8379 "PP: entering MISC\n");
8380#endif
8381 } else {
8382 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8383 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8384 (!ctxt->disableSAX))
8385 ctxt->sax->startDocument(ctxt->userData);
8386 ctxt->instate = XML_PARSER_MISC;
8387#ifdef DEBUG_PUSH
8388 xmlGenericError(xmlGenericErrorContext,
8389 "PP: entering MISC\n");
8390#endif
8391 }
8392 } else {
    /* No XML declaration: use the default version and start the document. */
8393 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8394 ctxt->sax->setDocumentLocator(ctxt->userData,
8395 &xmlDefaultSAXLocator);
8396 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8397 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8398 (!ctxt->disableSAX))
8399 ctxt->sax->startDocument(ctxt->userData);
8400 ctxt->instate = XML_PARSER_MISC;
8401#ifdef DEBUG_PUSH
8402 xmlGenericError(xmlGenericErrorContext,
8403 "PP: entering MISC\n");
8404#endif
8405 }
8406 break;
    /* MISC: between the XML declaration and the DOCTYPE/root element.
     * Handles PIs, comments and the DOCTYPE declaration; anything else
     * moves on to START_TAG. */
8407 case XML_PARSER_MISC:
8408 SKIP_BLANKS;
    /* avail is recomputed after skipping blanks. */
8409 if (ctxt->input->buf == NULL)
8410 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8411 else
8412 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8413 if (avail < 2)
8414 goto done;
8415 cur = ctxt->input->cur[0];
8416 next = ctxt->input->cur[1];
8417 if ((cur == '<') && (next == '?')) {
8418 if ((!terminate) &&
8419 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8420 goto done;
8421#ifdef DEBUG_PUSH
8422 xmlGenericError(xmlGenericErrorContext,
8423 "PP: Parsing PI\n");
8424#endif
8425 xmlParsePI(ctxt);
8426 } else if ((cur == '<') && (next == '!') &&
8427 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8428 if ((!terminate) &&
8429 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8430 goto done;
8431#ifdef DEBUG_PUSH
8432 xmlGenericError(xmlGenericErrorContext,
8433 "PP: Parsing Comment\n");
8434#endif
8435 xmlParseComment(ctxt);
8436 ctxt->instate = XML_PARSER_MISC;
8437 } else if ((cur == '<') && (next == '!') &&
8438 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8439 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8440 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8441 (ctxt->input->cur[8] == 'E')) {
8442 if ((!terminate) &&
8443 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8444 goto done;
8445#ifdef DEBUG_PUSH
8446 xmlGenericError(xmlGenericErrorContext,
8447 "PP: Parsing internal subset\n");
8448#endif
8449 ctxt->inSubset = 1;
8450 xmlParseDocTypeDecl(ctxt);
    /* A '[' after the declaration opens an internal subset, handled by the
     * DTD state; otherwise the external subset callback is fired now. */
8451 if (RAW == '[') {
8452 ctxt->instate = XML_PARSER_DTD;
8453#ifdef DEBUG_PUSH
8454 xmlGenericError(xmlGenericErrorContext,
8455 "PP: entering DTD\n");
8456#endif
8457 } else {
8458 /*
8459 * Create and update the external subset.
8460 */
8461 ctxt->inSubset = 2;
8462 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8463 (ctxt->sax->externalSubset != NULL))
8464 ctxt->sax->externalSubset(ctxt->userData,
8465 ctxt->intSubName, ctxt->extSubSystem,
8466 ctxt->extSubURI);
8467 ctxt->inSubset = 0;
8468 ctxt->instate = XML_PARSER_PROLOG;
8469#ifdef DEBUG_PUSH
8470 xmlGenericError(xmlGenericErrorContext,
8471 "PP: entering PROLOG\n");
8472#endif
8473 }
    /* "<!" seen but fewer than 9 bytes buffered: too short to have matched
     * "<!DOCTYPE" above, so wait for more data before deciding. */
8474 } else if ((cur == '<') && (next == '!') &&
8475 (avail < 9)) {
8476 goto done;
8477 } else {
8478 ctxt->instate = XML_PARSER_START_TAG;
8479#ifdef DEBUG_PUSH
8480 xmlGenericError(xmlGenericErrorContext,
8481 "PP: entering START_TAG\n");
8482#endif
8483 }
8484 break;
    /* IGNORE is not expected here; it is reported and mapped to DTD.
     * NOTE(review): unlike the other internal-error messages in this
     * function, this one has no trailing "\n" - confirm whether intended. */
8485 case XML_PARSER_IGNORE:
8486 xmlGenericError(xmlGenericErrorContext,
8487 "PP: internal error, state == IGNORE");
8488 ctxt->instate = XML_PARSER_DTD;
8489#ifdef DEBUG_PUSH
8490 xmlGenericError(xmlGenericErrorContext,
8491 "PP: entering DTD\n");
8492#endif
8493 break;
    /* PROLOG: after the DOCTYPE, before the root element. Same handling as
     * MISC except that a DOCTYPE is no longer looked for. */
8494 case XML_PARSER_PROLOG:
8495 SKIP_BLANKS;
8496 if (ctxt->input->buf == NULL)
8497 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8498 else
8499 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8500 if (avail < 2)
8501 goto done;
8502 cur = ctxt->input->cur[0];
8503 next = ctxt->input->cur[1];
8504 if ((cur == '<') && (next == '?')) {
8505 if ((!terminate) &&
8506 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8507 goto done;
8508#ifdef DEBUG_PUSH
8509 xmlGenericError(xmlGenericErrorContext,
8510 "PP: Parsing PI\n");
8511#endif
8512 xmlParsePI(ctxt);
8513 } else if ((cur == '<') && (next == '!') &&
8514 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8515 if ((!terminate) &&
8516 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8517 goto done;
8518#ifdef DEBUG_PUSH
8519 xmlGenericError(xmlGenericErrorContext,
8520 "PP: Parsing Comment\n");
8521#endif
8522 xmlParseComment(ctxt);
8523 ctxt->instate = XML_PARSER_PROLOG;
    /* "<!" with fewer than 4 bytes: cannot yet tell whether it is "<!--". */
8524 } else if ((cur == '<') && (next == '!') &&
8525 (avail < 4)) {
8526 goto done;
8527 } else {
8528 ctxt->instate = XML_PARSER_START_TAG;
8529#ifdef DEBUG_PUSH
8530 xmlGenericError(xmlGenericErrorContext,
8531 "PP: entering START_TAG\n");
8532#endif
8533 }
8534 break;
    /* EPILOG: after the root element has been closed. Only PIs and comments
     * are accepted; anything else is an "extra content" error that ends
     * parsing. */
8535 case XML_PARSER_EPILOG:
8536 SKIP_BLANKS;
8537 if (ctxt->input->buf == NULL)
8538 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8539 else
8540 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8541 if (avail < 2)
8542 goto done;
8543 cur = ctxt->input->cur[0];
8544 next = ctxt->input->cur[1];
8545 if ((cur == '<') && (next == '?')) {
8546 if ((!terminate) &&
8547 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8548 goto done;
8549#ifdef DEBUG_PUSH
8550 xmlGenericError(xmlGenericErrorContext,
8551 "PP: Parsing PI\n");
8552#endif
8553 xmlParsePI(ctxt);
8554 ctxt->instate = XML_PARSER_EPILOG;
8555 } else if ((cur == '<') && (next == '!') &&
8556 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8557 if ((!terminate) &&
8558 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8559 goto done;
8560#ifdef DEBUG_PUSH
8561 xmlGenericError(xmlGenericErrorContext,
8562 "PP: Parsing Comment\n");
8563#endif
8564 xmlParseComment(ctxt);
8565 ctxt->instate = XML_PARSER_EPILOG;
8566 } else if ((cur == '<') && (next == '!') &&
8567 (avail < 4)) {
8568 goto done;
8569 } else {
8570 ctxt->errNo = XML_ERR_DOCUMENT_END;
8571 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8572 ctxt->sax->error(ctxt->userData,
8573 "Extra content at the end of the document\n");
8574 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008575 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008576 ctxt->instate = XML_PARSER_EOF;
8577#ifdef DEBUG_PUSH
8578 xmlGenericError(xmlGenericErrorContext,
8579 "PP: entering EOF\n");
8580#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008581 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008582 ctxt->sax->endDocument(ctxt->userData);
8583 goto done;
8584 }
8585 break;
    /* START_TAG: parse one start tag (or empty-element tag) once its
     * closing '>' is buffered. */
8586 case XML_PARSER_START_TAG: {
8587 xmlChar *name, *oldname;
8588
8589 if ((avail < 2) && (ctxt->inputNr == 1))
8590 goto done;
8591 cur = ctxt->input->cur[0];
8592 if (cur != '<') {
8593 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8594 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8595 ctxt->sax->error(ctxt->userData,
8596 "Start tag expect, '<' not found\n");
8597 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008598 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008599 ctxt->instate = XML_PARSER_EOF;
8600#ifdef DEBUG_PUSH
8601 xmlGenericError(xmlGenericErrorContext,
8602 "PP: entering EOF\n");
8603#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008604 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008605 ctxt->sax->endDocument(ctxt->userData);
8606 goto done;
8607 }
8608 if ((!terminate) &&
8609 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8610 goto done;
    /* Push a space-handling entry for the new element: -1 for the first
     * one, otherwise a copy of the current top value. */
8611 if (ctxt->spaceNr == 0)
8612 spacePush(ctxt, -1);
8613 else
8614 spacePush(ctxt, *ctxt->space);
8615 name = xmlParseStartTag(ctxt);
    /* No name returned: undo the space push and stop parsing. */
8616 if (name == NULL) {
8617 spacePop(ctxt);
8618 ctxt->instate = XML_PARSER_EOF;
8619#ifdef DEBUG_PUSH
8620 xmlGenericError(xmlGenericErrorContext,
8621 "PP: entering EOF\n");
8622#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008623 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008624 ctxt->sax->endDocument(ctxt->userData);
8625 goto done;
8626 }
    /* The name stack gets its own copy; "name" itself is freed below on
     * both the empty-element path and the normal path. */
8627 namePush(ctxt, xmlStrdup(name));
8628
8629 /*
8630 * [ VC: Root Element Type ]
8631 * The Name in the document type declaration must match
8632 * the element type of the root element.
8633 */
8634 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8635 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8636 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8637
8638 /*
8639 * Check for an Empty Element.
8640 */
8641 if ((RAW == '/') && (NXT(1) == '>')) {
8642 SKIP(2);
8643 if ((ctxt->sax != NULL) &&
8644 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8645 ctxt->sax->endElement(ctxt->userData, name);
8646 xmlFree(name);
8647 oldname = namePop(ctxt);
8648 spacePop(ctxt);
8649 if (oldname != NULL) {
8650#ifdef DEBUG_STACK
8651 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8652#endif
8653 xmlFree(oldname);
8654 }
    /* An empty name stack means the root element was just closed. */
8655 if (ctxt->name == NULL) {
8656 ctxt->instate = XML_PARSER_EPILOG;
8657#ifdef DEBUG_PUSH
8658 xmlGenericError(xmlGenericErrorContext,
8659 "PP: entering EPILOG\n");
8660#endif
8661 } else {
8662 ctxt->instate = XML_PARSER_CONTENT;
8663#ifdef DEBUG_PUSH
8664 xmlGenericError(xmlGenericErrorContext,
8665 "PP: entering CONTENT\n");
8666#endif
8667 }
8668 break;
8669 }
8670 if (RAW == '>') {
8671 NEXT;
8672 } else {
8673 ctxt->errNo = XML_ERR_GT_REQUIRED;
8674 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8675 ctxt->sax->error(ctxt->userData,
8676 "Couldn't find end of Start Tag %s\n",
8677 name);
8678 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008679 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008680
8681 /*
8682 * end of parsing of this node.
8683 */
8684 nodePop(ctxt);
8685 oldname = namePop(ctxt);
8686 spacePop(ctxt);
8687 if (oldname != NULL) {
8688#ifdef DEBUG_STACK
8689 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8690#endif
8691 xmlFree(oldname);
8692 }
8693 }
8694 xmlFree(name);
8695 ctxt->instate = XML_PARSER_CONTENT;
8696#ifdef DEBUG_PUSH
8697 xmlGenericError(xmlGenericErrorContext,
8698 "PP: entering CONTENT\n");
8699#endif
8700 break;
8701 }
    /* CONTENT: inside an element. Dispatches on the next characters to a
     * PI, comment, CDATA section, end tag, start tag, reference or
     * character data. */
8702 case XML_PARSER_CONTENT: {
8703 const xmlChar *test;
8704 int cons;
Owen Taylor3473f882001-02-23 17:55:21 +00008705 if ((avail < 2) && (ctxt->inputNr == 1))
8706 goto done;
8707 cur = ctxt->input->cur[0];
8708 next = ctxt->input->cur[1];
8709
    /* Snapshot of the read position, compared after the dispatch below to
     * detect that nothing was consumed. */
8710 test = CUR_PTR;
8711 cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008712 if ((cur == '<') && (next == '?')) {
8713 if ((!terminate) &&
8714 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8715 goto done;
8716#ifdef DEBUG_PUSH
8717 xmlGenericError(xmlGenericErrorContext,
8718 "PP: Parsing PI\n");
8719#endif
8720 xmlParsePI(ctxt);
8721 } else if ((cur == '<') && (next == '!') &&
8722 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8723 if ((!terminate) &&
8724 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8725 goto done;
8726#ifdef DEBUG_PUSH
8727 xmlGenericError(xmlGenericErrorContext,
8728 "PP: Parsing Comment\n");
8729#endif
8730 xmlParseComment(ctxt);
8731 ctxt->instate = XML_PARSER_CONTENT;
    /* "<![CDATA[": skip the 9-byte opener; the payload is handled by the
     * CDATA_SECTION state. */
8732 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8733 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8734 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8735 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8736 (ctxt->input->cur[8] == '[')) {
8737 SKIP(9);
8738 ctxt->instate = XML_PARSER_CDATA_SECTION;
8739#ifdef DEBUG_PUSH
8740 xmlGenericError(xmlGenericErrorContext,
8741 "PP: entering CDATA_SECTION\n");
8742#endif
8743 break;
8744 } else if ((cur == '<') && (next == '!') &&
8745 (avail < 9)) {
8746 goto done;
8747 } else if ((cur == '<') && (next == '/')) {
8748 ctxt->instate = XML_PARSER_END_TAG;
8749#ifdef DEBUG_PUSH
8750 xmlGenericError(xmlGenericErrorContext,
8751 "PP: entering END_TAG\n");
8752#endif
8753 break;
8754 } else if (cur == '<') {
8755 ctxt->instate = XML_PARSER_START_TAG;
8756#ifdef DEBUG_PUSH
8757 xmlGenericError(xmlGenericErrorContext,
8758 "PP: entering START_TAG\n");
8759#endif
8760 break;
8761 } else if (cur == '&') {
8762 if ((!terminate) &&
8763 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8764 goto done;
8765#ifdef DEBUG_PUSH
8766 xmlGenericError(xmlGenericErrorContext,
8767 "PP: Parsing Reference\n");
8768#endif
8769 xmlParseReference(ctxt);
8770 } else {
8771 /* TODO Avoid the extra copy, handle directly !!! */
8772 /*
8773 * Goal of the following test is:
8774 * - minimize calls to the SAX 'character' callback
8775 * when they are mergeable
8776 * - handle an problem for isBlank when we only parse
8777 * a sequence of blank chars and the next one is
8778 * not available to check against '<' presence.
8779 * - tries to homogenize the differences in SAX
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008780 * callbacks between the push and pull versions
Owen Taylor3473f882001-02-23 17:55:21 +00008781 * of the parser.
8782 */
8783 if ((ctxt->inputNr == 1) &&
8784 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8785 if ((!terminate) &&
8786 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8787 goto done;
8788 }
8789 ctxt->checkIndex = 0;
8790#ifdef DEBUG_PUSH
8791 xmlGenericError(xmlGenericErrorContext,
8792 "PP: Parsing char data\n");
8793#endif
8794 xmlParseCharData(ctxt, 0);
8795 }
8796 /*
8797 * Pop-up of finished entities.
8798 */
8799 while ((RAW == 0) && (ctxt->inputNr > 1))
8800 xmlPopInput(ctxt);
    /* Neither the consumed counter nor the cursor moved: report an internal
     * error and stop, rather than spinning on the same input. */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008801 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008802 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8803 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8804 ctxt->sax->error(ctxt->userData,
8805 "detected an error in element content\n");
8806 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008807 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008808 ctxt->instate = XML_PARSER_EOF;
8809 break;
8810 }
8811 break;
8812 }
8813 case XML_PARSER_CDATA_SECTION: {
8814 /*
8815 * The Push mode need to have the SAX callback for
8816 * cdataBlock merge back contiguous callbacks.
8817 */
8818 int base;
8819
    /* Unlike the other states, this lookup runs even when terminate is
     * set. */
8820 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8821 if (base < 0) {
    /* Terminator not buffered yet: if enough data has piled up, hand one
     * XML_PARSER_BIG_BUFFER_SIZE slice to cdataBlock and consume it. The
     * extra 2 bytes presumably keep a partial "]]" unconsumed - confirm. */
8822 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8823 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8824 if (ctxt->sax->cdataBlock != NULL)
8825 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8826 XML_PARSER_BIG_BUFFER_SIZE);
8827 }
8828 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8829 ctxt->checkIndex = 0;
8830 }
8831 goto done;
8832 } else {
    /* Terminator found at offset base: deliver the payload (if any) and
     * skip it together with the 3-byte "]]>". */
8833 if ((ctxt->sax != NULL) && (base > 0) &&
8834 (!ctxt->disableSAX)) {
8835 if (ctxt->sax->cdataBlock != NULL)
8836 ctxt->sax->cdataBlock(ctxt->userData,
8837 ctxt->input->cur, base);
8838 }
8839 SKIP(base + 3);
8840 ctxt->checkIndex = 0;
8841 ctxt->instate = XML_PARSER_CONTENT;
8842#ifdef DEBUG_PUSH
8843 xmlGenericError(xmlGenericErrorContext,
8844 "PP: entering CONTENT\n");
8845#endif
8846 }
8847 break;
8848 }
8849 case XML_PARSER_END_TAG:
8850 if (avail < 2)
8851 goto done;
8852 if ((!terminate) &&
8853 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8854 goto done;
8855 xmlParseEndTag(ctxt);
    /* An empty name stack means the root element was just closed. */
8856 if (ctxt->name == NULL) {
8857 ctxt->instate = XML_PARSER_EPILOG;
8858#ifdef DEBUG_PUSH
8859 xmlGenericError(xmlGenericErrorContext,
8860 "PP: entering EPILOG\n");
8861#endif
8862 } else {
8863 ctxt->instate = XML_PARSER_CONTENT;
8864#ifdef DEBUG_PUSH
8865 xmlGenericError(xmlGenericErrorContext,
8866 "PP: entering CONTENT\n");
8867#endif
8868 }
8869 break;
8870 case XML_PARSER_DTD: {
8871 /*
8872 * Sorry but progressive parsing of the internal subset
8873 * is not expected to be supported. We first check that
8874 * the full content of the internal subset is available and
8875 * the parsing is launched only at that point.
8876 * Internal subset ends up with "']' S? '>'" in an unescaped
8877 * section and not in a ']]>' sequence which are conditional
8878 * sections (whoever argued to keep that crap in XML deserve
8879 * a place in hell !).
8880 */
8881 int base, i;
8882 xmlChar *buf;
8883 xmlChar quote = 0;
8884
    /* Resume the scan from checkIndex when an earlier call already went
     * past the current cursor offset. */
8885 base = ctxt->input->cur - ctxt->input->base;
8886 if (base < 0) return(0);
8887 if (ctxt->checkIndex > base)
8888 base = ctxt->checkIndex;
    /* NOTE(review): ctxt->input->buf is dereferenced here without the NULL
     * check used when computing avail above - confirm it cannot be NULL in
     * this state. */
8889 buf = ctxt->input->buf->buffer->content;
8890 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8891 base++) {
    /* Inside a quoted literal everything is skipped up to the matching
     * quote character. */
8892 if (quote != 0) {
8893 if (buf[base] == quote)
8894 quote = 0;
8895 continue;
8896 }
8897 if (buf[base] == '"') {
8898 quote = '"';
8899 continue;
8900 }
8901 if (buf[base] == '\'') {
8902 quote = '\'';
8903 continue;
8904 }
8905 if (buf[base] == ']') {
8906 if ((unsigned int) base +1 >=
8907 ctxt->input->buf->buffer->use)
8908 break;
8909 if (buf[base + 1] == ']') {
8910 /* conditional crap, skip both ']' ! */
8911 base++;
8912 continue;
8913 }
    /* A lone ']' : any '>' buffered after it is taken as the end of the
     * internal subset. */
8914 for (i = 0;
8915 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8916 i++) {
8917 if (buf[base + i] == '>')
8918 goto found_end_int_subset;
8919 }
8920 break;
8921 }
8922 }
8923 /*
8924 * We didn't found the end of the Internal subset
8925 */
    /* The scan position is only remembered when not stopped inside a
     * quoted literal, since the quote state itself is not saved. */
8926 if (quote == 0)
8927 ctxt->checkIndex = base;
    /* NOTE(review): "next" is only assigned in the START/MISC/PROLOG/
     * EPILOG/CONTENT cases, so it may be unset when read here. */
8928#ifdef DEBUG_PUSH
8929 if (next == 0)
8930 xmlGenericError(xmlGenericErrorContext,
8931 "PP: lookup of int subset end filed\n");
8932#endif
8933 goto done;
8934
8935found_end_int_subset:
8936 xmlParseInternalSubset(ctxt);
8937 ctxt->inSubset = 2;
8938 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8939 (ctxt->sax->externalSubset != NULL))
8940 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8941 ctxt->extSubSystem, ctxt->extSubURI);
8942 ctxt->inSubset = 0;
8943 ctxt->instate = XML_PARSER_PROLOG;
8944 ctxt->checkIndex = 0;
8945#ifdef DEBUG_PUSH
8946 xmlGenericError(xmlGenericErrorContext,
8947 "PP: entering PROLOG\n");
8948#endif
8949 break;
8950 }
    /* The remaining states are not expected at this level: each one is
     * reported as an internal error and mapped to a state this loop
     * handles. */
8951 case XML_PARSER_COMMENT:
8952 xmlGenericError(xmlGenericErrorContext,
8953 "PP: internal error, state == COMMENT\n");
8954 ctxt->instate = XML_PARSER_CONTENT;
8955#ifdef DEBUG_PUSH
8956 xmlGenericError(xmlGenericErrorContext,
8957 "PP: entering CONTENT\n");
8958#endif
8959 break;
8960 case XML_PARSER_PI:
8961 xmlGenericError(xmlGenericErrorContext,
8962 "PP: internal error, state == PI\n");
8963 ctxt->instate = XML_PARSER_CONTENT;
8964#ifdef DEBUG_PUSH
8965 xmlGenericError(xmlGenericErrorContext,
8966 "PP: entering CONTENT\n");
8967#endif
8968 break;
8969 case XML_PARSER_ENTITY_DECL:
8970 xmlGenericError(xmlGenericErrorContext,
8971 "PP: internal error, state == ENTITY_DECL\n");
8972 ctxt->instate = XML_PARSER_DTD;
8973#ifdef DEBUG_PUSH
8974 xmlGenericError(xmlGenericErrorContext,
8975 "PP: entering DTD\n");
8976#endif
8977 break;
    /* NOTE(review): the state is set to CONTENT but the debug trace below
     * says "entering DTD" - one of the two looks wrong, confirm which. */
8978 case XML_PARSER_ENTITY_VALUE:
8979 xmlGenericError(xmlGenericErrorContext,
8980 "PP: internal error, state == ENTITY_VALUE\n");
8981 ctxt->instate = XML_PARSER_CONTENT;
8982#ifdef DEBUG_PUSH
8983 xmlGenericError(xmlGenericErrorContext,
8984 "PP: entering DTD\n");
8985#endif
8986 break;
8987 case XML_PARSER_ATTRIBUTE_VALUE:
8988 xmlGenericError(xmlGenericErrorContext,
8989 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8990 ctxt->instate = XML_PARSER_START_TAG;
8991#ifdef DEBUG_PUSH
8992 xmlGenericError(xmlGenericErrorContext,
8993 "PP: entering START_TAG\n");
8994#endif
8995 break;
8996 case XML_PARSER_SYSTEM_LITERAL:
8997 xmlGenericError(xmlGenericErrorContext,
8998 "PP: internal error, state == SYSTEM_LITERAL\n");
8999 ctxt->instate = XML_PARSER_START_TAG;
9000#ifdef DEBUG_PUSH
9001 xmlGenericError(xmlGenericErrorContext,
9002 "PP: entering START_TAG\n");
9003#endif
9004 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00009005 case XML_PARSER_PUBLIC_LITERAL:
9006 xmlGenericError(xmlGenericErrorContext,
9007 "PP: internal error, state == PUBLIC_LITERAL\n");
9008 ctxt->instate = XML_PARSER_START_TAG;
9009#ifdef DEBUG_PUSH
9010 xmlGenericError(xmlGenericErrorContext,
9011 "PP: entering START_TAG\n");
9012#endif
9013 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009014 }
9015 }
    /* Common exit: reached when no more progress is possible on the data
     * buffered so far, or when parsing has ended. */
9016done:
9017#ifdef DEBUG_PUSH
9018 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
9019#endif
9020 return(ret);
9021}
9022
9023/**
Owen Taylor3473f882001-02-23 17:55:21 +00009024 * xmlParseChunk:
9025 * @ctxt: an XML parser context
9026 * @chunk: an char array
9027 * @size: the size in byte of the chunk
9028 * @terminate: last chunk indicator
9029 *
9030 * Parse a Chunk of memory
9031 *
9032 * Returns zero if no error, the xmlParserErrors otherwise.
9033 */
9034int
9035xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9036 int terminate) {
9037 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9038 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9039 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9040 int cur = ctxt->input->cur - ctxt->input->base;
9041
9042 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9043 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9044 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009045 ctxt->input->end =
9046 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009047#ifdef DEBUG_PUSH
9048 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9049#endif
9050
9051 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
9052 xmlParseTryOrFinish(ctxt, terminate);
9053 } else if (ctxt->instate != XML_PARSER_EOF) {
9054 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
9055 xmlParserInputBufferPtr in = ctxt->input->buf;
9056 if ((in->encoder != NULL) && (in->buffer != NULL) &&
9057 (in->raw != NULL)) {
9058 int nbchars;
9059
9060 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
9061 if (nbchars < 0) {
9062 xmlGenericError(xmlGenericErrorContext,
9063 "xmlParseChunk: encoder error\n");
9064 return(XML_ERR_INVALID_ENCODING);
9065 }
9066 }
9067 }
9068 }
9069 xmlParseTryOrFinish(ctxt, terminate);
9070 if (terminate) {
9071 /*
9072 * Check for termination
9073 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009074 int avail = 0;
9075 if (ctxt->input->buf == NULL)
9076 avail = ctxt->input->length -
9077 (ctxt->input->cur - ctxt->input->base);
9078 else
9079 avail = ctxt->input->buf->buffer->use -
9080 (ctxt->input->cur - ctxt->input->base);
9081
Owen Taylor3473f882001-02-23 17:55:21 +00009082 if ((ctxt->instate != XML_PARSER_EOF) &&
9083 (ctxt->instate != XML_PARSER_EPILOG)) {
9084 ctxt->errNo = XML_ERR_DOCUMENT_END;
9085 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9086 ctxt->sax->error(ctxt->userData,
9087 "Extra content at the end of the document\n");
9088 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009089 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009090 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009091 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
9092 ctxt->errNo = XML_ERR_DOCUMENT_END;
9093 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9094 ctxt->sax->error(ctxt->userData,
9095 "Extra content at the end of the document\n");
9096 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009097 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009098
9099 }
Owen Taylor3473f882001-02-23 17:55:21 +00009100 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009101 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009102 ctxt->sax->endDocument(ctxt->userData);
9103 }
9104 ctxt->instate = XML_PARSER_EOF;
9105 }
9106 return((xmlParserErrors) ctxt->errNo);
9107}
9108
9109/************************************************************************
9110 * *
9111 * I/O front end functions to the parser *
9112 * *
9113 ************************************************************************/
9114
9115/**
9116 * xmlStopParser:
9117 * @ctxt: an XML parser context
9118 *
9119 * Blocks further parser processing
9120 */
9121void
9122xmlStopParser(xmlParserCtxtPtr ctxt) {
9123 ctxt->instate = XML_PARSER_EOF;
9124 if (ctxt->input != NULL)
9125 ctxt->input->cur = BAD_CAST"";
9126}
9127
9128/**
9129 * xmlCreatePushParserCtxt:
9130 * @sax: a SAX handler
9131 * @user_data: The user data returned on SAX callbacks
9132 * @chunk: a pointer to an array of chars
9133 * @size: number of chars in the array
9134 * @filename: an optional file name or URI
9135 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00009136 * Create a parser context for using the XML parser in push mode.
9137 * If @buffer and @size are non-NULL, the data is used to detect
9138 * the encoding. The remaining characters will be parsed so they
9139 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00009140 * To allow content encoding detection, @size should be >= 4
9141 * The value of @filename is used for fetching external entities
9142 * and error/warning reports.
9143 *
9144 * Returns the new parser context or NULL
9145 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00009146
Owen Taylor3473f882001-02-23 17:55:21 +00009147xmlParserCtxtPtr
9148xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9149 const char *chunk, int size, const char *filename) {
9150 xmlParserCtxtPtr ctxt;
9151 xmlParserInputPtr inputStream;
9152 xmlParserInputBufferPtr buf;
9153 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9154
9155 /*
9156 * plug some encoding conversion routines
9157 */
9158 if ((chunk != NULL) && (size >= 4))
9159 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9160
9161 buf = xmlAllocParserInputBuffer(enc);
9162 if (buf == NULL) return(NULL);
9163
9164 ctxt = xmlNewParserCtxt();
9165 if (ctxt == NULL) {
9166 xmlFree(buf);
9167 return(NULL);
9168 }
9169 if (sax != NULL) {
9170 if (ctxt->sax != &xmlDefaultSAXHandler)
9171 xmlFree(ctxt->sax);
9172 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9173 if (ctxt->sax == NULL) {
9174 xmlFree(buf);
9175 xmlFree(ctxt);
9176 return(NULL);
9177 }
9178 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9179 if (user_data != NULL)
9180 ctxt->userData = user_data;
9181 }
9182 if (filename == NULL) {
9183 ctxt->directory = NULL;
9184 } else {
9185 ctxt->directory = xmlParserGetDirectory(filename);
9186 }
9187
9188 inputStream = xmlNewInputStream(ctxt);
9189 if (inputStream == NULL) {
9190 xmlFreeParserCtxt(ctxt);
Daniel Veillard77a90a72003-03-22 00:04:05 +00009191 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009192 return(NULL);
9193 }
9194
9195 if (filename == NULL)
9196 inputStream->filename = NULL;
9197 else
Daniel Veillardf4862f02002-09-10 11:13:43 +00009198 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +00009199 xmlCanonicPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +00009200 inputStream->buf = buf;
9201 inputStream->base = inputStream->buf->buffer->content;
9202 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009203 inputStream->end =
9204 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009205
9206 inputPush(ctxt, inputStream);
9207
9208 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9209 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009210 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9211 int cur = ctxt->input->cur - ctxt->input->base;
9212
Owen Taylor3473f882001-02-23 17:55:21 +00009213 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009214
9215 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9216 ctxt->input->cur = ctxt->input->base + cur;
9217 ctxt->input->end =
9218 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009219#ifdef DEBUG_PUSH
9220 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9221#endif
9222 }
9223
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009224 if (enc != XML_CHAR_ENCODING_NONE) {
9225 xmlSwitchEncoding(ctxt, enc);
9226 }
9227
Owen Taylor3473f882001-02-23 17:55:21 +00009228 return(ctxt);
9229}
9230
9231/**
9232 * xmlCreateIOParserCtxt:
9233 * @sax: a SAX handler
9234 * @user_data: The user data returned on SAX callbacks
9235 * @ioread: an I/O read function
9236 * @ioclose: an I/O close function
9237 * @ioctx: an I/O handler
9238 * @enc: the charset encoding if known
9239 *
9240 * Create a parser context for using the XML parser with an existing
9241 * I/O stream
9242 *
9243 * Returns the new parser context or NULL
9244 */
9245xmlParserCtxtPtr
9246xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9247 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9248 void *ioctx, xmlCharEncoding enc) {
9249 xmlParserCtxtPtr ctxt;
9250 xmlParserInputPtr inputStream;
9251 xmlParserInputBufferPtr buf;
9252
9253 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9254 if (buf == NULL) return(NULL);
9255
9256 ctxt = xmlNewParserCtxt();
9257 if (ctxt == NULL) {
9258 xmlFree(buf);
9259 return(NULL);
9260 }
9261 if (sax != NULL) {
9262 if (ctxt->sax != &xmlDefaultSAXHandler)
9263 xmlFree(ctxt->sax);
9264 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9265 if (ctxt->sax == NULL) {
9266 xmlFree(buf);
9267 xmlFree(ctxt);
9268 return(NULL);
9269 }
9270 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9271 if (user_data != NULL)
9272 ctxt->userData = user_data;
9273 }
9274
9275 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9276 if (inputStream == NULL) {
9277 xmlFreeParserCtxt(ctxt);
9278 return(NULL);
9279 }
9280 inputPush(ctxt, inputStream);
9281
9282 return(ctxt);
9283}
9284
9285/************************************************************************
9286 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009287 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00009288 * *
9289 ************************************************************************/
9290
9291/**
9292 * xmlIOParseDTD:
9293 * @sax: the SAX handler block or NULL
9294 * @input: an Input Buffer
9295 * @enc: the charset encoding if known
9296 *
9297 * Load and parse a DTD
9298 *
9299 * Returns the resulting xmlDtdPtr or NULL in case of error.
9300 * @input will be freed at parsing end.
9301 */
9302
9303xmlDtdPtr
9304xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9305 xmlCharEncoding enc) {
9306 xmlDtdPtr ret = NULL;
9307 xmlParserCtxtPtr ctxt;
9308 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009309 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00009310
9311 if (input == NULL)
9312 return(NULL);
9313
9314 ctxt = xmlNewParserCtxt();
9315 if (ctxt == NULL) {
9316 return(NULL);
9317 }
9318
9319 /*
9320 * Set-up the SAX context
9321 */
9322 if (sax != NULL) {
9323 if (ctxt->sax != NULL)
9324 xmlFree(ctxt->sax);
9325 ctxt->sax = sax;
9326 ctxt->userData = NULL;
9327 }
9328
9329 /*
9330 * generate a parser input from the I/O handler
9331 */
9332
9333 pinput = xmlNewIOInputStream(ctxt, input, enc);
9334 if (pinput == NULL) {
9335 if (sax != NULL) ctxt->sax = NULL;
9336 xmlFreeParserCtxt(ctxt);
9337 return(NULL);
9338 }
9339
9340 /*
9341 * plug some encoding conversion routines here.
9342 */
9343 xmlPushInput(ctxt, pinput);
9344
9345 pinput->filename = NULL;
9346 pinput->line = 1;
9347 pinput->col = 1;
9348 pinput->base = ctxt->input->cur;
9349 pinput->cur = ctxt->input->cur;
9350 pinput->free = NULL;
9351
9352 /*
9353 * let's parse that entity knowing it's an external subset.
9354 */
9355 ctxt->inSubset = 2;
9356 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9357 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9358 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009359
9360 if (enc == XML_CHAR_ENCODING_NONE) {
9361 /*
9362 * Get the 4 first bytes and decode the charset
9363 * if enc != XML_CHAR_ENCODING_NONE
9364 * plug some encoding conversion routines.
9365 */
9366 start[0] = RAW;
9367 start[1] = NXT(1);
9368 start[2] = NXT(2);
9369 start[3] = NXT(3);
9370 enc = xmlDetectCharEncoding(start, 4);
9371 if (enc != XML_CHAR_ENCODING_NONE) {
9372 xmlSwitchEncoding(ctxt, enc);
9373 }
9374 }
9375
Owen Taylor3473f882001-02-23 17:55:21 +00009376 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9377
9378 if (ctxt->myDoc != NULL) {
9379 if (ctxt->wellFormed) {
9380 ret = ctxt->myDoc->extSubset;
9381 ctxt->myDoc->extSubset = NULL;
9382 } else {
9383 ret = NULL;
9384 }
9385 xmlFreeDoc(ctxt->myDoc);
9386 ctxt->myDoc = NULL;
9387 }
9388 if (sax != NULL) ctxt->sax = NULL;
9389 xmlFreeParserCtxt(ctxt);
9390
9391 return(ret);
9392}
9393
9394/**
9395 * xmlSAXParseDTD:
9396 * @sax: the SAX handler block
9397 * @ExternalID: a NAME* containing the External ID of the DTD
9398 * @SystemID: a NAME* containing the URL to the DTD
9399 *
9400 * Load and parse an external subset.
9401 *
9402 * Returns the resulting xmlDtdPtr or NULL in case of error.
9403 */
9404
9405xmlDtdPtr
9406xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9407 const xmlChar *SystemID) {
9408 xmlDtdPtr ret = NULL;
9409 xmlParserCtxtPtr ctxt;
9410 xmlParserInputPtr input = NULL;
9411 xmlCharEncoding enc;
9412
9413 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9414
9415 ctxt = xmlNewParserCtxt();
9416 if (ctxt == NULL) {
9417 return(NULL);
9418 }
9419
9420 /*
9421 * Set-up the SAX context
9422 */
9423 if (sax != NULL) {
9424 if (ctxt->sax != NULL)
9425 xmlFree(ctxt->sax);
9426 ctxt->sax = sax;
9427 ctxt->userData = NULL;
9428 }
9429
9430 /*
9431 * Ask the Entity resolver to load the damn thing
9432 */
9433
9434 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9435 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9436 if (input == NULL) {
9437 if (sax != NULL) ctxt->sax = NULL;
9438 xmlFreeParserCtxt(ctxt);
9439 return(NULL);
9440 }
9441
9442 /*
9443 * plug some encoding conversion routines here.
9444 */
9445 xmlPushInput(ctxt, input);
9446 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9447 xmlSwitchEncoding(ctxt, enc);
9448
9449 if (input->filename == NULL)
9450 input->filename = (char *) xmlStrdup(SystemID);
9451 input->line = 1;
9452 input->col = 1;
9453 input->base = ctxt->input->cur;
9454 input->cur = ctxt->input->cur;
9455 input->free = NULL;
9456
9457 /*
9458 * let's parse that entity knowing it's an external subset.
9459 */
9460 ctxt->inSubset = 2;
9461 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9462 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9463 ExternalID, SystemID);
9464 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9465
9466 if (ctxt->myDoc != NULL) {
9467 if (ctxt->wellFormed) {
9468 ret = ctxt->myDoc->extSubset;
9469 ctxt->myDoc->extSubset = NULL;
9470 } else {
9471 ret = NULL;
9472 }
9473 xmlFreeDoc(ctxt->myDoc);
9474 ctxt->myDoc = NULL;
9475 }
9476 if (sax != NULL) ctxt->sax = NULL;
9477 xmlFreeParserCtxt(ctxt);
9478
9479 return(ret);
9480}
9481
9482/**
9483 * xmlParseDTD:
9484 * @ExternalID: a NAME* containing the External ID of the DTD
9485 * @SystemID: a NAME* containing the URL to the DTD
9486 *
9487 * Load and parse an external subset.
9488 *
9489 * Returns the resulting xmlDtdPtr or NULL in case of error.
9490 */
9491
9492xmlDtdPtr
9493xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9494 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9495}
9496
9497/************************************************************************
9498 * *
9499 * Front ends when parsing an Entity *
9500 * *
9501 ************************************************************************/
9502
9503/**
Owen Taylor3473f882001-02-23 17:55:21 +00009504 * xmlParseCtxtExternalEntity:
9505 * @ctx: the existing parsing context
9506 * @URL: the URL for the entity to load
9507 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009508 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009509 *
9510 * Parse an external general entity within an existing parsing context
9511 * An external general parsed entity is well-formed if it matches the
9512 * production labeled extParsedEnt.
9513 *
9514 * [78] extParsedEnt ::= TextDecl? content
9515 *
9516 * Returns 0 if the entity is well formed, -1 in case of args problem and
9517 * the parser error code otherwise
9518 */
9519
9520int
9521xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009522 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009523 xmlParserCtxtPtr ctxt;
9524 xmlDocPtr newDoc;
9525 xmlSAXHandlerPtr oldsax = NULL;
9526 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009527 xmlChar start[4];
9528 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009529
9530 if (ctx->depth > 40) {
9531 return(XML_ERR_ENTITY_LOOP);
9532 }
9533
Daniel Veillardcda96922001-08-21 10:56:31 +00009534 if (lst != NULL)
9535 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009536 if ((URL == NULL) && (ID == NULL))
9537 return(-1);
9538 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9539 return(-1);
9540
9541
9542 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9543 if (ctxt == NULL) return(-1);
9544 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +00009545 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +00009546 oldsax = ctxt->sax;
9547 ctxt->sax = ctx->sax;
9548 newDoc = xmlNewDoc(BAD_CAST "1.0");
9549 if (newDoc == NULL) {
9550 xmlFreeParserCtxt(ctxt);
9551 return(-1);
9552 }
9553 if (ctx->myDoc != NULL) {
9554 newDoc->intSubset = ctx->myDoc->intSubset;
9555 newDoc->extSubset = ctx->myDoc->extSubset;
9556 }
9557 if (ctx->myDoc->URL != NULL) {
9558 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9559 }
9560 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9561 if (newDoc->children == NULL) {
9562 ctxt->sax = oldsax;
9563 xmlFreeParserCtxt(ctxt);
9564 newDoc->intSubset = NULL;
9565 newDoc->extSubset = NULL;
9566 xmlFreeDoc(newDoc);
9567 return(-1);
9568 }
9569 nodePush(ctxt, newDoc->children);
9570 if (ctx->myDoc == NULL) {
9571 ctxt->myDoc = newDoc;
9572 } else {
9573 ctxt->myDoc = ctx->myDoc;
9574 newDoc->children->doc = ctx->myDoc;
9575 }
9576
Daniel Veillard87a764e2001-06-20 17:41:10 +00009577 /*
9578 * Get the 4 first bytes and decode the charset
9579 * if enc != XML_CHAR_ENCODING_NONE
9580 * plug some encoding conversion routines.
9581 */
9582 GROW
9583 start[0] = RAW;
9584 start[1] = NXT(1);
9585 start[2] = NXT(2);
9586 start[3] = NXT(3);
9587 enc = xmlDetectCharEncoding(start, 4);
9588 if (enc != XML_CHAR_ENCODING_NONE) {
9589 xmlSwitchEncoding(ctxt, enc);
9590 }
9591
Owen Taylor3473f882001-02-23 17:55:21 +00009592 /*
9593 * Parse a possible text declaration first
9594 */
Owen Taylor3473f882001-02-23 17:55:21 +00009595 if ((RAW == '<') && (NXT(1) == '?') &&
9596 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9597 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9598 xmlParseTextDecl(ctxt);
9599 }
9600
9601 /*
9602 * Doing validity checking on chunk doesn't make sense
9603 */
9604 ctxt->instate = XML_PARSER_CONTENT;
9605 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +00009606 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +00009607 ctxt->loadsubset = ctx->loadsubset;
9608 ctxt->depth = ctx->depth + 1;
9609 ctxt->replaceEntities = ctx->replaceEntities;
9610 if (ctxt->validate) {
9611 ctxt->vctxt.error = ctx->vctxt.error;
9612 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009613 } else {
9614 ctxt->vctxt.error = NULL;
9615 ctxt->vctxt.warning = NULL;
9616 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009617 ctxt->vctxt.nodeTab = NULL;
9618 ctxt->vctxt.nodeNr = 0;
9619 ctxt->vctxt.nodeMax = 0;
9620 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009621
9622 xmlParseContent(ctxt);
9623
Daniel Veillard5f8d1a32003-03-23 21:02:00 +00009624 ctx->validate = ctxt->validate;
9625 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +00009626 if ((RAW == '<') && (NXT(1) == '/')) {
9627 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9628 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9629 ctxt->sax->error(ctxt->userData,
9630 "chunk is not well balanced\n");
9631 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009632 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009633 } else if (RAW != 0) {
9634 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9635 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9636 ctxt->sax->error(ctxt->userData,
9637 "extra content at the end of well balanced chunk\n");
9638 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009639 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009640 }
9641 if (ctxt->node != newDoc->children) {
9642 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9643 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9644 ctxt->sax->error(ctxt->userData,
9645 "chunk is not well balanced\n");
9646 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009647 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009648 }
9649
9650 if (!ctxt->wellFormed) {
9651 if (ctxt->errNo == 0)
9652 ret = 1;
9653 else
9654 ret = ctxt->errNo;
9655 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009656 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009657 xmlNodePtr cur;
9658
9659 /*
9660 * Return the newly created nodeset after unlinking it from
9661 * they pseudo parent.
9662 */
9663 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009664 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009665 while (cur != NULL) {
9666 cur->parent = NULL;
9667 cur = cur->next;
9668 }
9669 newDoc->children->children = NULL;
9670 }
9671 ret = 0;
9672 }
9673 ctxt->sax = oldsax;
9674 xmlFreeParserCtxt(ctxt);
9675 newDoc->intSubset = NULL;
9676 newDoc->extSubset = NULL;
9677 xmlFreeDoc(newDoc);
9678
9679 return(ret);
9680}
9681
9682/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009683 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009684 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009685 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009686 * @sax: the SAX handler bloc (possibly NULL)
9687 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9688 * @depth: Used for loop detection, use 0
9689 * @URL: the URL for the entity to load
9690 * @ID: the System ID for the entity to load
9691 * @list: the return value for the set of parsed nodes
9692 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009693 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009694 *
9695 * Returns 0 if the entity is well formed, -1 in case of args problem and
9696 * the parser error code otherwise
9697 */
9698
Daniel Veillard257d9102001-05-08 10:41:44 +00009699static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009700xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9701 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009702 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009703 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009704 xmlParserCtxtPtr ctxt;
9705 xmlDocPtr newDoc;
9706 xmlSAXHandlerPtr oldsax = NULL;
9707 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009708 xmlChar start[4];
9709 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009710
9711 if (depth > 40) {
9712 return(XML_ERR_ENTITY_LOOP);
9713 }
9714
9715
9716
9717 if (list != NULL)
9718 *list = NULL;
9719 if ((URL == NULL) && (ID == NULL))
9720 return(-1);
9721 if (doc == NULL) /* @@ relax but check for dereferences */
9722 return(-1);
9723
9724
9725 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9726 if (ctxt == NULL) return(-1);
9727 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009728 if (oldctxt != NULL) {
9729 ctxt->_private = oldctxt->_private;
9730 ctxt->loadsubset = oldctxt->loadsubset;
9731 ctxt->validate = oldctxt->validate;
9732 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009733 ctxt->record_info = oldctxt->record_info;
9734 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
9735 ctxt->node_seq.length = oldctxt->node_seq.length;
9736 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009737 } else {
9738 /*
9739 * Doing validity checking on chunk without context
9740 * doesn't make sense
9741 */
9742 ctxt->_private = NULL;
9743 ctxt->validate = 0;
9744 ctxt->external = 2;
9745 ctxt->loadsubset = 0;
9746 }
Owen Taylor3473f882001-02-23 17:55:21 +00009747 if (sax != NULL) {
9748 oldsax = ctxt->sax;
9749 ctxt->sax = sax;
9750 if (user_data != NULL)
9751 ctxt->userData = user_data;
9752 }
9753 newDoc = xmlNewDoc(BAD_CAST "1.0");
9754 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009755 ctxt->node_seq.maximum = 0;
9756 ctxt->node_seq.length = 0;
9757 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009758 xmlFreeParserCtxt(ctxt);
9759 return(-1);
9760 }
9761 if (doc != NULL) {
9762 newDoc->intSubset = doc->intSubset;
9763 newDoc->extSubset = doc->extSubset;
9764 }
9765 if (doc->URL != NULL) {
9766 newDoc->URL = xmlStrdup(doc->URL);
9767 }
9768 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9769 if (newDoc->children == NULL) {
9770 if (sax != NULL)
9771 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009772 ctxt->node_seq.maximum = 0;
9773 ctxt->node_seq.length = 0;
9774 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009775 xmlFreeParserCtxt(ctxt);
9776 newDoc->intSubset = NULL;
9777 newDoc->extSubset = NULL;
9778 xmlFreeDoc(newDoc);
9779 return(-1);
9780 }
9781 nodePush(ctxt, newDoc->children);
9782 if (doc == NULL) {
9783 ctxt->myDoc = newDoc;
9784 } else {
9785 ctxt->myDoc = doc;
9786 newDoc->children->doc = doc;
9787 }
9788
Daniel Veillard87a764e2001-06-20 17:41:10 +00009789 /*
9790 * Get the 4 first bytes and decode the charset
9791 * if enc != XML_CHAR_ENCODING_NONE
9792 * plug some encoding conversion routines.
9793 */
9794 GROW;
9795 start[0] = RAW;
9796 start[1] = NXT(1);
9797 start[2] = NXT(2);
9798 start[3] = NXT(3);
9799 enc = xmlDetectCharEncoding(start, 4);
9800 if (enc != XML_CHAR_ENCODING_NONE) {
9801 xmlSwitchEncoding(ctxt, enc);
9802 }
9803
Owen Taylor3473f882001-02-23 17:55:21 +00009804 /*
9805 * Parse a possible text declaration first
9806 */
Owen Taylor3473f882001-02-23 17:55:21 +00009807 if ((RAW == '<') && (NXT(1) == '?') &&
9808 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9809 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9810 xmlParseTextDecl(ctxt);
9811 }
9812
Owen Taylor3473f882001-02-23 17:55:21 +00009813 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009814 ctxt->depth = depth;
9815
9816 xmlParseContent(ctxt);
9817
Daniel Veillard561b7f82002-03-20 21:55:57 +00009818 if ((RAW == '<') && (NXT(1) == '/')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009819 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9820 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9821 ctxt->sax->error(ctxt->userData,
9822 "chunk is not well balanced\n");
9823 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009824 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00009825 } else if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00009826 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9827 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9828 ctxt->sax->error(ctxt->userData,
9829 "extra content at the end of well balanced chunk\n");
9830 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009831 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009832 }
9833 if (ctxt->node != newDoc->children) {
9834 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9835 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9836 ctxt->sax->error(ctxt->userData,
9837 "chunk is not well balanced\n");
9838 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009839 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009840 }
9841
9842 if (!ctxt->wellFormed) {
9843 if (ctxt->errNo == 0)
9844 ret = 1;
9845 else
9846 ret = ctxt->errNo;
9847 } else {
9848 if (list != NULL) {
9849 xmlNodePtr cur;
9850
9851 /*
9852 * Return the newly created nodeset after unlinking it from
9853 * they pseudo parent.
9854 */
9855 cur = newDoc->children->children;
9856 *list = cur;
9857 while (cur != NULL) {
9858 cur->parent = NULL;
9859 cur = cur->next;
9860 }
9861 newDoc->children->children = NULL;
9862 }
9863 ret = 0;
9864 }
9865 if (sax != NULL)
9866 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +00009867 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
9868 oldctxt->node_seq.length = ctxt->node_seq.length;
9869 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009870 ctxt->node_seq.maximum = 0;
9871 ctxt->node_seq.length = 0;
9872 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009873 xmlFreeParserCtxt(ctxt);
9874 newDoc->intSubset = NULL;
9875 newDoc->extSubset = NULL;
9876 xmlFreeDoc(newDoc);
9877
9878 return(ret);
9879}
9880
9881/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009882 * xmlParseExternalEntity:
9883 * @doc: the document the chunk pertains to
9884 * @sax: the SAX handler bloc (possibly NULL)
9885 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9886 * @depth: Used for loop detection, use 0
9887 * @URL: the URL for the entity to load
9888 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009889 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009890 *
9891 * Parse an external general entity
9892 * An external general parsed entity is well-formed if it matches the
9893 * production labeled extParsedEnt.
9894 *
9895 * [78] extParsedEnt ::= TextDecl? content
9896 *
9897 * Returns 0 if the entity is well formed, -1 in case of args problem and
9898 * the parser error code otherwise
9899 */
9900
9901int
9902xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009903 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009904 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009905 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009906}
9907
9908/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009909 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009910 * @doc: the document the chunk pertains to
9911 * @sax: the SAX handler bloc (possibly NULL)
9912 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9913 * @depth: Used for loop detection, use 0
9914 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009915 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009916 *
9917 * Parse a well-balanced chunk of an XML document
9918 * called by the parser
9919 * The allowed sequence for the Well Balanced Chunk is the one defined by
9920 * the content production in the XML grammar:
9921 *
9922 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9923 *
9924 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9925 * the parser error code otherwise
9926 */
9927
9928int
9929xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009930 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +00009931 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
9932 depth, string, lst, 0 );
9933}
9934
9935/**
Daniel Veillard328f48c2002-11-15 15:24:34 +00009936 * xmlParseBalancedChunkMemoryInternal:
9937 * @oldctxt: the existing parsing context
9938 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9939 * @user_data: the user data field for the parser context
9940 * @lst: the return value for the set of parsed nodes
9941 *
9942 *
9943 * Parse a well-balanced chunk of an XML document
9944 * called by the parser
9945 * The allowed sequence for the Well Balanced Chunk is the one defined by
9946 * the content production in the XML grammar:
9947 *
9948 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9949 *
9950 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9951 * the parser error code otherwise
9952 *
9953 * In case recover is set to 1, the nodelist will not be empty even if
9954 * the parsed chunk is not well balanced.
9955 */
9956static int
9957xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
9958 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
9959 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009960 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009961 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009962 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009963 int size;
9964 int ret = 0;
9965
9966 if (oldctxt->depth > 40) {
9967 return(XML_ERR_ENTITY_LOOP);
9968 }
9969
9970
9971 if (lst != NULL)
9972 *lst = NULL;
9973 if (string == NULL)
9974 return(-1);
9975
9976 size = xmlStrlen(string);
9977
9978 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9979 if (ctxt == NULL) return(-1);
9980 if (user_data != NULL)
9981 ctxt->userData = user_data;
9982 else
9983 ctxt->userData = ctxt;
9984
9985 oldsax = ctxt->sax;
9986 ctxt->sax = oldctxt->sax;
Daniel Veillarde1ca5032002-12-09 14:13:43 +00009987 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009988 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +00009989 newDoc = xmlNewDoc(BAD_CAST "1.0");
9990 if (newDoc == NULL) {
9991 ctxt->sax = oldsax;
9992 xmlFreeParserCtxt(ctxt);
9993 return(-1);
9994 }
Daniel Veillard328f48c2002-11-15 15:24:34 +00009995 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009996 } else {
9997 ctxt->myDoc = oldctxt->myDoc;
9998 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009999 }
Daniel Veillard9bc53102002-11-25 13:20:04 +000010000 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +000010001 BAD_CAST "pseudoroot", NULL);
10002 if (ctxt->myDoc->children == NULL) {
10003 ctxt->sax = oldsax;
10004 xmlFreeParserCtxt(ctxt);
10005 if (newDoc != NULL)
10006 xmlFreeDoc(newDoc);
10007 return(-1);
10008 }
10009 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010010 ctxt->instate = XML_PARSER_CONTENT;
10011 ctxt->depth = oldctxt->depth + 1;
10012
Daniel Veillard328f48c2002-11-15 15:24:34 +000010013 ctxt->validate = 0;
10014 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000010015 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
10016 /*
10017 * ID/IDREF registration will be done in xmlValidateElement below
10018 */
10019 ctxt->loadsubset |= XML_SKIP_IDS;
10020 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010021
Daniel Veillard68e9e742002-11-16 15:35:11 +000010022 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010023 if ((RAW == '<') && (NXT(1) == '/')) {
10024 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10025 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10026 ctxt->sax->error(ctxt->userData,
10027 "chunk is not well balanced\n");
10028 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010029 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010030 } else if (RAW != 0) {
10031 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10032 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10033 ctxt->sax->error(ctxt->userData,
10034 "extra content at the end of well balanced chunk\n");
10035 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010036 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010037 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010038 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000010039 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10040 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10041 ctxt->sax->error(ctxt->userData,
10042 "chunk is not well balanced\n");
10043 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010044 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010045 }
10046
10047 if (!ctxt->wellFormed) {
10048 if (ctxt->errNo == 0)
10049 ret = 1;
10050 else
10051 ret = ctxt->errNo;
10052 } else {
10053 ret = 0;
10054 }
10055
10056 if ((lst != NULL) && (ret == 0)) {
10057 xmlNodePtr cur;
10058
10059 /*
10060 * Return the newly created nodeset after unlinking it from
10061 * they pseudo parent.
10062 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000010063 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010064 *lst = cur;
10065 while (cur != NULL) {
Daniel Veillard8d589042003-02-04 15:07:21 +000010066 if (oldctxt->validate && oldctxt->wellFormed &&
10067 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
10068 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
10069 oldctxt->myDoc, cur);
10070 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010071 cur->parent = NULL;
10072 cur = cur->next;
10073 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010074 ctxt->myDoc->children->children = NULL;
10075 }
10076 if (ctxt->myDoc != NULL) {
10077 xmlFreeNode(ctxt->myDoc->children);
10078 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010079 }
10080
10081 ctxt->sax = oldsax;
10082 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010083 if (newDoc != NULL)
10084 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010085
10086 return(ret);
10087}
10088
10089/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000010090 * xmlParseBalancedChunkMemoryRecover:
10091 * @doc: the document the chunk pertains to
10092 * @sax: the SAX handler bloc (possibly NULL)
10093 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10094 * @depth: Used for loop detection, use 0
10095 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10096 * @lst: the return value for the set of parsed nodes
10097 * @recover: return nodes even if the data is broken (use 0)
10098 *
10099 *
10100 * Parse a well-balanced chunk of an XML document
10101 * called by the parser
10102 * The allowed sequence for the Well Balanced Chunk is the one defined by
10103 * the content production in the XML grammar:
10104 *
10105 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10106 *
10107 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10108 * the parser error code otherwise
10109 *
10110 * In case recover is set to 1, the nodelist will not be empty even if
10111 * the parsed chunk is not well balanced.
10112 */
10113int
10114xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
10115 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
10116 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000010117 xmlParserCtxtPtr ctxt;
10118 xmlDocPtr newDoc;
10119 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000010120 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000010121 int size;
10122 int ret = 0;
10123
10124 if (depth > 40) {
10125 return(XML_ERR_ENTITY_LOOP);
10126 }
10127
10128
Daniel Veillardcda96922001-08-21 10:56:31 +000010129 if (lst != NULL)
10130 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010131 if (string == NULL)
10132 return(-1);
10133
10134 size = xmlStrlen(string);
10135
10136 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10137 if (ctxt == NULL) return(-1);
10138 ctxt->userData = ctxt;
10139 if (sax != NULL) {
10140 oldsax = ctxt->sax;
10141 ctxt->sax = sax;
10142 if (user_data != NULL)
10143 ctxt->userData = user_data;
10144 }
10145 newDoc = xmlNewDoc(BAD_CAST "1.0");
10146 if (newDoc == NULL) {
10147 xmlFreeParserCtxt(ctxt);
10148 return(-1);
10149 }
10150 if (doc != NULL) {
10151 newDoc->intSubset = doc->intSubset;
10152 newDoc->extSubset = doc->extSubset;
10153 }
10154 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10155 if (newDoc->children == NULL) {
10156 if (sax != NULL)
10157 ctxt->sax = oldsax;
10158 xmlFreeParserCtxt(ctxt);
10159 newDoc->intSubset = NULL;
10160 newDoc->extSubset = NULL;
10161 xmlFreeDoc(newDoc);
10162 return(-1);
10163 }
10164 nodePush(ctxt, newDoc->children);
10165 if (doc == NULL) {
10166 ctxt->myDoc = newDoc;
10167 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000010168 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000010169 newDoc->children->doc = doc;
10170 }
10171 ctxt->instate = XML_PARSER_CONTENT;
10172 ctxt->depth = depth;
10173
10174 /*
10175 * Doing validity checking on chunk doesn't make sense
10176 */
10177 ctxt->validate = 0;
10178 ctxt->loadsubset = 0;
10179
Daniel Veillardb39bc392002-10-26 19:29:51 +000010180 if ( doc != NULL ){
10181 content = doc->children;
10182 doc->children = NULL;
10183 xmlParseContent(ctxt);
10184 doc->children = content;
10185 }
10186 else {
10187 xmlParseContent(ctxt);
10188 }
Owen Taylor3473f882001-02-23 17:55:21 +000010189 if ((RAW == '<') && (NXT(1) == '/')) {
10190 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10191 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10192 ctxt->sax->error(ctxt->userData,
10193 "chunk is not well balanced\n");
10194 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010195 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010196 } else if (RAW != 0) {
10197 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10198 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10199 ctxt->sax->error(ctxt->userData,
10200 "extra content at the end of well balanced chunk\n");
10201 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010202 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010203 }
10204 if (ctxt->node != newDoc->children) {
10205 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10206 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10207 ctxt->sax->error(ctxt->userData,
10208 "chunk is not well balanced\n");
10209 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010210 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010211 }
10212
10213 if (!ctxt->wellFormed) {
10214 if (ctxt->errNo == 0)
10215 ret = 1;
10216 else
10217 ret = ctxt->errNo;
10218 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010219 ret = 0;
10220 }
10221
10222 if (lst != NULL && (ret == 0 || recover == 1)) {
10223 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010224
10225 /*
10226 * Return the newly created nodeset after unlinking it from
10227 * they pseudo parent.
10228 */
10229 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010230 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010231 while (cur != NULL) {
10232 cur->parent = NULL;
10233 cur = cur->next;
10234 }
10235 newDoc->children->children = NULL;
10236 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000010237
Owen Taylor3473f882001-02-23 17:55:21 +000010238 if (sax != NULL)
10239 ctxt->sax = oldsax;
10240 xmlFreeParserCtxt(ctxt);
10241 newDoc->intSubset = NULL;
10242 newDoc->extSubset = NULL;
10243 xmlFreeDoc(newDoc);
10244
10245 return(ret);
10246}
10247
10248/**
10249 * xmlSAXParseEntity:
10250 * @sax: the SAX handler block
10251 * @filename: the filename
10252 *
10253 * parse an XML external entity out of context and build a tree.
10254 * It use the given SAX function block to handle the parsing callback.
10255 * If sax is NULL, fallback to the default DOM tree building routines.
10256 *
10257 * [78] extParsedEnt ::= TextDecl? content
10258 *
10259 * This correspond to a "Well Balanced" chunk
10260 *
10261 * Returns the resulting document tree
10262 */
10263
10264xmlDocPtr
10265xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
10266 xmlDocPtr ret;
10267 xmlParserCtxtPtr ctxt;
10268 char *directory = NULL;
10269
10270 ctxt = xmlCreateFileParserCtxt(filename);
10271 if (ctxt == NULL) {
10272 return(NULL);
10273 }
10274 if (sax != NULL) {
10275 if (ctxt->sax != NULL)
10276 xmlFree(ctxt->sax);
10277 ctxt->sax = sax;
10278 ctxt->userData = NULL;
10279 }
10280
10281 if ((ctxt->directory == NULL) && (directory == NULL))
10282 directory = xmlParserGetDirectory(filename);
10283
10284 xmlParseExtParsedEnt(ctxt);
10285
10286 if (ctxt->wellFormed)
10287 ret = ctxt->myDoc;
10288 else {
10289 ret = NULL;
10290 xmlFreeDoc(ctxt->myDoc);
10291 ctxt->myDoc = NULL;
10292 }
10293 if (sax != NULL)
10294 ctxt->sax = NULL;
10295 xmlFreeParserCtxt(ctxt);
10296
10297 return(ret);
10298}
10299
10300/**
10301 * xmlParseEntity:
10302 * @filename: the filename
10303 *
10304 * parse an XML external entity out of context and build a tree.
10305 *
10306 * [78] extParsedEnt ::= TextDecl? content
10307 *
10308 * This correspond to a "Well Balanced" chunk
10309 *
10310 * Returns the resulting document tree
10311 */
10312
10313xmlDocPtr
10314xmlParseEntity(const char *filename) {
10315 return(xmlSAXParseEntity(NULL, filename));
10316}
10317
10318/**
10319 * xmlCreateEntityParserCtxt:
10320 * @URL: the entity URL
10321 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010322 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000010323 *
10324 * Create a parser context for an external entity
10325 * Automatic support for ZLIB/Compress compressed document is provided
10326 * by default if found at compile-time.
10327 *
10328 * Returns the new parser context or NULL
10329 */
10330xmlParserCtxtPtr
10331xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10332 const xmlChar *base) {
10333 xmlParserCtxtPtr ctxt;
10334 xmlParserInputPtr inputStream;
10335 char *directory = NULL;
10336 xmlChar *uri;
10337
10338 ctxt = xmlNewParserCtxt();
10339 if (ctxt == NULL) {
10340 return(NULL);
10341 }
10342
10343 uri = xmlBuildURI(URL, base);
10344
10345 if (uri == NULL) {
10346 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
10347 if (inputStream == NULL) {
10348 xmlFreeParserCtxt(ctxt);
10349 return(NULL);
10350 }
10351
10352 inputPush(ctxt, inputStream);
10353
10354 if ((ctxt->directory == NULL) && (directory == NULL))
10355 directory = xmlParserGetDirectory((char *)URL);
10356 if ((ctxt->directory == NULL) && (directory != NULL))
10357 ctxt->directory = directory;
10358 } else {
10359 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
10360 if (inputStream == NULL) {
10361 xmlFree(uri);
10362 xmlFreeParserCtxt(ctxt);
10363 return(NULL);
10364 }
10365
10366 inputPush(ctxt, inputStream);
10367
10368 if ((ctxt->directory == NULL) && (directory == NULL))
10369 directory = xmlParserGetDirectory((char *)uri);
10370 if ((ctxt->directory == NULL) && (directory != NULL))
10371 ctxt->directory = directory;
10372 xmlFree(uri);
10373 }
10374
10375 return(ctxt);
10376}
10377
10378/************************************************************************
10379 * *
10380 * Front ends when parsing from a file *
10381 * *
10382 ************************************************************************/
10383
10384/**
10385 * xmlCreateFileParserCtxt:
10386 * @filename: the filename
10387 *
10388 * Create a parser context for a file content.
10389 * Automatic support for ZLIB/Compress compressed document is provided
10390 * by default if found at compile-time.
10391 *
10392 * Returns the new parser context or NULL
10393 */
10394xmlParserCtxtPtr
10395xmlCreateFileParserCtxt(const char *filename)
10396{
10397 xmlParserCtxtPtr ctxt;
10398 xmlParserInputPtr inputStream;
Igor Zlatkovicce076162003-02-23 13:39:39 +000010399 char *canonicFilename;
Owen Taylor3473f882001-02-23 17:55:21 +000010400 char *directory = NULL;
10401
Owen Taylor3473f882001-02-23 17:55:21 +000010402 ctxt = xmlNewParserCtxt();
10403 if (ctxt == NULL) {
10404 if (xmlDefaultSAXHandler.error != NULL) {
10405 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
10406 }
10407 return(NULL);
10408 }
10409
Daniel Veillardc64b8e92003-02-24 11:47:13 +000010410 canonicFilename = (char *) xmlCanonicPath((const xmlChar *) filename);
Igor Zlatkovicce076162003-02-23 13:39:39 +000010411 if (canonicFilename == NULL) {
10412 if (xmlDefaultSAXHandler.error != NULL) {
10413 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
10414 }
10415 return(NULL);
10416 }
10417
10418 inputStream = xmlLoadExternalEntity(canonicFilename, NULL, ctxt);
10419 xmlFree(canonicFilename);
Owen Taylor3473f882001-02-23 17:55:21 +000010420 if (inputStream == NULL) {
10421 xmlFreeParserCtxt(ctxt);
10422 return(NULL);
10423 }
10424
Owen Taylor3473f882001-02-23 17:55:21 +000010425 inputPush(ctxt, inputStream);
10426 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010427 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000010428 if ((ctxt->directory == NULL) && (directory != NULL))
10429 ctxt->directory = directory;
10430
10431 return(ctxt);
10432}
10433
10434/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010435 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000010436 * @sax: the SAX handler block
10437 * @filename: the filename
10438 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10439 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000010440 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000010441 *
10442 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10443 * compressed document is provided by default if found at compile-time.
10444 * It use the given SAX function block to handle the parsing callback.
10445 * If sax is NULL, fallback to the default DOM tree building routines.
10446 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000010447 * User data (void *) is stored within the parser context in the
10448 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000010449 *
Owen Taylor3473f882001-02-23 17:55:21 +000010450 * Returns the resulting document tree
10451 */
10452
10453xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000010454xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
10455 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000010456 xmlDocPtr ret;
10457 xmlParserCtxtPtr ctxt;
10458 char *directory = NULL;
10459
Daniel Veillard635ef722001-10-29 11:48:19 +000010460 xmlInitParser();
10461
Owen Taylor3473f882001-02-23 17:55:21 +000010462 ctxt = xmlCreateFileParserCtxt(filename);
10463 if (ctxt == NULL) {
10464 return(NULL);
10465 }
10466 if (sax != NULL) {
10467 if (ctxt->sax != NULL)
10468 xmlFree(ctxt->sax);
10469 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010470 }
Daniel Veillarda293c322001-10-02 13:54:14 +000010471 if (data!=NULL) {
10472 ctxt->_private=data;
10473 }
Owen Taylor3473f882001-02-23 17:55:21 +000010474
10475 if ((ctxt->directory == NULL) && (directory == NULL))
10476 directory = xmlParserGetDirectory(filename);
10477 if ((ctxt->directory == NULL) && (directory != NULL))
10478 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
10479
Daniel Veillarddad3f682002-11-17 16:47:27 +000010480 ctxt->recovery = recovery;
10481
Owen Taylor3473f882001-02-23 17:55:21 +000010482 xmlParseDocument(ctxt);
10483
10484 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10485 else {
10486 ret = NULL;
10487 xmlFreeDoc(ctxt->myDoc);
10488 ctxt->myDoc = NULL;
10489 }
10490 if (sax != NULL)
10491 ctxt->sax = NULL;
10492 xmlFreeParserCtxt(ctxt);
10493
10494 return(ret);
10495}
10496
10497/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010498 * xmlSAXParseFile:
10499 * @sax: the SAX handler block
10500 * @filename: the filename
10501 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10502 * documents
10503 *
10504 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10505 * compressed document is provided by default if found at compile-time.
10506 * It use the given SAX function block to handle the parsing callback.
10507 * If sax is NULL, fallback to the default DOM tree building routines.
10508 *
10509 * Returns the resulting document tree
10510 */
10511
10512xmlDocPtr
10513xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
10514 int recovery) {
10515 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
10516}
10517
10518/**
Owen Taylor3473f882001-02-23 17:55:21 +000010519 * xmlRecoverDoc:
10520 * @cur: a pointer to an array of xmlChar
10521 *
10522 * parse an XML in-memory document and build a tree.
10523 * In the case the document is not Well Formed, a tree is built anyway
10524 *
10525 * Returns the resulting document tree
10526 */
10527
10528xmlDocPtr
10529xmlRecoverDoc(xmlChar *cur) {
10530 return(xmlSAXParseDoc(NULL, cur, 1));
10531}
10532
10533/**
10534 * xmlParseFile:
10535 * @filename: the filename
10536 *
10537 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10538 * compressed document is provided by default if found at compile-time.
10539 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000010540 * Returns the resulting document tree if the file was wellformed,
10541 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000010542 */
10543
10544xmlDocPtr
10545xmlParseFile(const char *filename) {
10546 return(xmlSAXParseFile(NULL, filename, 0));
10547}
10548
10549/**
10550 * xmlRecoverFile:
10551 * @filename: the filename
10552 *
10553 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10554 * compressed document is provided by default if found at compile-time.
10555 * In the case the document is not Well Formed, a tree is built anyway
10556 *
10557 * Returns the resulting document tree
10558 */
10559
10560xmlDocPtr
10561xmlRecoverFile(const char *filename) {
10562 return(xmlSAXParseFile(NULL, filename, 1));
10563}
10564
10565
10566/**
10567 * xmlSetupParserForBuffer:
10568 * @ctxt: an XML parser context
10569 * @buffer: a xmlChar * buffer
10570 * @filename: a file name
10571 *
10572 * Setup the parser context to parse a new buffer; Clears any prior
10573 * contents from the parser context. The buffer parameter must not be
10574 * NULL, but the filename parameter can be
10575 */
10576void
10577xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
10578 const char* filename)
10579{
10580 xmlParserInputPtr input;
10581
10582 input = xmlNewInputStream(ctxt);
10583 if (input == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +000010584 xmlGenericError(xmlGenericErrorContext,
10585 "malloc");
Owen Taylor3473f882001-02-23 17:55:21 +000010586 xmlFree(ctxt);
10587 return;
10588 }
10589
10590 xmlClearParserCtxt(ctxt);
10591 if (filename != NULL)
10592 input->filename = xmlMemStrdup(filename);
10593 input->base = buffer;
10594 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010595 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000010596 inputPush(ctxt, input);
10597}
10598
10599/**
10600 * xmlSAXUserParseFile:
10601 * @sax: a SAX handler
10602 * @user_data: The user data returned on SAX callbacks
10603 * @filename: a file name
10604 *
10605 * parse an XML file and call the given SAX handler routines.
10606 * Automatic support for ZLIB/Compress compressed document is provided
10607 *
10608 * Returns 0 in case of success or a error number otherwise
10609 */
10610int
10611xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10612 const char *filename) {
10613 int ret = 0;
10614 xmlParserCtxtPtr ctxt;
10615
10616 ctxt = xmlCreateFileParserCtxt(filename);
10617 if (ctxt == NULL) return -1;
10618 if (ctxt->sax != &xmlDefaultSAXHandler)
10619 xmlFree(ctxt->sax);
10620 ctxt->sax = sax;
10621 if (user_data != NULL)
10622 ctxt->userData = user_data;
10623
10624 xmlParseDocument(ctxt);
10625
10626 if (ctxt->wellFormed)
10627 ret = 0;
10628 else {
10629 if (ctxt->errNo != 0)
10630 ret = ctxt->errNo;
10631 else
10632 ret = -1;
10633 }
10634 if (sax != NULL)
10635 ctxt->sax = NULL;
10636 xmlFreeParserCtxt(ctxt);
10637
10638 return ret;
10639}
10640
10641/************************************************************************
10642 * *
10643 * Front ends when parsing from memory *
10644 * *
10645 ************************************************************************/
10646
10647/**
10648 * xmlCreateMemoryParserCtxt:
10649 * @buffer: a pointer to a char array
10650 * @size: the size of the array
10651 *
10652 * Create a parser context for an XML in-memory document.
10653 *
10654 * Returns the new parser context or NULL
10655 */
10656xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010657xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010658 xmlParserCtxtPtr ctxt;
10659 xmlParserInputPtr input;
10660 xmlParserInputBufferPtr buf;
10661
10662 if (buffer == NULL)
10663 return(NULL);
10664 if (size <= 0)
10665 return(NULL);
10666
10667 ctxt = xmlNewParserCtxt();
10668 if (ctxt == NULL)
10669 return(NULL);
10670
10671 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010672 if (buf == NULL) {
10673 xmlFreeParserCtxt(ctxt);
10674 return(NULL);
10675 }
Owen Taylor3473f882001-02-23 17:55:21 +000010676
10677 input = xmlNewInputStream(ctxt);
10678 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010679 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010680 xmlFreeParserCtxt(ctxt);
10681 return(NULL);
10682 }
10683
10684 input->filename = NULL;
10685 input->buf = buf;
10686 input->base = input->buf->buffer->content;
10687 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010688 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010689
10690 inputPush(ctxt, input);
10691 return(ctxt);
10692}
10693
10694/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010695 * xmlSAXParseMemoryWithData:
10696 * @sax: the SAX handler block
10697 * @buffer: an pointer to a char array
10698 * @size: the size of the array
10699 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10700 * documents
10701 * @data: the userdata
10702 *
10703 * parse an XML in-memory block and use the given SAX function block
10704 * to handle the parsing callback. If sax is NULL, fallback to the default
10705 * DOM tree building routines.
10706 *
10707 * User data (void *) is stored within the parser context in the
10708 * context's _private member, so it is available nearly everywhere in libxml
10709 *
10710 * Returns the resulting document tree
10711 */
10712
10713xmlDocPtr
10714xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
10715 int size, int recovery, void *data) {
10716 xmlDocPtr ret;
10717 xmlParserCtxtPtr ctxt;
10718
10719 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10720 if (ctxt == NULL) return(NULL);
10721 if (sax != NULL) {
10722 if (ctxt->sax != NULL)
10723 xmlFree(ctxt->sax);
10724 ctxt->sax = sax;
10725 }
10726 if (data!=NULL) {
10727 ctxt->_private=data;
10728 }
10729
10730 xmlParseDocument(ctxt);
10731
10732 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10733 else {
10734 ret = NULL;
10735 xmlFreeDoc(ctxt->myDoc);
10736 ctxt->myDoc = NULL;
10737 }
10738 if (sax != NULL)
10739 ctxt->sax = NULL;
10740 xmlFreeParserCtxt(ctxt);
10741
10742 return(ret);
10743}
10744
10745/**
Owen Taylor3473f882001-02-23 17:55:21 +000010746 * xmlSAXParseMemory:
10747 * @sax: the SAX handler block
10748 * @buffer: an pointer to a char array
10749 * @size: the size of the array
10750 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10751 * documents
10752 *
10753 * parse an XML in-memory block and use the given SAX function block
10754 * to handle the parsing callback. If sax is NULL, fallback to the default
10755 * DOM tree building routines.
10756 *
10757 * Returns the resulting document tree
10758 */
10759xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010760xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10761 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010762 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010763}
10764
10765/**
10766 * xmlParseMemory:
10767 * @buffer: an pointer to a char array
10768 * @size: the size of the array
10769 *
10770 * parse an XML in-memory block and build a tree.
10771 *
10772 * Returns the resulting document tree
10773 */
10774
Daniel Veillard50822cb2001-07-26 20:05:51 +000010775xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010776 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10777}
10778
10779/**
10780 * xmlRecoverMemory:
10781 * @buffer: an pointer to a char array
10782 * @size: the size of the array
10783 *
10784 * parse an XML in-memory block and build a tree.
10785 * In the case the document is not Well Formed, a tree is built anyway
10786 *
10787 * Returns the resulting document tree
10788 */
10789
Daniel Veillard50822cb2001-07-26 20:05:51 +000010790xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010791 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10792}
10793
10794/**
10795 * xmlSAXUserParseMemory:
10796 * @sax: a SAX handler
10797 * @user_data: The user data returned on SAX callbacks
10798 * @buffer: an in-memory XML document input
10799 * @size: the length of the XML document in bytes
10800 *
10801 * A better SAX parsing routine.
10802 * parse an XML in-memory buffer and call the given SAX handler routines.
10803 *
10804 * Returns 0 in case of success or a error number otherwise
10805 */
10806int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010807 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010808 int ret = 0;
10809 xmlParserCtxtPtr ctxt;
10810 xmlSAXHandlerPtr oldsax = NULL;
10811
Daniel Veillard9e923512002-08-14 08:48:52 +000010812 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000010813 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10814 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000010815 oldsax = ctxt->sax;
10816 ctxt->sax = sax;
Daniel Veillard30211a02001-04-26 09:33:18 +000010817 if (user_data != NULL)
10818 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010819
10820 xmlParseDocument(ctxt);
10821
10822 if (ctxt->wellFormed)
10823 ret = 0;
10824 else {
10825 if (ctxt->errNo != 0)
10826 ret = ctxt->errNo;
10827 else
10828 ret = -1;
10829 }
Daniel Veillard9e923512002-08-14 08:48:52 +000010830 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000010831 xmlFreeParserCtxt(ctxt);
10832
10833 return ret;
10834}
10835
10836/**
10837 * xmlCreateDocParserCtxt:
10838 * @cur: a pointer to an array of xmlChar
10839 *
10840 * Creates a parser context for an XML in-memory document.
10841 *
10842 * Returns the new parser context or NULL
10843 */
10844xmlParserCtxtPtr
10845xmlCreateDocParserCtxt(xmlChar *cur) {
10846 int len;
10847
10848 if (cur == NULL)
10849 return(NULL);
10850 len = xmlStrlen(cur);
10851 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10852}
10853
10854/**
10855 * xmlSAXParseDoc:
10856 * @sax: the SAX handler block
10857 * @cur: a pointer to an array of xmlChar
10858 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10859 * documents
10860 *
10861 * parse an XML in-memory document and build a tree.
10862 * It use the given SAX function block to handle the parsing callback.
10863 * If sax is NULL, fallback to the default DOM tree building routines.
10864 *
10865 * Returns the resulting document tree
10866 */
10867
10868xmlDocPtr
10869xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10870 xmlDocPtr ret;
10871 xmlParserCtxtPtr ctxt;
10872
10873 if (cur == NULL) return(NULL);
10874
10875
10876 ctxt = xmlCreateDocParserCtxt(cur);
10877 if (ctxt == NULL) return(NULL);
10878 if (sax != NULL) {
10879 ctxt->sax = sax;
10880 ctxt->userData = NULL;
10881 }
10882
10883 xmlParseDocument(ctxt);
10884 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10885 else {
10886 ret = NULL;
10887 xmlFreeDoc(ctxt->myDoc);
10888 ctxt->myDoc = NULL;
10889 }
10890 if (sax != NULL)
10891 ctxt->sax = NULL;
10892 xmlFreeParserCtxt(ctxt);
10893
10894 return(ret);
10895}
10896
10897/**
10898 * xmlParseDoc:
10899 * @cur: a pointer to an array of xmlChar
10900 *
10901 * parse an XML in-memory document and build a tree.
10902 *
10903 * Returns the resulting document tree
10904 */
10905
10906xmlDocPtr
10907xmlParseDoc(xmlChar *cur) {
10908 return(xmlSAXParseDoc(NULL, cur, 0));
10909}
10910
Daniel Veillard8107a222002-01-13 14:10:10 +000010911/************************************************************************
10912 * *
10913 * Specific function to keep track of entities references *
10914 * and used by the XSLT debugger *
10915 * *
10916 ************************************************************************/
10917
10918static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
10919
10920/**
10921 * xmlAddEntityReference:
10922 * @ent : A valid entity
10923 * @firstNode : A valid first node for children of entity
10924 * @lastNode : A valid last node of children entity
10925 *
10926 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
10927 */
10928static void
10929xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
10930 xmlNodePtr lastNode)
10931{
10932 if (xmlEntityRefFunc != NULL) {
10933 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
10934 }
10935}
10936
10937
10938/**
10939 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000010940 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000010941 *
10942 * Set the function to call call back when a xml reference has been made
10943 */
10944void
10945xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
10946{
10947 xmlEntityRefFunc = func;
10948}
Owen Taylor3473f882001-02-23 17:55:21 +000010949
10950/************************************************************************
10951 * *
10952 * Miscellaneous *
10953 * *
10954 ************************************************************************/
10955
10956#ifdef LIBXML_XPATH_ENABLED
10957#include <libxml/xpath.h>
10958#endif
10959
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010960extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000010961static int xmlParserInitialized = 0;
10962
10963/**
10964 * xmlInitParser:
10965 *
10966 * Initialization function for the XML parser.
10967 * This is not reentrant. Call once before processing in case of
10968 * use in multithreaded programs.
10969 */
10970
10971void
10972xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000010973 if (xmlParserInitialized != 0)
10974 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010975
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010976 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
10977 (xmlGenericError == NULL))
10978 initGenericErrorDefaultFunc(NULL);
Daniel Veillardd0463562001-10-13 09:15:48 +000010979 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000010980 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000010981 xmlInitCharEncodingHandlers();
10982 xmlInitializePredefinedEntities();
10983 xmlDefaultSAXHandlerInit();
10984 xmlRegisterDefaultInputCallbacks();
10985 xmlRegisterDefaultOutputCallbacks();
10986#ifdef LIBXML_HTML_ENABLED
10987 htmlInitAutoClose();
10988 htmlDefaultSAXHandlerInit();
10989#endif
10990#ifdef LIBXML_XPATH_ENABLED
10991 xmlXPathInit();
10992#endif
10993 xmlParserInitialized = 1;
10994}
10995
10996/**
10997 * xmlCleanupParser:
10998 *
10999 * Cleanup function for the XML parser. It tries to reclaim all
11000 * parsing related global memory allocated for the parser processing.
11001 * It doesn't deallocate any document related memory. Calling this
11002 * function should not prevent reusing the parser.
Daniel Veillard7424eb62003-01-24 14:14:52 +000011003 * One should call xmlCleanupParser() only when the process has
11004 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000011005 */
11006
11007void
11008xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000011009 xmlCleanupCharEncodingHandlers();
11010 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000011011#ifdef LIBXML_CATALOG_ENABLED
11012 xmlCatalogCleanup();
11013#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000011014 xmlCleanupThreads();
11015 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011016}