blob: ede62d80f94eb37d86352ebdd371213cbcb85682 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
44#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000045#include <libxml/threads.h>
46#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000056#ifdef LIBXML_CATALOG_ENABLED
57#include <libxml/catalog.h>
58#endif
Owen Taylor3473f882001-02-23 17:55:21 +000059
60#ifdef HAVE_CTYPE_H
61#include <ctype.h>
62#endif
63#ifdef HAVE_STDLIB_H
64#include <stdlib.h>
65#endif
66#ifdef HAVE_SYS_STAT_H
67#include <sys/stat.h>
68#endif
69#ifdef HAVE_FCNTL_H
70#include <fcntl.h>
71#endif
72#ifdef HAVE_UNISTD_H
73#include <unistd.h>
74#endif
75#ifdef HAVE_ZLIB_H
76#include <zlib.h>
77#endif
78
Daniel Veillard3b2e4e12003-02-03 08:52:58 +000079/**
80 * MAX_DEPTH:
81 *
82 * arbitrary depth limit for the XML documents that we allow to
83 * process. This is not a limitation of the parser but a safety
84 * boundary feature.
85 */
86#define MAX_DEPTH 1024
Owen Taylor3473f882001-02-23 17:55:21 +000087
Daniel Veillard21a0f912001-02-25 19:54:14 +000088#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000089#define XML_PARSER_BUFFER_SIZE 100
90
Daniel Veillard5997aca2002-03-18 18:36:20 +000091#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
92
/*
 * Processing-instruction targets that start with "xml" and are
 * nevertheless allowed by W3C specifications.
 * The table is terminated by a NULL entry.
 */
static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
101
102/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000103xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
104 const xmlChar **str);
105
Daniel Veillard257d9102001-05-08 10:41:44 +0000106static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000107xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
108 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000109 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000110 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000111
Daniel Veillard8107a222002-01-13 14:10:10 +0000112static void
113xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
114 xmlNodePtr lastNode);
115
Daniel Veillard328f48c2002-11-15 15:24:34 +0000116static int
117xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
118 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Owen Taylor3473f882001-02-23 17:55:21 +0000119/************************************************************************
120 * *
121 * Parser stacks related functions and macros *
122 * *
123 ************************************************************************/
124
125xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
126 const xmlChar ** str);
127
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000128/**
129 * inputPush:
130 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000131 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000132 *
133 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000134 *
135 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000136 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000137extern int
138inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
139{
140 if (ctxt->inputNr >= ctxt->inputMax) {
141 ctxt->inputMax *= 2;
142 ctxt->inputTab =
143 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
144 ctxt->inputMax *
145 sizeof(ctxt->inputTab[0]));
146 if (ctxt->inputTab == NULL) {
147 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
148 return (0);
149 }
150 }
151 ctxt->inputTab[ctxt->inputNr] = value;
152 ctxt->input = value;
153 return (ctxt->inputNr++);
154}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000155/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000156 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000157 * @ctxt: an XML parser context
158 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000159 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000160 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000161 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000162 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000163extern xmlParserInputPtr
164inputPop(xmlParserCtxtPtr ctxt)
165{
166 xmlParserInputPtr ret;
167
168 if (ctxt->inputNr <= 0)
169 return (0);
170 ctxt->inputNr--;
171 if (ctxt->inputNr > 0)
172 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
173 else
174 ctxt->input = NULL;
175 ret = ctxt->inputTab[ctxt->inputNr];
176 ctxt->inputTab[ctxt->inputNr] = 0;
177 return (ret);
178}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000179/**
180 * nodePush:
181 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000182 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000183 *
184 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000185 *
186 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000187 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000188extern int
189nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
190{
191 if (ctxt->nodeNr >= ctxt->nodeMax) {
192 ctxt->nodeMax *= 2;
193 ctxt->nodeTab =
194 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
195 ctxt->nodeMax *
196 sizeof(ctxt->nodeTab[0]));
197 if (ctxt->nodeTab == NULL) {
198 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
199 return (0);
200 }
201 }
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000202#ifdef MAX_DEPTH
203 if (ctxt->nodeNr > MAX_DEPTH) {
204 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
205 ctxt->sax->error(ctxt->userData,
206 "Excessive depth in document: change MAX_DEPTH = %d\n",
207 MAX_DEPTH);
208 ctxt->wellFormed = 0;
209 ctxt->instate = XML_PARSER_EOF;
210 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
211 return(0);
212 }
213#endif
Daniel Veillard1c732d22002-11-30 11:22:59 +0000214 ctxt->nodeTab[ctxt->nodeNr] = value;
215 ctxt->node = value;
216 return (ctxt->nodeNr++);
217}
218/**
219 * nodePop:
220 * @ctxt: an XML parser context
221 *
222 * Pops the top element node from the node stack
223 *
224 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +0000225 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000226extern xmlNodePtr
227nodePop(xmlParserCtxtPtr ctxt)
228{
229 xmlNodePtr ret;
230
231 if (ctxt->nodeNr <= 0)
232 return (0);
233 ctxt->nodeNr--;
234 if (ctxt->nodeNr > 0)
235 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
236 else
237 ctxt->node = NULL;
238 ret = ctxt->nodeTab[ctxt->nodeNr];
239 ctxt->nodeTab[ctxt->nodeNr] = 0;
240 return (ret);
241}
242/**
243 * namePush:
244 * @ctxt: an XML parser context
245 * @value: the element name
246 *
247 * Pushes a new element name on top of the name stack
248 *
249 * Returns 0 in case of error, the index in the stack otherwise
250 */
251extern int
252namePush(xmlParserCtxtPtr ctxt, xmlChar * value)
253{
254 if (ctxt->nameNr >= ctxt->nameMax) {
255 ctxt->nameMax *= 2;
256 ctxt->nameTab =
257 (xmlChar * *)xmlRealloc(ctxt->nameTab,
258 ctxt->nameMax *
259 sizeof(ctxt->nameTab[0]));
260 if (ctxt->nameTab == NULL) {
261 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
262 return (0);
263 }
264 }
265 ctxt->nameTab[ctxt->nameNr] = value;
266 ctxt->name = value;
267 return (ctxt->nameNr++);
268}
269/**
270 * namePop:
271 * @ctxt: an XML parser context
272 *
273 * Pops the top element name from the name stack
274 *
275 * Returns the name just removed
276 */
277extern xmlChar *
278namePop(xmlParserCtxtPtr ctxt)
279{
280 xmlChar *ret;
281
282 if (ctxt->nameNr <= 0)
283 return (0);
284 ctxt->nameNr--;
285 if (ctxt->nameNr > 0)
286 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
287 else
288 ctxt->name = NULL;
289 ret = ctxt->nameTab[ctxt->nameNr];
290 ctxt->nameTab[ctxt->nameNr] = 0;
291 return (ret);
292}
Owen Taylor3473f882001-02-23 17:55:21 +0000293
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000294static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000295 if (ctxt->spaceNr >= ctxt->spaceMax) {
296 ctxt->spaceMax *= 2;
297 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
298 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
299 if (ctxt->spaceTab == NULL) {
300 xmlGenericError(xmlGenericErrorContext,
301 "realloc failed !\n");
302 return(0);
303 }
304 }
305 ctxt->spaceTab[ctxt->spaceNr] = val;
306 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
307 return(ctxt->spaceNr++);
308}
309
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000310static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000311 int ret;
312 if (ctxt->spaceNr <= 0) return(0);
313 ctxt->spaceNr--;
314 if (ctxt->spaceNr > 0)
315 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
316 else
317 ctxt->space = NULL;
318 ret = ctxt->spaceTab[ctxt->spaceNr];
319 ctxt->spaceTab[ctxt->spaceNr] = -1;
320 return(ret);
321}
322
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 *
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR same but operates on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
357
/* Current byte of the active input; RAW and CUR expand identically. */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
/* Byte located val positions after the current read position. */
#define NXT(val) ctxt->input->cur[(val)]
/* The read pointer of the active input. */
#define CUR_PTR ctxt->input->cur

/*
 * Advance the read pointer by val bytes, updating the nbChars and
 * column counters by the same amount. Afterwards a '%' at the new
 * position is handed to xmlParserHandlePEReference(), and if the
 * current byte is 0 and the input cannot be grown, the input is popped.
 * Note that val is evaluated three times.
 */
#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val);                                             \
    ctxt->input->cur += (val);                                          \
    ctxt->input->col += (val);                                          \
    if (*ctxt->input->cur == '%')                                       \
        xmlParserHandlePEReference(ctxt);                               \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
        xmlPopInput(ctxt);                                              \
  } while (0)
370
Daniel Veillarda880b122003-04-21 21:36:41 +0000371#define SHRINK if ((ctxt->progressive == 0) && \
372 (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK))\
Daniel Veillard46de64e2002-05-29 08:21:33 +0000373 xmlSHRINK (ctxt);
374
375static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
376 xmlParserInputShrink(ctxt->input);
377 if ((*ctxt->input->cur == 0) &&
378 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
379 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000380 }
Owen Taylor3473f882001-02-23 17:55:21 +0000381
Daniel Veillarda880b122003-04-21 21:36:41 +0000382#define GROW if ((ctxt->progressive == 0) && \
383 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +0000384 xmlGROW (ctxt);
385
386static void xmlGROW (xmlParserCtxtPtr ctxt) {
387 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
388 if ((*ctxt->input->cur == 0) &&
389 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
390 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +0000391}
Owen Taylor3473f882001-02-23 17:55:21 +0000392
/* Skip blanks at the current position; yields the number skipped. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Advance to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * Advance by exactly one byte, bumping the column and nbChars counters,
 * and try to grow the input when the new current byte is 0.
 * The expansion is a brace-enclosed block, not a do/while(0) statement.
 */
#define NEXT1 {                                                         \
        ctxt->input->col++;                                             \
        ctxt->input->cur++;                                             \
        ctxt->nbChars++;                                                \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

/*
 * Advance by l bytes, updating line/column from the byte being left,
 * then hand a '%' at the new position to xmlParserHandlePEReference().
 * The argument is parenthesized so that an expression can be passed.
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->input->cur += (l);                                            \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
  } while (0)

/* Decode the current character; l (an lvalue) receives its length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * Append character v to buffer b at index i, advancing i. A length l of
 * 1 stores a single byte, anything else goes through
 * xmlCopyCharMultiByte(). Arguments are parenthesized so that
 * expressions expand correctly; the expansion remains a bare if/else
 * without a trailing ';', and i is evaluated more than once.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);                           \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +0000419
420/**
421 * xmlSkipBlankChars:
422 * @ctxt: the XML parser context
423 *
424 * skip all blanks character found at that point in the input streams.
425 * It pops up finished entities in the process if allowable at that point.
426 *
427 * Returns the number of space chars skipped
428 */
429
430int
431xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000432 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000433
434 /*
435 * It's Okay to use CUR/NEXT here since all the blanks are on
436 * the ASCII range.
437 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000438 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
439 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000440 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000441 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000442 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000443 cur = ctxt->input->cur;
444 while (IS_BLANK(*cur)) {
445 if (*cur == '\n') {
446 ctxt->input->line++; ctxt->input->col = 1;
447 }
448 cur++;
449 res++;
450 if (*cur == 0) {
451 ctxt->input->cur = cur;
452 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
453 cur = ctxt->input->cur;
454 }
455 }
456 ctxt->input->cur = cur;
457 } else {
458 int cur;
459 do {
460 cur = CUR;
461 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
462 NEXT;
463 cur = CUR;
464 res++;
465 }
466 while ((cur == 0) && (ctxt->inputNr > 1) &&
467 (ctxt->instate != XML_PARSER_COMMENT)) {
468 xmlPopInput(ctxt);
469 cur = CUR;
470 }
471 /*
472 * Need to handle support of entities branching here
473 */
474 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
475 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
476 }
Owen Taylor3473f882001-02-23 17:55:21 +0000477 return(res);
478}
479
480/************************************************************************
481 * *
482 * Commodity functions to handle entities *
483 * *
484 ************************************************************************/
485
486/**
487 * xmlPopInput:
488 * @ctxt: an XML parser context
489 *
490 * xmlPopInput: the current input pointed by ctxt->input came to an end
491 * pop it and return the next char.
492 *
493 * Returns the current xmlChar in the parser context
494 */
495xmlChar
496xmlPopInput(xmlParserCtxtPtr ctxt) {
497 if (ctxt->inputNr == 1) return(0); /* End of main Input */
498 if (xmlParserDebugEntities)
499 xmlGenericError(xmlGenericErrorContext,
500 "Popping input %d\n", ctxt->inputNr);
501 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +0000502 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000503 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
504 return(xmlPopInput(ctxt));
505 return(CUR);
506}
507
508/**
509 * xmlPushInput:
510 * @ctxt: an XML parser context
511 * @input: an XML parser input fragment (entity, XML fragment ...).
512 *
513 * xmlPushInput: switch to a new input stream which is stacked on top
514 * of the previous one(s).
515 */
516void
517xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
518 if (input == NULL) return;
519
520 if (xmlParserDebugEntities) {
521 if ((ctxt->input != NULL) && (ctxt->input->filename))
522 xmlGenericError(xmlGenericErrorContext,
523 "%s(%d): ", ctxt->input->filename,
524 ctxt->input->line);
525 xmlGenericError(xmlGenericErrorContext,
526 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
527 }
528 inputPush(ctxt, input);
529 GROW;
530}
531
532/**
533 * xmlParseCharRef:
534 * @ctxt: an XML parser context
535 *
536 * parse Reference declarations
537 *
538 * [66] CharRef ::= '&#' [0-9]+ ';' |
539 * '&#x' [0-9a-fA-F]+ ';'
540 *
541 * [ WFC: Legal Character ]
542 * Characters referred to using character references must match the
543 * production for Char.
544 *
545 * Returns the value parsed (as an int), 0 in case of error
546 */
547int
548xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000549 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000550 int count = 0;
551
Owen Taylor3473f882001-02-23 17:55:21 +0000552 /*
553 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
554 */
Daniel Veillard561b7f82002-03-20 21:55:57 +0000555 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +0000556 (NXT(2) == 'x')) {
557 SKIP(3);
558 GROW;
559 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000560 if (count++ > 20) {
561 count = 0;
562 GROW;
563 }
564 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000565 val = val * 16 + (CUR - '0');
566 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
567 val = val * 16 + (CUR - 'a') + 10;
568 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
569 val = val * 16 + (CUR - 'A') + 10;
570 else {
571 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
572 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
573 ctxt->sax->error(ctxt->userData,
574 "xmlParseCharRef: invalid hexadecimal value\n");
575 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000576 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000577 val = 0;
578 break;
579 }
580 NEXT;
581 count++;
582 }
583 if (RAW == ';') {
584 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +0000585 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +0000586 ctxt->nbChars ++;
587 ctxt->input->cur++;
588 }
Daniel Veillard561b7f82002-03-20 21:55:57 +0000589 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +0000590 SKIP(2);
591 GROW;
592 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000593 if (count++ > 20) {
594 count = 0;
595 GROW;
596 }
597 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000598 val = val * 10 + (CUR - '0');
599 else {
600 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
602 ctxt->sax->error(ctxt->userData,
603 "xmlParseCharRef: invalid decimal value\n");
604 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000605 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000606 val = 0;
607 break;
608 }
609 NEXT;
610 count++;
611 }
612 if (RAW == ';') {
613 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +0000614 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +0000615 ctxt->nbChars ++;
616 ctxt->input->cur++;
617 }
618 } else {
619 ctxt->errNo = XML_ERR_INVALID_CHARREF;
620 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
621 ctxt->sax->error(ctxt->userData,
622 "xmlParseCharRef: invalid value\n");
623 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000624 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000625 }
626
627 /*
628 * [ WFC: Legal Character ]
629 * Characters referred to using character references must match the
630 * production for Char.
631 */
632 if (IS_CHAR(val)) {
633 return(val);
634 } else {
635 ctxt->errNo = XML_ERR_INVALID_CHAR;
636 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000637 ctxt->sax->error(ctxt->userData,
638 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000639 val);
640 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000641 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000642 }
643 return(0);
644}
645
646/**
647 * xmlParseStringCharRef:
648 * @ctxt: an XML parser context
649 * @str: a pointer to an index in the string
650 *
651 * parse Reference declarations, variant parsing from a string rather
652 * than an an input flow.
653 *
654 * [66] CharRef ::= '&#' [0-9]+ ';' |
655 * '&#x' [0-9a-fA-F]+ ';'
656 *
657 * [ WFC: Legal Character ]
658 * Characters referred to using character references must match the
659 * production for Char.
660 *
661 * Returns the value parsed (as an int), 0 in case of error, str will be
662 * updated to the current value of the index
663 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000664static int
Owen Taylor3473f882001-02-23 17:55:21 +0000665xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
666 const xmlChar *ptr;
667 xmlChar cur;
668 int val = 0;
669
670 if ((str == NULL) || (*str == NULL)) return(0);
671 ptr = *str;
672 cur = *ptr;
673 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
674 ptr += 3;
675 cur = *ptr;
676 while (cur != ';') { /* Non input consuming loop */
677 if ((cur >= '0') && (cur <= '9'))
678 val = val * 16 + (cur - '0');
679 else if ((cur >= 'a') && (cur <= 'f'))
680 val = val * 16 + (cur - 'a') + 10;
681 else if ((cur >= 'A') && (cur <= 'F'))
682 val = val * 16 + (cur - 'A') + 10;
683 else {
684 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
685 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
686 ctxt->sax->error(ctxt->userData,
687 "xmlParseStringCharRef: invalid hexadecimal value\n");
688 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000689 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000690 val = 0;
691 break;
692 }
693 ptr++;
694 cur = *ptr;
695 }
696 if (cur == ';')
697 ptr++;
698 } else if ((cur == '&') && (ptr[1] == '#')){
699 ptr += 2;
700 cur = *ptr;
701 while (cur != ';') { /* Non input consuming loops */
702 if ((cur >= '0') && (cur <= '9'))
703 val = val * 10 + (cur - '0');
704 else {
705 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
706 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
707 ctxt->sax->error(ctxt->userData,
708 "xmlParseStringCharRef: invalid decimal value\n");
709 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000710 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000711 val = 0;
712 break;
713 }
714 ptr++;
715 cur = *ptr;
716 }
717 if (cur == ';')
718 ptr++;
719 } else {
720 ctxt->errNo = XML_ERR_INVALID_CHARREF;
721 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
722 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000723 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000724 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000725 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000726 return(0);
727 }
728 *str = ptr;
729
730 /*
731 * [ WFC: Legal Character ]
732 * Characters referred to using character references must match the
733 * production for Char.
734 */
735 if (IS_CHAR(val)) {
736 return(val);
737 } else {
738 ctxt->errNo = XML_ERR_INVALID_CHAR;
739 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
740 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000741 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000742 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000743 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000744 }
745 return(0);
746}
747
748/**
Daniel Veillardf5582f12002-06-11 10:08:16 +0000749 * xmlNewBlanksWrapperInputStream:
750 * @ctxt: an XML parser context
751 * @entity: an Entity pointer
752 *
753 * Create a new input stream for wrapping
754 * blanks around a PEReference
755 *
756 * Returns the new input stream or NULL
757 */
758
759static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
760
Daniel Veillardf4862f02002-09-10 11:13:43 +0000761static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +0000762xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
763 xmlParserInputPtr input;
764 xmlChar *buffer;
765 size_t length;
766 if (entity == NULL) {
767 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
768 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
769 ctxt->sax->error(ctxt->userData,
770 "internal: xmlNewBlanksWrapperInputStream entity = NULL\n");
771 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
772 return(NULL);
773 }
774 if (xmlParserDebugEntities)
775 xmlGenericError(xmlGenericErrorContext,
776 "new blanks wrapper for entity: %s\n", entity->name);
777 input = xmlNewInputStream(ctxt);
778 if (input == NULL) {
779 return(NULL);
780 }
781 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +0000782 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +0000783 if (buffer == NULL) {
784 return(NULL);
785 }
786 buffer [0] = ' ';
787 buffer [1] = '%';
788 buffer [length-3] = ';';
789 buffer [length-2] = ' ';
790 buffer [length-1] = 0;
791 memcpy(buffer + 2, entity->name, length - 5);
792 input->free = deallocblankswrapper;
793 input->base = buffer;
794 input->cur = buffer;
795 input->length = length;
796 input->end = &buffer[length];
797 return(input);
798}
799
800/**
Owen Taylor3473f882001-02-23 17:55:21 +0000801 * xmlParserHandlePEReference:
802 * @ctxt: the parser context
803 *
804 * [69] PEReference ::= '%' Name ';'
805 *
806 * [ WFC: No Recursion ]
807 * A parsed entity must not contain a recursive
808 * reference to itself, either directly or indirectly.
809 *
810 * [ WFC: Entity Declared ]
811 * In a document without any DTD, a document with only an internal DTD
812 * subset which contains no parameter entity references, or a document
813 * with "standalone='yes'", ... ... The declaration of a parameter
814 * entity must precede any reference to it...
815 *
816 * [ VC: Entity Declared ]
817 * In a document with an external subset or external parameter entities
818 * with "standalone='no'", ... ... The declaration of a parameter entity
819 * must precede any reference to it...
820 *
821 * [ WFC: In DTD ]
822 * Parameter-entity references may only appear in the DTD.
823 * NOTE: misleading but this is handled.
824 *
825 * A PEReference may have been detected in the current input stream
826 * the handling is done accordingly to
827 * http://www.w3.org/TR/REC-xml#entproc
828 * i.e.
829 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000830 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000831 */
832void
833xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
834 xmlChar *name;
835 xmlEntityPtr entity = NULL;
836 xmlParserInputPtr input;
837
Owen Taylor3473f882001-02-23 17:55:21 +0000838 if (RAW != '%') return;
839 switch(ctxt->instate) {
840 case XML_PARSER_CDATA_SECTION:
841 return;
842 case XML_PARSER_COMMENT:
843 return;
844 case XML_PARSER_START_TAG:
845 return;
846 case XML_PARSER_END_TAG:
847 return;
848 case XML_PARSER_EOF:
849 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
850 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
851 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
852 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000853 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000854 return;
855 case XML_PARSER_PROLOG:
856 case XML_PARSER_START:
857 case XML_PARSER_MISC:
858 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
859 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
860 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
861 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000862 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000863 return;
864 case XML_PARSER_ENTITY_DECL:
865 case XML_PARSER_CONTENT:
866 case XML_PARSER_ATTRIBUTE_VALUE:
867 case XML_PARSER_PI:
868 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +0000869 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +0000870 /* we just ignore it there */
871 return;
872 case XML_PARSER_EPILOG:
873 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
874 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
875 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
876 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000877 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000878 return;
879 case XML_PARSER_ENTITY_VALUE:
880 /*
881 * NOTE: in the case of entity values, we don't do the
882 * substitution here since we need the literal
883 * entity value to be able to save the internal
884 * subset of the document.
885 * This will be handled by xmlStringDecodeEntities
886 */
887 return;
888 case XML_PARSER_DTD:
889 /*
890 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
891 * In the internal DTD subset, parameter-entity references
892 * can occur only where markup declarations can occur, not
893 * within markup declarations.
894 * In that case this is handled in xmlParseMarkupDecl
895 */
896 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
897 return;
Daniel Veillardf5582f12002-06-11 10:08:16 +0000898 if (IS_BLANK(NXT(1)) || NXT(1) == 0)
899 return;
Owen Taylor3473f882001-02-23 17:55:21 +0000900 break;
901 case XML_PARSER_IGNORE:
902 return;
903 }
904
905 NEXT;
906 name = xmlParseName(ctxt);
907 if (xmlParserDebugEntities)
908 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000909 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000910 if (name == NULL) {
911 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
912 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000913 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000914 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000915 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000916 } else {
917 if (RAW == ';') {
918 NEXT;
919 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
920 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
921 if (entity == NULL) {
922
923 /*
924 * [ WFC: Entity Declared ]
925 * In a document without any DTD, a document with only an
926 * internal DTD subset which contains no parameter entity
927 * references, or a document with "standalone='yes'", ...
928 * ... The declaration of a parameter entity must precede
929 * any reference to it...
930 */
931 if ((ctxt->standalone == 1) ||
932 ((ctxt->hasExternalSubset == 0) &&
933 (ctxt->hasPErefs == 0))) {
934 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
935 ctxt->sax->error(ctxt->userData,
936 "PEReference: %%%s; not found\n", name);
937 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000938 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000939 } else {
940 /*
941 * [ VC: Entity Declared ]
942 * In a document with an external subset or external
943 * parameter entities with "standalone='no'", ...
944 * ... The declaration of a parameter entity must precede
945 * any reference to it...
946 */
947 if ((!ctxt->disableSAX) &&
948 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
949 ctxt->vctxt.error(ctxt->vctxt.userData,
950 "PEReference: %%%s; not found\n", name);
951 } else if ((!ctxt->disableSAX) &&
952 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
953 ctxt->sax->warning(ctxt->userData,
954 "PEReference: %%%s; not found\n", name);
955 ctxt->valid = 0;
956 }
Daniel Veillardf5582f12002-06-11 10:08:16 +0000957 } else if (ctxt->input->free != deallocblankswrapper) {
958 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
959 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +0000960 } else {
961 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
962 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000963 xmlChar start[4];
964 xmlCharEncoding enc;
965
Owen Taylor3473f882001-02-23 17:55:21 +0000966 /*
967 * handle the extra spaces added before and after
968 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000969 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000970 */
971 input = xmlNewEntityInputStream(ctxt, entity);
972 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000973
974 /*
975 * Get the 4 first bytes and decode the charset
976 * if enc != XML_CHAR_ENCODING_NONE
977 * plug some encoding conversion routines.
978 */
979 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +0000980 if (entity->length >= 4) {
981 start[0] = RAW;
982 start[1] = NXT(1);
983 start[2] = NXT(2);
984 start[3] = NXT(3);
985 enc = xmlDetectCharEncoding(start, 4);
986 if (enc != XML_CHAR_ENCODING_NONE) {
987 xmlSwitchEncoding(ctxt, enc);
988 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000989 }
990
Owen Taylor3473f882001-02-23 17:55:21 +0000991 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
992 (RAW == '<') && (NXT(1) == '?') &&
993 (NXT(2) == 'x') && (NXT(3) == 'm') &&
994 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
995 xmlParseTextDecl(ctxt);
996 }
Owen Taylor3473f882001-02-23 17:55:21 +0000997 } else {
998 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
999 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001000 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +00001001 name);
1002 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001003 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001004 }
1005 }
1006 } else {
1007 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1008 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1009 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001010 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001011 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001012 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001013 }
1014 xmlFree(name);
1015 }
1016}
1017
/*
 * Macro used to grow the current buffer.
 *
 * Doubles the capacity of @buffer; the companion variable holding its
 * size must be named <buffer>_size. On allocation failure the previous
 * block is released (it would otherwise be leaked, since the enclosing
 * function returns right away) and the enclosing function returns NULL.
 * The size variable is only updated once the reallocation succeeded.
 */
#define growBuffer(buffer) {						\
    xmlChar *growBuffer_tmp = (xmlChar *)				\
	xmlRealloc(buffer, buffer##_size * 2 * sizeof(xmlChar));	\
    if (growBuffer_tmp == NULL) {					\
	xmlGenericError(xmlGenericErrorContext, "realloc failed");	\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = growBuffer_tmp;						\
    buffer##_size *= 2;							\
}
1030
1031/**
1032 * xmlStringDecodeEntities:
1033 * @ctxt: the parser context
1034 * @str: the input string
1035 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1036 * @end: an end marker xmlChar, 0 if none
1037 * @end2: an end marker xmlChar, 0 if none
1038 * @end3: an end marker xmlChar, 0 if none
1039 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001040 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001041 *
1042 * [67] Reference ::= EntityRef | CharRef
1043 *
1044 * [69] PEReference ::= '%' Name ';'
1045 *
1046 * Returns A newly allocated string with the substitution done. The caller
1047 * must deallocate it !
1048 */
1049xmlChar *
1050xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1051 xmlChar end, xmlChar end2, xmlChar end3) {
1052 xmlChar *buffer = NULL;
1053 int buffer_size = 0;
1054
1055 xmlChar *current = NULL;
1056 xmlEntityPtr ent;
1057 int c,l;
1058 int nbchars = 0;
1059
1060 if (str == NULL)
1061 return(NULL);
1062
1063 if (ctxt->depth > 40) {
1064 ctxt->errNo = XML_ERR_ENTITY_LOOP;
1065 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1066 ctxt->sax->error(ctxt->userData,
1067 "Detected entity reference loop\n");
1068 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001069 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001070 return(NULL);
1071 }
1072
1073 /*
1074 * allocate a translation buffer.
1075 */
1076 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001077 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001078 if (buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00001079 xmlGenericError(xmlGenericErrorContext,
1080 "xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00001081 return(NULL);
1082 }
1083
1084 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001085 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001086 * we are operating on already parsed values.
1087 */
1088 c = CUR_SCHAR(str, l);
1089 while ((c != 0) && (c != end) && /* non input consuming loop */
1090 (c != end2) && (c != end3)) {
1091
1092 if (c == 0) break;
1093 if ((c == '&') && (str[1] == '#')) {
1094 int val = xmlParseStringCharRef(ctxt, &str);
1095 if (val != 0) {
1096 COPY_BUF(0,buffer,nbchars,val);
1097 }
1098 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1099 if (xmlParserDebugEntities)
1100 xmlGenericError(xmlGenericErrorContext,
1101 "String decoding Entity Reference: %.30s\n",
1102 str);
1103 ent = xmlParseStringEntityRef(ctxt, &str);
1104 if ((ent != NULL) &&
1105 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1106 if (ent->content != NULL) {
1107 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1108 } else {
1109 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1110 ctxt->sax->error(ctxt->userData,
1111 "internal error entity has no content\n");
1112 }
1113 } else if ((ent != NULL) && (ent->content != NULL)) {
1114 xmlChar *rep;
1115
1116 ctxt->depth++;
1117 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1118 0, 0, 0);
1119 ctxt->depth--;
1120 if (rep != NULL) {
1121 current = rep;
1122 while (*current != 0) { /* non input consuming loop */
1123 buffer[nbchars++] = *current++;
1124 if (nbchars >
1125 buffer_size - XML_PARSER_BUFFER_SIZE) {
1126 growBuffer(buffer);
1127 }
1128 }
1129 xmlFree(rep);
1130 }
1131 } else if (ent != NULL) {
1132 int i = xmlStrlen(ent->name);
1133 const xmlChar *cur = ent->name;
1134
1135 buffer[nbchars++] = '&';
1136 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1137 growBuffer(buffer);
1138 }
1139 for (;i > 0;i--)
1140 buffer[nbchars++] = *cur++;
1141 buffer[nbchars++] = ';';
1142 }
1143 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1144 if (xmlParserDebugEntities)
1145 xmlGenericError(xmlGenericErrorContext,
1146 "String decoding PE Reference: %.30s\n", str);
1147 ent = xmlParseStringPEReference(ctxt, &str);
1148 if (ent != NULL) {
1149 xmlChar *rep;
1150
1151 ctxt->depth++;
1152 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1153 0, 0, 0);
1154 ctxt->depth--;
1155 if (rep != NULL) {
1156 current = rep;
1157 while (*current != 0) { /* non input consuming loop */
1158 buffer[nbchars++] = *current++;
1159 if (nbchars >
1160 buffer_size - XML_PARSER_BUFFER_SIZE) {
1161 growBuffer(buffer);
1162 }
1163 }
1164 xmlFree(rep);
1165 }
1166 }
1167 } else {
1168 COPY_BUF(l,buffer,nbchars,c);
1169 str += l;
1170 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1171 growBuffer(buffer);
1172 }
1173 }
1174 c = CUR_SCHAR(str, l);
1175 }
1176 buffer[nbchars++] = 0;
1177 return(buffer);
1178}
1179
1180
1181/************************************************************************
1182 * *
1183 * Commodity functions to handle xmlChars *
1184 * *
1185 ************************************************************************/
1186
1187/**
1188 * xmlStrndup:
1189 * @cur: the input xmlChar *
1190 * @len: the len of @cur
1191 *
1192 * a strndup for array of xmlChar's
1193 *
1194 * Returns a new xmlChar * or NULL
1195 */
1196xmlChar *
1197xmlStrndup(const xmlChar *cur, int len) {
1198 xmlChar *ret;
1199
1200 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001201 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001202 if (ret == NULL) {
1203 xmlGenericError(xmlGenericErrorContext,
1204 "malloc of %ld byte failed\n",
1205 (len + 1) * (long)sizeof(xmlChar));
1206 return(NULL);
1207 }
1208 memcpy(ret, cur, len * sizeof(xmlChar));
1209 ret[len] = 0;
1210 return(ret);
1211}
1212
1213/**
1214 * xmlStrdup:
1215 * @cur: the input xmlChar *
1216 *
1217 * a strdup for array of xmlChar's. Since they are supposed to be
1218 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1219 * a termination mark of '0'.
1220 *
1221 * Returns a new xmlChar * or NULL
1222 */
1223xmlChar *
1224xmlStrdup(const xmlChar *cur) {
1225 const xmlChar *p = cur;
1226
1227 if (cur == NULL) return(NULL);
1228 while (*p != 0) p++; /* non input consuming */
1229 return(xmlStrndup(cur, p - cur));
1230}
1231
1232/**
1233 * xmlCharStrndup:
1234 * @cur: the input char *
1235 * @len: the len of @cur
1236 *
1237 * a strndup for char's to xmlChar's
1238 *
1239 * Returns a new xmlChar * or NULL
1240 */
1241
1242xmlChar *
1243xmlCharStrndup(const char *cur, int len) {
1244 int i;
1245 xmlChar *ret;
1246
1247 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001248 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001249 if (ret == NULL) {
1250 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1251 (len + 1) * (long)sizeof(xmlChar));
1252 return(NULL);
1253 }
1254 for (i = 0;i < len;i++)
1255 ret[i] = (xmlChar) cur[i];
1256 ret[len] = 0;
1257 return(ret);
1258}
1259
1260/**
1261 * xmlCharStrdup:
1262 * @cur: the input char *
Owen Taylor3473f882001-02-23 17:55:21 +00001263 *
1264 * a strdup for char's to xmlChar's
1265 *
1266 * Returns a new xmlChar * or NULL
1267 */
1268
1269xmlChar *
1270xmlCharStrdup(const char *cur) {
1271 const char *p = cur;
1272
1273 if (cur == NULL) return(NULL);
1274 while (*p != '\0') p++; /* non input consuming */
1275 return(xmlCharStrndup(cur, p - cur));
1276}
1277
1278/**
1279 * xmlStrcmp:
1280 * @str1: the first xmlChar *
1281 * @str2: the second xmlChar *
1282 *
1283 * a strcmp for xmlChar's
1284 *
1285 * Returns the integer result of the comparison
1286 */
1287
1288int
1289xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1290 register int tmp;
1291
1292 if (str1 == str2) return(0);
1293 if (str1 == NULL) return(-1);
1294 if (str2 == NULL) return(1);
1295 do {
1296 tmp = *str1++ - *str2;
1297 if (tmp != 0) return(tmp);
1298 } while (*str2++ != 0);
1299 return 0;
1300}
1301
1302/**
1303 * xmlStrEqual:
1304 * @str1: the first xmlChar *
1305 * @str2: the second xmlChar *
1306 *
1307 * Check if both string are equal of have same content
1308 * Should be a bit more readable and faster than xmlStrEqual()
1309 *
1310 * Returns 1 if they are equal, 0 if they are different
1311 */
1312
1313int
1314xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1315 if (str1 == str2) return(1);
1316 if (str1 == NULL) return(0);
1317 if (str2 == NULL) return(0);
1318 do {
1319 if (*str1++ != *str2) return(0);
1320 } while (*str2++);
1321 return(1);
1322}
1323
1324/**
1325 * xmlStrncmp:
1326 * @str1: the first xmlChar *
1327 * @str2: the second xmlChar *
1328 * @len: the max comparison length
1329 *
1330 * a strncmp for xmlChar's
1331 *
1332 * Returns the integer result of the comparison
1333 */
1334
1335int
1336xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1337 register int tmp;
1338
1339 if (len <= 0) return(0);
1340 if (str1 == str2) return(0);
1341 if (str1 == NULL) return(-1);
1342 if (str2 == NULL) return(1);
1343 do {
1344 tmp = *str1++ - *str2;
1345 if (tmp != 0 || --len == 0) return(tmp);
1346 } while (*str2++ != 0);
1347 return 0;
1348}
1349
Daniel Veillardb44025c2001-10-11 22:55:55 +00001350static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001351 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1352 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1353 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1354 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1355 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1356 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1357 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1358 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1359 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1360 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1361 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1362 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1363 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1364 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1365 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1366 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1367 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1368 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1369 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1370 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1371 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1372 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1373 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1374 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1375 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1376 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1377 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1378 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1379 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1380 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1381 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1382 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1383};
1384
1385/**
1386 * xmlStrcasecmp:
1387 * @str1: the first xmlChar *
1388 * @str2: the second xmlChar *
1389 *
1390 * a strcasecmp for xmlChar's
1391 *
1392 * Returns the integer result of the comparison
1393 */
1394
1395int
1396xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1397 register int tmp;
1398
1399 if (str1 == str2) return(0);
1400 if (str1 == NULL) return(-1);
1401 if (str2 == NULL) return(1);
1402 do {
1403 tmp = casemap[*str1++] - casemap[*str2];
1404 if (tmp != 0) return(tmp);
1405 } while (*str2++ != 0);
1406 return 0;
1407}
1408
1409/**
1410 * xmlStrncasecmp:
1411 * @str1: the first xmlChar *
1412 * @str2: the second xmlChar *
1413 * @len: the max comparison length
1414 *
1415 * a strncasecmp for xmlChar's
1416 *
1417 * Returns the integer result of the comparison
1418 */
1419
1420int
1421xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1422 register int tmp;
1423
1424 if (len <= 0) return(0);
1425 if (str1 == str2) return(0);
1426 if (str1 == NULL) return(-1);
1427 if (str2 == NULL) return(1);
1428 do {
1429 tmp = casemap[*str1++] - casemap[*str2];
1430 if (tmp != 0 || --len == 0) return(tmp);
1431 } while (*str2++ != 0);
1432 return 0;
1433}
1434
1435/**
1436 * xmlStrchr:
1437 * @str: the xmlChar * array
1438 * @val: the xmlChar to search
1439 *
1440 * a strchr for xmlChar's
1441 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001442 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001443 */
1444
1445const xmlChar *
1446xmlStrchr(const xmlChar *str, xmlChar val) {
1447 if (str == NULL) return(NULL);
1448 while (*str != 0) { /* non input consuming */
1449 if (*str == val) return((xmlChar *) str);
1450 str++;
1451 }
1452 return(NULL);
1453}
1454
1455/**
1456 * xmlStrstr:
1457 * @str: the xmlChar * array (haystack)
1458 * @val: the xmlChar to search (needle)
1459 *
1460 * a strstr for xmlChar's
1461 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001462 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001463 */
1464
1465const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001466xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001467 int n;
1468
1469 if (str == NULL) return(NULL);
1470 if (val == NULL) return(NULL);
1471 n = xmlStrlen(val);
1472
1473 if (n == 0) return(str);
1474 while (*str != 0) { /* non input consuming */
1475 if (*str == *val) {
1476 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1477 }
1478 str++;
1479 }
1480 return(NULL);
1481}
1482
1483/**
1484 * xmlStrcasestr:
1485 * @str: the xmlChar * array (haystack)
1486 * @val: the xmlChar to search (needle)
1487 *
1488 * a case-ignoring strstr for xmlChar's
1489 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001490 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001491 */
1492
1493const xmlChar *
1494xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1495 int n;
1496
1497 if (str == NULL) return(NULL);
1498 if (val == NULL) return(NULL);
1499 n = xmlStrlen(val);
1500
1501 if (n == 0) return(str);
1502 while (*str != 0) { /* non input consuming */
1503 if (casemap[*str] == casemap[*val])
1504 if (!xmlStrncasecmp(str, val, n)) return(str);
1505 str++;
1506 }
1507 return(NULL);
1508}
1509
1510/**
1511 * xmlStrsub:
1512 * @str: the xmlChar * array (haystack)
1513 * @start: the index of the first char (zero based)
1514 * @len: the length of the substring
1515 *
1516 * Extract a substring of a given string
1517 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001518 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001519 */
1520
1521xmlChar *
1522xmlStrsub(const xmlChar *str, int start, int len) {
1523 int i;
1524
1525 if (str == NULL) return(NULL);
1526 if (start < 0) return(NULL);
1527 if (len < 0) return(NULL);
1528
1529 for (i = 0;i < start;i++) {
1530 if (*str == 0) return(NULL);
1531 str++;
1532 }
1533 if (*str == 0) return(NULL);
1534 return(xmlStrndup(str, len));
1535}
1536
1537/**
1538 * xmlStrlen:
1539 * @str: the xmlChar * array
1540 *
1541 * length of a xmlChar's string
1542 *
1543 * Returns the number of xmlChar contained in the ARRAY.
1544 */
1545
1546int
1547xmlStrlen(const xmlChar *str) {
1548 int len = 0;
1549
1550 if (str == NULL) return(0);
1551 while (*str != 0) { /* non input consuming */
1552 str++;
1553 len++;
1554 }
1555 return(len);
1556}
1557
1558/**
1559 * xmlStrncat:
1560 * @cur: the original xmlChar * array
1561 * @add: the xmlChar * array added
1562 * @len: the length of @add
1563 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001564 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001565 * first bytes of @add.
1566 *
1567 * Returns a new xmlChar *, the original @cur is reallocated if needed
1568 * and should not be freed
1569 */
1570
1571xmlChar *
1572xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1573 int size;
1574 xmlChar *ret;
1575
1576 if ((add == NULL) || (len == 0))
1577 return(cur);
1578 if (cur == NULL)
1579 return(xmlStrndup(add, len));
1580
1581 size = xmlStrlen(cur);
1582 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1583 if (ret == NULL) {
1584 xmlGenericError(xmlGenericErrorContext,
1585 "xmlStrncat: realloc of %ld byte failed\n",
1586 (size + len + 1) * (long)sizeof(xmlChar));
1587 return(cur);
1588 }
1589 memcpy(&ret[size], add, len * sizeof(xmlChar));
1590 ret[size + len] = 0;
1591 return(ret);
1592}
1593
1594/**
1595 * xmlStrcat:
1596 * @cur: the original xmlChar * array
1597 * @add: the xmlChar * array added
1598 *
1599 * a strcat for array of xmlChar's. Since they are supposed to be
1600 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1601 * a termination mark of '0'.
1602 *
1603 * Returns a new xmlChar * containing the concatenated string.
1604 */
1605xmlChar *
1606xmlStrcat(xmlChar *cur, const xmlChar *add) {
1607 const xmlChar *p = add;
1608
1609 if (add == NULL) return(cur);
1610 if (cur == NULL)
1611 return(xmlStrdup(add));
1612
1613 while (*p != 0) p++; /* non input consuming */
1614 return(xmlStrncat(cur, add, p - add));
1615}
1616
1617/************************************************************************
1618 * *
1619 * Commodity functions, cleanup needed ? *
1620 * *
1621 ************************************************************************/
1622
1623/**
1624 * areBlanks:
1625 * @ctxt: an XML parser context
1626 * @str: a xmlChar *
1627 * @len: the size of @str
1628 *
1629 * Is this a sequence of blank chars that one can ignore ?
1630 *
1631 * Returns 1 if ignorable 0 otherwise.
1632 */
1633
1634static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1635 int i, ret;
1636 xmlNodePtr lastChild;
1637
Daniel Veillard05c13a22001-09-09 08:38:09 +00001638 /*
1639 * Don't spend time trying to differentiate them, the same callback is
1640 * used !
1641 */
1642 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001643 return(0);
1644
Owen Taylor3473f882001-02-23 17:55:21 +00001645 /*
1646 * Check for xml:space value.
1647 */
1648 if (*(ctxt->space) == 1)
1649 return(0);
1650
1651 /*
1652 * Check that the string is made of blanks
1653 */
1654 for (i = 0;i < len;i++)
1655 if (!(IS_BLANK(str[i]))) return(0);
1656
1657 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001658 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001659 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001660 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001661 if (ctxt->myDoc != NULL) {
1662 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1663 if (ret == 0) return(1);
1664 if (ret == 1) return(0);
1665 }
1666
1667 /*
1668 * Otherwise, heuristic :-\
1669 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001670 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001671 if ((ctxt->node->children == NULL) &&
1672 (RAW == '<') && (NXT(1) == '/')) return(0);
1673
1674 lastChild = xmlGetLastChild(ctxt->node);
1675 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001676 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1677 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001678 } else if (xmlNodeIsText(lastChild))
1679 return(0);
1680 else if ((ctxt->node->children != NULL) &&
1681 (xmlNodeIsText(ctxt->node->children)))
1682 return(0);
1683 return(1);
1684}
1685
Owen Taylor3473f882001-02-23 17:55:21 +00001686/************************************************************************
1687 * *
1688 * Extra stuff for namespace support *
1689 * Relates to http://www.w3.org/TR/WD-xml-names *
1690 * *
1691 ************************************************************************/
1692
1693/**
1694 * xmlSplitQName:
1695 * @ctxt: an XML parser context
1696 * @name: an XML parser context
1697 * @prefix: a xmlChar **
1698 *
1699 * parse an UTF8 encoded XML qualified name string
1700 *
1701 * [NS 5] QName ::= (Prefix ':')? LocalPart
1702 *
1703 * [NS 6] Prefix ::= NCName
1704 *
1705 * [NS 7] LocalPart ::= NCName
1706 *
1707 * Returns the local part, and prefix is updated
1708 * to get the Prefix if any.
1709 */
1710
1711xmlChar *
1712xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1713 xmlChar buf[XML_MAX_NAMELEN + 5];
1714 xmlChar *buffer = NULL;
1715 int len = 0;
1716 int max = XML_MAX_NAMELEN;
1717 xmlChar *ret = NULL;
1718 const xmlChar *cur = name;
1719 int c;
1720
1721 *prefix = NULL;
1722
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00001723 if (cur == NULL) return(NULL);
1724
Owen Taylor3473f882001-02-23 17:55:21 +00001725#ifndef XML_XML_NAMESPACE
1726 /* xml: prefix is not really a namespace */
1727 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1728 (cur[2] == 'l') && (cur[3] == ':'))
1729 return(xmlStrdup(name));
1730#endif
1731
Daniel Veillard597bc482003-07-24 16:08:28 +00001732 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00001733 if (cur[0] == ':')
1734 return(xmlStrdup(name));
1735
1736 c = *cur++;
1737 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1738 buf[len++] = c;
1739 c = *cur++;
1740 }
1741 if (len >= max) {
1742 /*
1743 * Okay someone managed to make a huge name, so he's ready to pay
1744 * for the processing speed.
1745 */
1746 max = len * 2;
1747
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001748 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001749 if (buffer == NULL) {
1750 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1751 ctxt->sax->error(ctxt->userData,
1752 "xmlSplitQName: out of memory\n");
1753 return(NULL);
1754 }
1755 memcpy(buffer, buf, len);
1756 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1757 if (len + 10 > max) {
1758 max *= 2;
1759 buffer = (xmlChar *) xmlRealloc(buffer,
1760 max * sizeof(xmlChar));
1761 if (buffer == NULL) {
1762 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1763 ctxt->sax->error(ctxt->userData,
1764 "xmlSplitQName: out of memory\n");
1765 return(NULL);
1766 }
1767 }
1768 buffer[len++] = c;
1769 c = *cur++;
1770 }
1771 buffer[len] = 0;
1772 }
1773
Daniel Veillard597bc482003-07-24 16:08:28 +00001774 /* nasty but well=formed
1775 if ((c == ':') && (*cur == 0)) {
1776 return(xmlStrdup(name));
1777 } */
1778
Owen Taylor3473f882001-02-23 17:55:21 +00001779 if (buffer == NULL)
1780 ret = xmlStrndup(buf, len);
1781 else {
1782 ret = buffer;
1783 buffer = NULL;
1784 max = XML_MAX_NAMELEN;
1785 }
1786
1787
1788 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00001789 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001790 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00001791 if (c == 0) {
1792 return(xmlStrndup("", 0));
1793 }
Owen Taylor3473f882001-02-23 17:55:21 +00001794 len = 0;
1795
Daniel Veillardbb284f42002-10-16 18:02:47 +00001796 /*
1797 * Check that the first character is proper to start
1798 * a new name
1799 */
1800 if (!(((c >= 0x61) && (c <= 0x7A)) ||
1801 ((c >= 0x41) && (c <= 0x5A)) ||
1802 (c == '_') || (c == ':'))) {
1803 int l;
1804 int first = CUR_SCHAR(cur, l);
1805
1806 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001807 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1808 (ctxt->sax->error != NULL))
Daniel Veillardbb284f42002-10-16 18:02:47 +00001809 ctxt->sax->error(ctxt->userData,
1810 "Name %s is not XML Namespace compliant\n",
1811 name);
1812 }
1813 }
1814 cur++;
1815
Owen Taylor3473f882001-02-23 17:55:21 +00001816 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1817 buf[len++] = c;
1818 c = *cur++;
1819 }
1820 if (len >= max) {
1821 /*
1822 * Okay someone managed to make a huge name, so he's ready to pay
1823 * for the processing speed.
1824 */
1825 max = len * 2;
1826
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001827 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001828 if (buffer == NULL) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001829 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1830 (ctxt->sax->error != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00001831 ctxt->sax->error(ctxt->userData,
1832 "xmlSplitQName: out of memory\n");
1833 return(NULL);
1834 }
1835 memcpy(buffer, buf, len);
1836 while (c != 0) { /* tested bigname2.xml */
1837 if (len + 10 > max) {
1838 max *= 2;
1839 buffer = (xmlChar *) xmlRealloc(buffer,
1840 max * sizeof(xmlChar));
1841 if (buffer == NULL) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001842 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1843 (ctxt->sax->error != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00001844 ctxt->sax->error(ctxt->userData,
1845 "xmlSplitQName: out of memory\n");
1846 return(NULL);
1847 }
1848 }
1849 buffer[len++] = c;
1850 c = *cur++;
1851 }
1852 buffer[len] = 0;
1853 }
1854
1855 if (buffer == NULL)
1856 ret = xmlStrndup(buf, len);
1857 else {
1858 ret = buffer;
1859 }
1860 }
1861
1862 return(ret);
1863}
1864
1865/************************************************************************
1866 * *
1867 * The parser itself *
1868 * Relates to http://www.w3.org/TR/REC-xml *
1869 * *
1870 ************************************************************************/
1871
Daniel Veillard76d66f42001-05-16 21:05:17 +00001872static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001873/**
1874 * xmlParseName:
1875 * @ctxt: an XML parser context
1876 *
1877 * parse an XML name.
1878 *
1879 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1880 * CombiningChar | Extender
1881 *
1882 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1883 *
1884 * [6] Names ::= Name (S Name)*
1885 *
1886 * Returns the Name parsed or NULL
1887 */
1888
1889xmlChar *
1890xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001891 const xmlChar *in;
1892 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001893 int count = 0;
1894
1895 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001896
1897 /*
1898 * Accelerator for simple ASCII names
1899 */
1900 in = ctxt->input->cur;
1901 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1902 ((*in >= 0x41) && (*in <= 0x5A)) ||
1903 (*in == '_') || (*in == ':')) {
1904 in++;
1905 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1906 ((*in >= 0x41) && (*in <= 0x5A)) ||
1907 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001908 (*in == '_') || (*in == '-') ||
1909 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001910 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001911 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001912 count = in - ctxt->input->cur;
1913 ret = xmlStrndup(ctxt->input->cur, count);
1914 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00001915 ctxt->nbChars += count;
1916 ctxt->input->col += count;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00001917 if (ret == NULL) {
1918 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1919 ctxt->sax->error(ctxt->userData,
1920 "XML parser: out of memory\n");
1921 ctxt->errNo = XML_ERR_NO_MEMORY;
1922 ctxt->instate = XML_PARSER_EOF;
1923 ctxt->disableSAX = 1;
1924 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001925 return(ret);
1926 }
1927 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001928 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001929}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001930
Daniel Veillard46de64e2002-05-29 08:21:33 +00001931/**
1932 * xmlParseNameAndCompare:
1933 * @ctxt: an XML parser context
1934 *
1935 * parse an XML name and compares for match
1936 * (specialized for endtag parsing)
1937 *
1938 *
1939 * Returns NULL for an illegal name, (xmlChar*) 1 for success
1940 * and the name for mismatch
1941 */
1942
Daniel Veillardf4862f02002-09-10 11:13:43 +00001943static xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00001944xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
1945 const xmlChar *cmp = other;
1946 const xmlChar *in;
1947 xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00001948
1949 GROW;
1950
1951 in = ctxt->input->cur;
1952 while (*in != 0 && *in == *cmp) {
1953 ++in;
1954 ++cmp;
1955 }
1956 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
1957 /* success */
1958 ctxt->input->cur = in;
1959 return (xmlChar*) 1;
1960 }
1961 /* failure (or end of input buffer), check with full function */
1962 ret = xmlParseName (ctxt);
1963 if (ret != 0 && xmlStrEqual (ret, other)) {
1964 xmlFree (ret);
1965 return (xmlChar*) 1;
1966 }
1967 return ret;
1968}
1969
Daniel Veillard76d66f42001-05-16 21:05:17 +00001970static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001971xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1972 xmlChar buf[XML_MAX_NAMELEN + 5];
1973 int len = 0, l;
1974 int c;
1975 int count = 0;
1976
1977 /*
1978 * Handler for more complex cases
1979 */
1980 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001981 c = CUR_CHAR(l);
1982 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1983 (!IS_LETTER(c) && (c != '_') &&
1984 (c != ':'))) {
1985 return(NULL);
1986 }
1987
1988 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1989 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1990 (c == '.') || (c == '-') ||
1991 (c == '_') || (c == ':') ||
1992 (IS_COMBINING(c)) ||
1993 (IS_EXTENDER(c)))) {
1994 if (count++ > 100) {
1995 count = 0;
1996 GROW;
1997 }
1998 COPY_BUF(l,buf,len,c);
1999 NEXTL(l);
2000 c = CUR_CHAR(l);
2001 if (len >= XML_MAX_NAMELEN) {
2002 /*
2003 * Okay someone managed to make a huge name, so he's ready to pay
2004 * for the processing speed.
2005 */
2006 xmlChar *buffer;
2007 int max = len * 2;
2008
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002009 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002010 if (buffer == NULL) {
2011 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2012 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00002013 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002014 return(NULL);
2015 }
2016 memcpy(buffer, buf, len);
2017 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
2018 (c == '.') || (c == '-') ||
2019 (c == '_') || (c == ':') ||
2020 (IS_COMBINING(c)) ||
2021 (IS_EXTENDER(c))) {
2022 if (count++ > 100) {
2023 count = 0;
2024 GROW;
2025 }
2026 if (len + 10 > max) {
2027 max *= 2;
2028 buffer = (xmlChar *) xmlRealloc(buffer,
2029 max * sizeof(xmlChar));
2030 if (buffer == NULL) {
2031 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2032 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00002033 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002034 return(NULL);
2035 }
2036 }
2037 COPY_BUF(l,buffer,len,c);
2038 NEXTL(l);
2039 c = CUR_CHAR(l);
2040 }
2041 buffer[len] = 0;
2042 return(buffer);
2043 }
2044 }
2045 return(xmlStrndup(buf, len));
2046}
2047
2048/**
2049 * xmlParseStringName:
2050 * @ctxt: an XML parser context
2051 * @str: a pointer to the string pointer (IN/OUT)
2052 *
2053 * parse an XML name.
2054 *
2055 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2056 * CombiningChar | Extender
2057 *
2058 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2059 *
2060 * [6] Names ::= Name (S Name)*
2061 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002062 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002063 * is updated to the current location in the string.
2064 */
2065
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002066static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002067xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2068 xmlChar buf[XML_MAX_NAMELEN + 5];
2069 const xmlChar *cur = *str;
2070 int len = 0, l;
2071 int c;
2072
2073 c = CUR_SCHAR(cur, l);
2074 if (!IS_LETTER(c) && (c != '_') &&
2075 (c != ':')) {
2076 return(NULL);
2077 }
2078
2079 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2080 (c == '.') || (c == '-') ||
2081 (c == '_') || (c == ':') ||
2082 (IS_COMBINING(c)) ||
2083 (IS_EXTENDER(c))) {
2084 COPY_BUF(l,buf,len,c);
2085 cur += l;
2086 c = CUR_SCHAR(cur, l);
2087 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2088 /*
2089 * Okay someone managed to make a huge name, so he's ready to pay
2090 * for the processing speed.
2091 */
2092 xmlChar *buffer;
2093 int max = len * 2;
2094
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002095 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002096 if (buffer == NULL) {
2097 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2098 ctxt->sax->error(ctxt->userData,
2099 "xmlParseStringName: out of memory\n");
2100 return(NULL);
2101 }
2102 memcpy(buffer, buf, len);
2103 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2104 (c == '.') || (c == '-') ||
2105 (c == '_') || (c == ':') ||
2106 (IS_COMBINING(c)) ||
2107 (IS_EXTENDER(c))) {
2108 if (len + 10 > max) {
2109 max *= 2;
2110 buffer = (xmlChar *) xmlRealloc(buffer,
2111 max * sizeof(xmlChar));
2112 if (buffer == NULL) {
2113 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2114 ctxt->sax->error(ctxt->userData,
2115 "xmlParseStringName: out of memory\n");
2116 return(NULL);
2117 }
2118 }
2119 COPY_BUF(l,buffer,len,c);
2120 cur += l;
2121 c = CUR_SCHAR(cur, l);
2122 }
2123 buffer[len] = 0;
2124 *str = cur;
2125 return(buffer);
2126 }
2127 }
2128 *str = cur;
2129 return(xmlStrndup(buf, len));
2130}
2131
2132/**
2133 * xmlParseNmtoken:
2134 * @ctxt: an XML parser context
2135 *
2136 * parse an XML Nmtoken.
2137 *
2138 * [7] Nmtoken ::= (NameChar)+
2139 *
2140 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2141 *
2142 * Returns the Nmtoken parsed or NULL
2143 */
2144
2145xmlChar *
2146xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2147 xmlChar buf[XML_MAX_NAMELEN + 5];
2148 int len = 0, l;
2149 int c;
2150 int count = 0;
2151
2152 GROW;
2153 c = CUR_CHAR(l);
2154
2155 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2156 (c == '.') || (c == '-') ||
2157 (c == '_') || (c == ':') ||
2158 (IS_COMBINING(c)) ||
2159 (IS_EXTENDER(c))) {
2160 if (count++ > 100) {
2161 count = 0;
2162 GROW;
2163 }
2164 COPY_BUF(l,buf,len,c);
2165 NEXTL(l);
2166 c = CUR_CHAR(l);
2167 if (len >= XML_MAX_NAMELEN) {
2168 /*
2169 * Okay someone managed to make a huge token, so he's ready to pay
2170 * for the processing speed.
2171 */
2172 xmlChar *buffer;
2173 int max = len * 2;
2174
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002175 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002176 if (buffer == NULL) {
2177 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2178 ctxt->sax->error(ctxt->userData,
2179 "xmlParseNmtoken: out of memory\n");
2180 return(NULL);
2181 }
2182 memcpy(buffer, buf, len);
2183 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2184 (c == '.') || (c == '-') ||
2185 (c == '_') || (c == ':') ||
2186 (IS_COMBINING(c)) ||
2187 (IS_EXTENDER(c))) {
2188 if (count++ > 100) {
2189 count = 0;
2190 GROW;
2191 }
2192 if (len + 10 > max) {
2193 max *= 2;
2194 buffer = (xmlChar *) xmlRealloc(buffer,
2195 max * sizeof(xmlChar));
2196 if (buffer == NULL) {
2197 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2198 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002199 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002200 return(NULL);
2201 }
2202 }
2203 COPY_BUF(l,buffer,len,c);
2204 NEXTL(l);
2205 c = CUR_CHAR(l);
2206 }
2207 buffer[len] = 0;
2208 return(buffer);
2209 }
2210 }
2211 if (len == 0)
2212 return(NULL);
2213 return(xmlStrndup(buf, len));
2214}
2215
2216/**
2217 * xmlParseEntityValue:
2218 * @ctxt: an XML parser context
2219 * @orig: if non-NULL store a copy of the original entity value
2220 *
2221 * parse a value for ENTITY declarations
2222 *
2223 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2224 * "'" ([^%&'] | PEReference | Reference)* "'"
2225 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002226 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002227 */
2228
2229xmlChar *
2230xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2231 xmlChar *buf = NULL;
2232 int len = 0;
2233 int size = XML_PARSER_BUFFER_SIZE;
2234 int c, l;
2235 xmlChar stop;
2236 xmlChar *ret = NULL;
2237 const xmlChar *cur = NULL;
2238 xmlParserInputPtr input;
2239
2240 if (RAW == '"') stop = '"';
2241 else if (RAW == '\'') stop = '\'';
2242 else {
2243 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2244 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2245 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2246 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002247 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002248 return(NULL);
2249 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002250 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002251 if (buf == NULL) {
2252 xmlGenericError(xmlGenericErrorContext,
2253 "malloc of %d byte failed\n", size);
2254 return(NULL);
2255 }
2256
2257 /*
2258 * The content of the entity definition is copied in a buffer.
2259 */
2260
2261 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2262 input = ctxt->input;
2263 GROW;
2264 NEXT;
2265 c = CUR_CHAR(l);
2266 /*
2267 * NOTE: 4.4.5 Included in Literal
2268 * When a parameter entity reference appears in a literal entity
2269 * value, ... a single or double quote character in the replacement
2270 * text is always treated as a normal data character and will not
2271 * terminate the literal.
2272 * In practice it means we stop the loop only when back at parsing
2273 * the initial entity and the quote is found
2274 */
2275 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2276 (ctxt->input != input))) {
2277 if (len + 5 >= size) {
2278 size *= 2;
2279 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2280 if (buf == NULL) {
2281 xmlGenericError(xmlGenericErrorContext,
2282 "realloc of %d byte failed\n", size);
2283 return(NULL);
2284 }
2285 }
2286 COPY_BUF(l,buf,len,c);
2287 NEXTL(l);
2288 /*
2289 * Pop-up of finished entities.
2290 */
2291 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2292 xmlPopInput(ctxt);
2293
2294 GROW;
2295 c = CUR_CHAR(l);
2296 if (c == 0) {
2297 GROW;
2298 c = CUR_CHAR(l);
2299 }
2300 }
2301 buf[len] = 0;
2302
2303 /*
2304 * Raise problem w.r.t. '&' and '%' being used in non-entities
2305 * reference constructs. Note Charref will be handled in
2306 * xmlStringDecodeEntities()
2307 */
2308 cur = buf;
2309 while (*cur != 0) { /* non input consuming */
2310 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2311 xmlChar *name;
2312 xmlChar tmp = *cur;
2313
2314 cur++;
2315 name = xmlParseStringName(ctxt, &cur);
2316 if ((name == NULL) || (*cur != ';')) {
2317 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2318 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2319 ctxt->sax->error(ctxt->userData,
2320 "EntityValue: '%c' forbidden except for entities references\n",
2321 tmp);
2322 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002323 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002324 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002325 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2326 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002327 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2328 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2329 ctxt->sax->error(ctxt->userData,
2330 "EntityValue: PEReferences forbidden in internal subset\n",
2331 tmp);
2332 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002333 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002334 }
2335 if (name != NULL)
2336 xmlFree(name);
2337 }
2338 cur++;
2339 }
2340
2341 /*
2342 * Then PEReference entities are substituted.
2343 */
2344 if (c != stop) {
2345 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2346 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2347 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2348 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002349 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002350 xmlFree(buf);
2351 } else {
2352 NEXT;
2353 /*
2354 * NOTE: 4.4.7 Bypassed
2355 * When a general entity reference appears in the EntityValue in
2356 * an entity declaration, it is bypassed and left as is.
2357 * so XML_SUBSTITUTE_REF is not set here.
2358 */
2359 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2360 0, 0, 0);
2361 if (orig != NULL)
2362 *orig = buf;
2363 else
2364 xmlFree(buf);
2365 }
2366
2367 return(ret);
2368}
2369
2370/**
2371 * xmlParseAttValue:
2372 * @ctxt: an XML parser context
2373 *
2374 * parse a value for an attribute
2375 * Note: the parser won't do substitution of entities here, this
2376 * will be handled later in xmlStringGetNodeList
2377 *
2378 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2379 * "'" ([^<&'] | Reference)* "'"
2380 *
2381 * 3.3.3 Attribute-Value Normalization:
2382 * Before the value of an attribute is passed to the application or
2383 * checked for validity, the XML processor must normalize it as follows:
2384 * - a character reference is processed by appending the referenced
2385 * character to the attribute value
2386 * - an entity reference is processed by recursively processing the
2387 * replacement text of the entity
2388 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2389 * appending #x20 to the normalized value, except that only a single
2390 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2391 * parsed entity or the literal entity value of an internal parsed entity
2392 * - other characters are processed by appending them to the normalized value
2393 * If the declared value is not CDATA, then the XML processor must further
2394 * process the normalized attribute value by discarding any leading and
2395 * trailing space (#x20) characters, and by replacing sequences of space
2396 * (#x20) characters by a single space (#x20) character.
2397 * All attributes for which no declaration has been read should be treated
2398 * by a non-validating parser as if declared CDATA.
2399 *
2400 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2401 */
2402
2403xmlChar *
Daniel Veillarde72c7562002-05-31 09:47:30 +00002404xmlParseAttValueComplex(xmlParserCtxtPtr ctxt);
2405
2406xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002407xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2408 xmlChar limit = 0;
Daniel Veillardf4862f02002-09-10 11:13:43 +00002409 const xmlChar *in = NULL;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002410 xmlChar *ret = NULL;
2411 SHRINK;
2412 GROW;
Daniel Veillarde645e8c2002-10-22 17:35:37 +00002413 in = (xmlChar *) CUR_PTR;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002414 if (*in != '"' && *in != '\'') {
2415 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2416 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2417 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2418 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002419 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002420 return(NULL);
2421 }
2422 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2423 limit = *in;
2424 ++in;
2425
2426 while (*in != limit && *in >= 0x20 && *in <= 0x7f &&
2427 *in != '&' && *in != '<'
2428 ) {
2429 ++in;
2430 }
2431 if (*in != limit) {
2432 return xmlParseAttValueComplex(ctxt);
2433 }
2434 ++in;
2435 ret = xmlStrndup (CUR_PTR + 1, in - CUR_PTR - 2);
2436 CUR_PTR = in;
2437 return ret;
2438}
2439
Daniel Veillard01c13b52002-12-10 15:19:08 +00002440/**
2441 * xmlParseAttValueComplex:
2442 * @ctxt: an XML parser context
2443 *
2444 * parse a value for an attribute, this is the fallback function
2445 * of xmlParseAttValue() when the attribute parsing requires handling
2446 * of non-ASCII characters.
2447 *
2448 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2449 */
Daniel Veillarde72c7562002-05-31 09:47:30 +00002450xmlChar *
2451xmlParseAttValueComplex(xmlParserCtxtPtr ctxt) {
2452 xmlChar limit = 0;
2453 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002454 int len = 0;
2455 int buf_size = 0;
2456 int c, l;
2457 xmlChar *current = NULL;
2458 xmlEntityPtr ent;
2459
2460
2461 SHRINK;
2462 if (NXT(0) == '"') {
2463 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2464 limit = '"';
2465 NEXT;
2466 } else if (NXT(0) == '\'') {
2467 limit = '\'';
2468 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2469 NEXT;
2470 } else {
2471 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2472 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2473 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2474 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002475 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002476 return(NULL);
2477 }
2478
2479 /*
2480 * allocate a translation buffer.
2481 */
2482 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002483 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002484 if (buf == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002485 xmlGenericError(xmlGenericErrorContext,
2486 "xmlParseAttValue: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002487 return(NULL);
2488 }
2489
2490 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002491 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002492 */
2493 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002494 while ((NXT(0) != limit) && /* checked */
2495 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002496 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002497 if (c == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00002498 if (NXT(1) == '#') {
2499 int val = xmlParseCharRef(ctxt);
2500 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002501 if (ctxt->replaceEntities) {
2502 if (len > buf_size - 10) {
2503 growBuffer(buf);
2504 }
2505 buf[len++] = '&';
2506 } else {
2507 /*
2508 * The reparsing will be done in xmlStringGetNodeList()
2509 * called by the attribute() function in SAX.c
2510 */
2511 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002512
Daniel Veillard319a7422001-09-11 09:27:09 +00002513 if (len > buf_size - 10) {
2514 growBuffer(buf);
2515 }
2516 current = &buffer[0];
2517 while (*current != 0) { /* non input consuming */
2518 buf[len++] = *current++;
2519 }
Owen Taylor3473f882001-02-23 17:55:21 +00002520 }
2521 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002522 if (len > buf_size - 10) {
2523 growBuffer(buf);
2524 }
Owen Taylor3473f882001-02-23 17:55:21 +00002525 len += xmlCopyChar(0, &buf[len], val);
2526 }
2527 } else {
2528 ent = xmlParseEntityRef(ctxt);
2529 if ((ent != NULL) &&
2530 (ctxt->replaceEntities != 0)) {
2531 xmlChar *rep;
2532
2533 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2534 rep = xmlStringDecodeEntities(ctxt, ent->content,
2535 XML_SUBSTITUTE_REF, 0, 0, 0);
2536 if (rep != NULL) {
2537 current = rep;
2538 while (*current != 0) { /* non input consuming */
2539 buf[len++] = *current++;
2540 if (len > buf_size - 10) {
2541 growBuffer(buf);
2542 }
2543 }
2544 xmlFree(rep);
2545 }
2546 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002547 if (len > buf_size - 10) {
2548 growBuffer(buf);
2549 }
Owen Taylor3473f882001-02-23 17:55:21 +00002550 if (ent->content != NULL)
2551 buf[len++] = ent->content[0];
2552 }
2553 } else if (ent != NULL) {
2554 int i = xmlStrlen(ent->name);
2555 const xmlChar *cur = ent->name;
2556
2557 /*
2558 * This may look absurd but is needed to detect
2559 * entities problems
2560 */
2561 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2562 (ent->content != NULL)) {
2563 xmlChar *rep;
2564 rep = xmlStringDecodeEntities(ctxt, ent->content,
2565 XML_SUBSTITUTE_REF, 0, 0, 0);
2566 if (rep != NULL)
2567 xmlFree(rep);
2568 }
2569
2570 /*
2571 * Just output the reference
2572 */
2573 buf[len++] = '&';
2574 if (len > buf_size - i - 10) {
2575 growBuffer(buf);
2576 }
2577 for (;i > 0;i--)
2578 buf[len++] = *cur++;
2579 buf[len++] = ';';
2580 }
2581 }
2582 } else {
2583 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2584 COPY_BUF(l,buf,len,0x20);
2585 if (len > buf_size - 10) {
2586 growBuffer(buf);
2587 }
2588 } else {
2589 COPY_BUF(l,buf,len,c);
2590 if (len > buf_size - 10) {
2591 growBuffer(buf);
2592 }
2593 }
2594 NEXTL(l);
2595 }
2596 GROW;
2597 c = CUR_CHAR(l);
2598 }
2599 buf[len++] = 0;
2600 if (RAW == '<') {
2601 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2603 ctxt->sax->error(ctxt->userData,
2604 "Unescaped '<' not allowed in attributes values\n");
2605 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002606 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002607 } else if (RAW != limit) {
2608 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2609 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2610 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2611 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002612 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002613 } else
2614 NEXT;
2615 return(buf);
2616}
2617
2618/**
2619 * xmlParseSystemLiteral:
2620 * @ctxt: an XML parser context
2621 *
2622 * parse an XML Literal
2623 *
2624 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2625 *
2626 * Returns the SystemLiteral parsed or NULL
2627 */
2628
2629xmlChar *
2630xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2631 xmlChar *buf = NULL;
2632 int len = 0;
2633 int size = XML_PARSER_BUFFER_SIZE;
2634 int cur, l;
2635 xmlChar stop;
2636 int state = ctxt->instate;
2637 int count = 0;
2638
2639 SHRINK;
2640 if (RAW == '"') {
2641 NEXT;
2642 stop = '"';
2643 } else if (RAW == '\'') {
2644 NEXT;
2645 stop = '\'';
2646 } else {
2647 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2648 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2649 ctxt->sax->error(ctxt->userData,
2650 "SystemLiteral \" or ' expected\n");
2651 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002652 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002653 return(NULL);
2654 }
2655
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002656 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002657 if (buf == NULL) {
2658 xmlGenericError(xmlGenericErrorContext,
2659 "malloc of %d byte failed\n", size);
2660 return(NULL);
2661 }
2662 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2663 cur = CUR_CHAR(l);
2664 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2665 if (len + 5 >= size) {
2666 size *= 2;
2667 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2668 if (buf == NULL) {
2669 xmlGenericError(xmlGenericErrorContext,
2670 "realloc of %d byte failed\n", size);
2671 ctxt->instate = (xmlParserInputState) state;
2672 return(NULL);
2673 }
2674 }
2675 count++;
2676 if (count > 50) {
2677 GROW;
2678 count = 0;
2679 }
2680 COPY_BUF(l,buf,len,cur);
2681 NEXTL(l);
2682 cur = CUR_CHAR(l);
2683 if (cur == 0) {
2684 GROW;
2685 SHRINK;
2686 cur = CUR_CHAR(l);
2687 }
2688 }
2689 buf[len] = 0;
2690 ctxt->instate = (xmlParserInputState) state;
2691 if (!IS_CHAR(cur)) {
2692 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2693 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2694 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2695 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002696 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002697 } else {
2698 NEXT;
2699 }
2700 return(buf);
2701}
2702
2703/**
2704 * xmlParsePubidLiteral:
2705 * @ctxt: an XML parser context
2706 *
2707 * parse an XML public literal
2708 *
2709 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2710 *
2711 * Returns the PubidLiteral parsed or NULL.
2712 */
2713
2714xmlChar *
2715xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2716 xmlChar *buf = NULL;
2717 int len = 0;
2718 int size = XML_PARSER_BUFFER_SIZE;
2719 xmlChar cur;
2720 xmlChar stop;
2721 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002722 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00002723
2724 SHRINK;
2725 if (RAW == '"') {
2726 NEXT;
2727 stop = '"';
2728 } else if (RAW == '\'') {
2729 NEXT;
2730 stop = '\'';
2731 } else {
2732 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2733 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2734 ctxt->sax->error(ctxt->userData,
2735 "SystemLiteral \" or ' expected\n");
2736 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002737 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002738 return(NULL);
2739 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002740 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002741 if (buf == NULL) {
2742 xmlGenericError(xmlGenericErrorContext,
2743 "malloc of %d byte failed\n", size);
2744 return(NULL);
2745 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002746 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00002747 cur = CUR;
2748 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2749 if (len + 1 >= size) {
2750 size *= 2;
2751 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2752 if (buf == NULL) {
2753 xmlGenericError(xmlGenericErrorContext,
2754 "realloc of %d byte failed\n", size);
2755 return(NULL);
2756 }
2757 }
2758 buf[len++] = cur;
2759 count++;
2760 if (count > 50) {
2761 GROW;
2762 count = 0;
2763 }
2764 NEXT;
2765 cur = CUR;
2766 if (cur == 0) {
2767 GROW;
2768 SHRINK;
2769 cur = CUR;
2770 }
2771 }
2772 buf[len] = 0;
2773 if (cur != stop) {
2774 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2775 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2776 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2777 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002778 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002779 } else {
2780 NEXT;
2781 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002782 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00002783 return(buf);
2784}
2785
Daniel Veillard48b2f892001-02-25 16:11:03 +00002786void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002787/**
2788 * xmlParseCharData:
2789 * @ctxt: an XML parser context
2790 * @cdata: int indicating whether we are within a CDATA section
2791 *
2792 * parse a CharData section.
2793 * if we are within a CDATA section ']]>' marks an end of section.
2794 *
2795 * The right angle bracket (>) may be represented using the string "&gt;",
2796 * and must, for compatibility, be escaped using "&gt;" or a character
2797 * reference when it appears in the string "]]>" in content, when that
2798 * string is not marking the end of a CDATA section.
2799 *
2800 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2801 */
2802
2803void
2804xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00002805 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002806 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002807 int line = ctxt->input->line;
2808 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002809
2810 SHRINK;
2811 GROW;
2812 /*
2813 * Accelerated common case where input don't need to be
2814 * modified before passing it to the handler.
2815 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00002816 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002817 in = ctxt->input->cur;
2818 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002819get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00002820 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
2821 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002822 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002823 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002824 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002825 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002826 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002827 ctxt->input->line++;
2828 in++;
2829 }
2830 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002831 }
2832 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002833 if ((in[1] == ']') && (in[2] == '>')) {
2834 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2835 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2836 ctxt->sax->error(ctxt->userData,
2837 "Sequence ']]>' not allowed in content\n");
2838 ctxt->input->cur = in;
2839 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002840 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002841 return;
2842 }
2843 in++;
2844 goto get_more;
2845 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002846 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002847 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002848 if (IS_BLANK(*ctxt->input->cur)) {
2849 const xmlChar *tmp = ctxt->input->cur;
2850 ctxt->input->cur = in;
2851 if (areBlanks(ctxt, tmp, nbchar)) {
2852 if (ctxt->sax->ignorableWhitespace != NULL)
2853 ctxt->sax->ignorableWhitespace(ctxt->userData,
2854 tmp, nbchar);
2855 } else {
2856 if (ctxt->sax->characters != NULL)
2857 ctxt->sax->characters(ctxt->userData,
2858 tmp, nbchar);
2859 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002860 line = ctxt->input->line;
2861 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002862 } else {
2863 if (ctxt->sax->characters != NULL)
2864 ctxt->sax->characters(ctxt->userData,
2865 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002866 line = ctxt->input->line;
2867 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002868 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002869 }
2870 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002871 if (*in == 0xD) {
2872 in++;
2873 if (*in == 0xA) {
2874 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002875 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002876 ctxt->input->line++;
2877 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002878 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002879 in--;
2880 }
2881 if (*in == '<') {
2882 return;
2883 }
2884 if (*in == '&') {
2885 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002886 }
2887 SHRINK;
2888 GROW;
2889 in = ctxt->input->cur;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002890 } while ((*in >= 0x20) && (*in <= 0x7F));
Daniel Veillard48b2f892001-02-25 16:11:03 +00002891 nbchar = 0;
2892 }
Daniel Veillard50582112001-03-26 22:52:16 +00002893 ctxt->input->line = line;
2894 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002895 xmlParseCharDataComplex(ctxt, cdata);
2896}
2897
Daniel Veillard01c13b52002-12-10 15:19:08 +00002898/**
2899 * xmlParseCharDataComplex:
2900 * @ctxt: an XML parser context
2901 * @cdata: int indicating whether we are within a CDATA section
2902 *
2903 * parse a CharData section.this is the fallback function
2904 * of xmlParseCharData() when the parsing requires handling
2905 * of non-ASCII characters.
2906 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002907void
2908xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002909 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2910 int nbchar = 0;
2911 int cur, l;
2912 int count = 0;
2913
2914 SHRINK;
2915 GROW;
2916 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002917 while ((cur != '<') && /* checked */
2918 (cur != '&') &&
2919 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00002920 if ((cur == ']') && (NXT(1) == ']') &&
2921 (NXT(2) == '>')) {
2922 if (cdata) break;
2923 else {
2924 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2925 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2926 ctxt->sax->error(ctxt->userData,
2927 "Sequence ']]>' not allowed in content\n");
2928 /* Should this be relaxed ??? I see a "must here */
2929 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002930 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002931 }
2932 }
2933 COPY_BUF(l,buf,nbchar,cur);
2934 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2935 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002936 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002937 */
2938 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2939 if (areBlanks(ctxt, buf, nbchar)) {
2940 if (ctxt->sax->ignorableWhitespace != NULL)
2941 ctxt->sax->ignorableWhitespace(ctxt->userData,
2942 buf, nbchar);
2943 } else {
2944 if (ctxt->sax->characters != NULL)
2945 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2946 }
2947 }
2948 nbchar = 0;
2949 }
2950 count++;
2951 if (count > 50) {
2952 GROW;
2953 count = 0;
2954 }
2955 NEXTL(l);
2956 cur = CUR_CHAR(l);
2957 }
2958 if (nbchar != 0) {
2959 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002960 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002961 */
2962 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2963 if (areBlanks(ctxt, buf, nbchar)) {
2964 if (ctxt->sax->ignorableWhitespace != NULL)
2965 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2966 } else {
2967 if (ctxt->sax->characters != NULL)
2968 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2969 }
2970 }
2971 }
2972}
2973
2974/**
2975 * xmlParseExternalID:
2976 * @ctxt: an XML parser context
2977 * @publicID: a xmlChar** receiving PubidLiteral
2978 * @strict: indicate whether we should restrict parsing to only
2979 * production [75], see NOTE below
2980 *
2981 * Parse an External ID or a Public ID
2982 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002983 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002984 * 'PUBLIC' S PubidLiteral S SystemLiteral
2985 *
2986 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2987 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2988 *
2989 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2990 *
2991 * Returns the function returns SystemLiteral and in the second
2992 * case publicID receives PubidLiteral, is strict is off
2993 * it is possible to return NULL and have publicID set.
2994 */
2995
2996xmlChar *
2997xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2998 xmlChar *URI = NULL;
2999
3000 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003001
3002 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003003 if ((RAW == 'S') && (NXT(1) == 'Y') &&
3004 (NXT(2) == 'S') && (NXT(3) == 'T') &&
3005 (NXT(4) == 'E') && (NXT(5) == 'M')) {
3006 SKIP(6);
3007 if (!IS_BLANK(CUR)) {
3008 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3009 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3010 ctxt->sax->error(ctxt->userData,
3011 "Space required after 'SYSTEM'\n");
3012 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003013 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003014 }
3015 SKIP_BLANKS;
3016 URI = xmlParseSystemLiteral(ctxt);
3017 if (URI == NULL) {
3018 ctxt->errNo = XML_ERR_URI_REQUIRED;
3019 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3020 ctxt->sax->error(ctxt->userData,
3021 "xmlParseExternalID: SYSTEM, no URI\n");
3022 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003023 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003024 }
3025 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
3026 (NXT(2) == 'B') && (NXT(3) == 'L') &&
3027 (NXT(4) == 'I') && (NXT(5) == 'C')) {
3028 SKIP(6);
3029 if (!IS_BLANK(CUR)) {
3030 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3031 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3032 ctxt->sax->error(ctxt->userData,
3033 "Space required after 'PUBLIC'\n");
3034 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003035 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003036 }
3037 SKIP_BLANKS;
3038 *publicID = xmlParsePubidLiteral(ctxt);
3039 if (*publicID == NULL) {
3040 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
3041 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3042 ctxt->sax->error(ctxt->userData,
3043 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
3044 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003045 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003046 }
3047 if (strict) {
3048 /*
3049 * We don't handle [83] so "S SystemLiteral" is required.
3050 */
3051 if (!IS_BLANK(CUR)) {
3052 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3053 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3054 ctxt->sax->error(ctxt->userData,
3055 "Space required after the Public Identifier\n");
3056 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003057 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003058 }
3059 } else {
3060 /*
3061 * We handle [83] so we return immediately, if
3062 * "S SystemLiteral" is not detected. From a purely parsing
3063 * point of view that's a nice mess.
3064 */
3065 const xmlChar *ptr;
3066 GROW;
3067
3068 ptr = CUR_PTR;
3069 if (!IS_BLANK(*ptr)) return(NULL);
3070
3071 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3072 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3073 }
3074 SKIP_BLANKS;
3075 URI = xmlParseSystemLiteral(ctxt);
3076 if (URI == NULL) {
3077 ctxt->errNo = XML_ERR_URI_REQUIRED;
3078 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3079 ctxt->sax->error(ctxt->userData,
3080 "xmlParseExternalID: PUBLIC, no URI\n");
3081 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003082 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003083 }
3084 }
3085 return(URI);
3086}
3087
3088/**
3089 * xmlParseComment:
3090 * @ctxt: an XML parser context
3091 *
3092 * Skip an XML (SGML) comment <!-- .... -->
3093 * The spec says that "For compatibility, the string "--" (double-hyphen)
3094 * must not occur within comments. "
3095 *
3096 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3097 */
3098void
3099xmlParseComment(xmlParserCtxtPtr ctxt) {
3100 xmlChar *buf = NULL;
3101 int len;
3102 int size = XML_PARSER_BUFFER_SIZE;
3103 int q, ql;
3104 int r, rl;
3105 int cur, l;
3106 xmlParserInputState state;
3107 xmlParserInputPtr input = ctxt->input;
3108 int count = 0;
3109
3110 /*
3111 * Check that there is a comment right here.
3112 */
3113 if ((RAW != '<') || (NXT(1) != '!') ||
3114 (NXT(2) != '-') || (NXT(3) != '-')) return;
3115
3116 state = ctxt->instate;
3117 ctxt->instate = XML_PARSER_COMMENT;
3118 SHRINK;
3119 SKIP(4);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003120 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003121 if (buf == NULL) {
3122 xmlGenericError(xmlGenericErrorContext,
3123 "malloc of %d byte failed\n", size);
3124 ctxt->instate = state;
3125 return;
3126 }
3127 q = CUR_CHAR(ql);
3128 NEXTL(ql);
3129 r = CUR_CHAR(rl);
3130 NEXTL(rl);
3131 cur = CUR_CHAR(l);
3132 len = 0;
3133 while (IS_CHAR(cur) && /* checked */
3134 ((cur != '>') ||
3135 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003136 if ((r == '-') && (q == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003137 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
3138 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3139 ctxt->sax->error(ctxt->userData,
3140 "Comment must not contain '--' (double-hyphen)`\n");
3141 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003142 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003143 }
3144 if (len + 5 >= size) {
3145 size *= 2;
3146 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3147 if (buf == NULL) {
3148 xmlGenericError(xmlGenericErrorContext,
3149 "realloc of %d byte failed\n", size);
3150 ctxt->instate = state;
3151 return;
3152 }
3153 }
3154 COPY_BUF(ql,buf,len,q);
3155 q = r;
3156 ql = rl;
3157 r = cur;
3158 rl = l;
3159
3160 count++;
3161 if (count > 50) {
3162 GROW;
3163 count = 0;
3164 }
3165 NEXTL(l);
3166 cur = CUR_CHAR(l);
3167 if (cur == 0) {
3168 SHRINK;
3169 GROW;
3170 cur = CUR_CHAR(l);
3171 }
3172 }
3173 buf[len] = 0;
3174 if (!IS_CHAR(cur)) {
3175 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
3176 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3177 ctxt->sax->error(ctxt->userData,
3178 "Comment not terminated \n<!--%.50s\n", buf);
3179 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003180 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003181 xmlFree(buf);
3182 } else {
3183 if (input != ctxt->input) {
3184 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3185 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3186 ctxt->sax->error(ctxt->userData,
3187"Comment doesn't start and stop in the same entity\n");
3188 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003189 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003190 }
3191 NEXT;
3192 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3193 (!ctxt->disableSAX))
3194 ctxt->sax->comment(ctxt->userData, buf);
3195 xmlFree(buf);
3196 }
3197 ctxt->instate = state;
3198}
3199
3200/**
3201 * xmlParsePITarget:
3202 * @ctxt: an XML parser context
3203 *
3204 * parse the name of a PI
3205 *
3206 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3207 *
3208 * Returns the PITarget name or NULL
3209 */
3210
3211xmlChar *
3212xmlParsePITarget(xmlParserCtxtPtr ctxt) {
3213 xmlChar *name;
3214
3215 name = xmlParseName(ctxt);
3216 if ((name != NULL) &&
3217 ((name[0] == 'x') || (name[0] == 'X')) &&
3218 ((name[1] == 'm') || (name[1] == 'M')) &&
3219 ((name[2] == 'l') || (name[2] == 'L'))) {
3220 int i;
3221 if ((name[0] == 'x') && (name[1] == 'm') &&
3222 (name[2] == 'l') && (name[3] == 0)) {
3223 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3224 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3225 ctxt->sax->error(ctxt->userData,
3226 "XML declaration allowed only at the start of the document\n");
3227 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003228 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003229 return(name);
3230 } else if (name[3] == 0) {
3231 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3232 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3233 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
3234 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003235 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003236 return(name);
3237 }
3238 for (i = 0;;i++) {
3239 if (xmlW3CPIs[i] == NULL) break;
3240 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3241 return(name);
3242 }
3243 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3244 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3245 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003246 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003247 }
3248 }
3249 return(name);
3250}
3251
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype informations
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must read: optional blanks, "catalog", optional blanks,
 * '=', optional blanks, a quoted URL, optional blanks. A syntax error
 * raises the XML_WAR_CATALOG_PI warning, except for a missing '=',
 * which is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;     /* scan position */
    const xmlChar *start;           /* first byte of the quoted URL */
    xmlChar *URL = NULL;
    xmlChar quote;

    while (IS_BLANK(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    while (IS_BLANK(*p))
        p++;
    if (*p != '=') {
        /* not reported as a syntax error */
        return;
    }
    p++;

    while (IS_BLANK(*p))
        p++;
    quote = *p;
    if (!((quote == '\'') || (quote == '"')))
        goto error;
    p++;
    start = p;

    /* find the matching closing quote */
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the URL */
    while (IS_BLANK(*p))
        p++;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3314
Owen Taylor3473f882001-02-23 17:55:21 +00003315/**
3316 * xmlParsePI:
3317 * @ctxt: an XML parser context
3318 *
3319 * parse an XML Processing Instruction.
3320 *
3321 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3322 *
3323 * The processing is transfered to SAX once parsed.
3324 */
3325
3326void
3327xmlParsePI(xmlParserCtxtPtr ctxt) {
3328 xmlChar *buf = NULL;
3329 int len = 0;
3330 int size = XML_PARSER_BUFFER_SIZE;
3331 int cur, l;
3332 xmlChar *target;
3333 xmlParserInputState state;
3334 int count = 0;
3335
3336 if ((RAW == '<') && (NXT(1) == '?')) {
3337 xmlParserInputPtr input = ctxt->input;
3338 state = ctxt->instate;
3339 ctxt->instate = XML_PARSER_PI;
3340 /*
3341 * this is a Processing Instruction.
3342 */
3343 SKIP(2);
3344 SHRINK;
3345
3346 /*
3347 * Parse the target name and check for special support like
3348 * namespace.
3349 */
3350 target = xmlParsePITarget(ctxt);
3351 if (target != NULL) {
3352 if ((RAW == '?') && (NXT(1) == '>')) {
3353 if (input != ctxt->input) {
3354 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3355 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3356 ctxt->sax->error(ctxt->userData,
3357 "PI declaration doesn't start and stop in the same entity\n");
3358 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003359 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003360 }
3361 SKIP(2);
3362
3363 /*
3364 * SAX: PI detected.
3365 */
3366 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3367 (ctxt->sax->processingInstruction != NULL))
3368 ctxt->sax->processingInstruction(ctxt->userData,
3369 target, NULL);
3370 ctxt->instate = state;
3371 xmlFree(target);
3372 return;
3373 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003374 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003375 if (buf == NULL) {
3376 xmlGenericError(xmlGenericErrorContext,
3377 "malloc of %d byte failed\n", size);
3378 ctxt->instate = state;
3379 return;
3380 }
3381 cur = CUR;
3382 if (!IS_BLANK(cur)) {
3383 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3384 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3385 ctxt->sax->error(ctxt->userData,
3386 "xmlParsePI: PI %s space expected\n", target);
3387 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003388 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003389 }
3390 SKIP_BLANKS;
3391 cur = CUR_CHAR(l);
3392 while (IS_CHAR(cur) && /* checked */
3393 ((cur != '?') || (NXT(1) != '>'))) {
3394 if (len + 5 >= size) {
3395 size *= 2;
3396 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3397 if (buf == NULL) {
3398 xmlGenericError(xmlGenericErrorContext,
3399 "realloc of %d byte failed\n", size);
3400 ctxt->instate = state;
3401 return;
3402 }
3403 }
3404 count++;
3405 if (count > 50) {
3406 GROW;
3407 count = 0;
3408 }
3409 COPY_BUF(l,buf,len,cur);
3410 NEXTL(l);
3411 cur = CUR_CHAR(l);
3412 if (cur == 0) {
3413 SHRINK;
3414 GROW;
3415 cur = CUR_CHAR(l);
3416 }
3417 }
3418 buf[len] = 0;
3419 if (cur != '?') {
3420 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3421 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3422 ctxt->sax->error(ctxt->userData,
3423 "xmlParsePI: PI %s never end ...\n", target);
3424 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003425 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003426 } else {
3427 if (input != ctxt->input) {
3428 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3429 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3430 ctxt->sax->error(ctxt->userData,
3431 "PI declaration doesn't start and stop in the same entity\n");
3432 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003433 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003434 }
3435 SKIP(2);
3436
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003437#ifdef LIBXML_CATALOG_ENABLED
3438 if (((state == XML_PARSER_MISC) ||
3439 (state == XML_PARSER_START)) &&
3440 (xmlStrEqual(target, XML_CATALOG_PI))) {
3441 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3442 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3443 (allow == XML_CATA_ALLOW_ALL))
3444 xmlParseCatalogPI(ctxt, buf);
3445 }
3446#endif
3447
3448
Owen Taylor3473f882001-02-23 17:55:21 +00003449 /*
3450 * SAX: PI detected.
3451 */
3452 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3453 (ctxt->sax->processingInstruction != NULL))
3454 ctxt->sax->processingInstruction(ctxt->userData,
3455 target, buf);
3456 }
3457 xmlFree(buf);
3458 xmlFree(target);
3459 } else {
3460 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3461 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3462 ctxt->sax->error(ctxt->userData,
3463 "xmlParsePI : no target name\n");
3464 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003465 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003466 }
3467 ctxt->instate = state;
3468 }
3469}
3470
3471/**
3472 * xmlParseNotationDecl:
3473 * @ctxt: an XML parser context
3474 *
3475 * parse a notation declaration
3476 *
3477 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3478 *
3479 * Hence there is actually 3 choices:
3480 * 'PUBLIC' S PubidLiteral
3481 * 'PUBLIC' S PubidLiteral S SystemLiteral
3482 * and 'SYSTEM' S SystemLiteral
3483 *
3484 * See the NOTE on xmlParseExternalID().
3485 */
3486
3487void
3488xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3489 xmlChar *name;
3490 xmlChar *Pubid;
3491 xmlChar *Systemid;
3492
3493 if ((RAW == '<') && (NXT(1) == '!') &&
3494 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3495 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3496 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3497 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3498 xmlParserInputPtr input = ctxt->input;
3499 SHRINK;
3500 SKIP(10);
3501 if (!IS_BLANK(CUR)) {
3502 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3503 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3504 ctxt->sax->error(ctxt->userData,
3505 "Space required after '<!NOTATION'\n");
3506 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003507 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003508 return;
3509 }
3510 SKIP_BLANKS;
3511
Daniel Veillard76d66f42001-05-16 21:05:17 +00003512 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003513 if (name == NULL) {
3514 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3515 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3516 ctxt->sax->error(ctxt->userData,
3517 "NOTATION: Name expected here\n");
3518 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003519 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003520 return;
3521 }
3522 if (!IS_BLANK(CUR)) {
3523 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3524 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3525 ctxt->sax->error(ctxt->userData,
3526 "Space required after the NOTATION name'\n");
3527 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003528 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003529 return;
3530 }
3531 SKIP_BLANKS;
3532
3533 /*
3534 * Parse the IDs.
3535 */
3536 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3537 SKIP_BLANKS;
3538
3539 if (RAW == '>') {
3540 if (input != ctxt->input) {
3541 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3542 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3543 ctxt->sax->error(ctxt->userData,
3544"Notation declaration doesn't start and stop in the same entity\n");
3545 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003546 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003547 }
3548 NEXT;
3549 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3550 (ctxt->sax->notationDecl != NULL))
3551 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3552 } else {
3553 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3554 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3555 ctxt->sax->error(ctxt->userData,
3556 "'>' required to close NOTATION declaration\n");
3557 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003558 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003559 }
3560 xmlFree(name);
3561 if (Systemid != NULL) xmlFree(Systemid);
3562 if (Pubid != NULL) xmlFree(Pubid);
3563 }
3564}
3565
3566/**
3567 * xmlParseEntityDecl:
3568 * @ctxt: an XML parser context
3569 *
3570 * parse <!ENTITY declarations
3571 *
3572 * [70] EntityDecl ::= GEDecl | PEDecl
3573 *
3574 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3575 *
3576 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3577 *
3578 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3579 *
3580 * [74] PEDef ::= EntityValue | ExternalID
3581 *
3582 * [76] NDataDecl ::= S 'NDATA' S Name
3583 *
3584 * [ VC: Notation Declared ]
3585 * The Name must match the declared name of a notation.
3586 */
3587
3588void
3589xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3590 xmlChar *name = NULL;
3591 xmlChar *value = NULL;
3592 xmlChar *URI = NULL, *literal = NULL;
3593 xmlChar *ndata = NULL;
3594 int isParameter = 0;
3595 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003596 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003597
3598 GROW;
3599 if ((RAW == '<') && (NXT(1) == '!') &&
3600 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3601 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3602 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3603 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003604 SHRINK;
3605 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003606 skipped = SKIP_BLANKS;
3607 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003608 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3609 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3610 ctxt->sax->error(ctxt->userData,
3611 "Space required after '<!ENTITY'\n");
3612 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003613 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003614 }
Owen Taylor3473f882001-02-23 17:55:21 +00003615
3616 if (RAW == '%') {
3617 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003618 skipped = SKIP_BLANKS;
3619 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003620 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3621 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3622 ctxt->sax->error(ctxt->userData,
3623 "Space required after '%'\n");
3624 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003625 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003626 }
Owen Taylor3473f882001-02-23 17:55:21 +00003627 isParameter = 1;
3628 }
3629
Daniel Veillard76d66f42001-05-16 21:05:17 +00003630 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003631 if (name == NULL) {
3632 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3633 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3634 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3635 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003636 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003637 return;
3638 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003639 skipped = SKIP_BLANKS;
3640 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003641 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3642 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3643 ctxt->sax->error(ctxt->userData,
3644 "Space required after the entity name\n");
3645 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003646 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003647 }
Owen Taylor3473f882001-02-23 17:55:21 +00003648
Daniel Veillardf5582f12002-06-11 10:08:16 +00003649 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003650 /*
3651 * handle the various case of definitions...
3652 */
3653 if (isParameter) {
3654 if ((RAW == '"') || (RAW == '\'')) {
3655 value = xmlParseEntityValue(ctxt, &orig);
3656 if (value) {
3657 if ((ctxt->sax != NULL) &&
3658 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3659 ctxt->sax->entityDecl(ctxt->userData, name,
3660 XML_INTERNAL_PARAMETER_ENTITY,
3661 NULL, NULL, value);
3662 }
3663 } else {
3664 URI = xmlParseExternalID(ctxt, &literal, 1);
3665 if ((URI == NULL) && (literal == NULL)) {
3666 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3667 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3668 ctxt->sax->error(ctxt->userData,
3669 "Entity value required\n");
3670 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003671 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003672 }
3673 if (URI) {
3674 xmlURIPtr uri;
3675
3676 uri = xmlParseURI((const char *) URI);
3677 if (uri == NULL) {
3678 ctxt->errNo = XML_ERR_INVALID_URI;
3679 if ((ctxt->sax != NULL) &&
3680 (!ctxt->disableSAX) &&
3681 (ctxt->sax->error != NULL))
3682 ctxt->sax->error(ctxt->userData,
3683 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003684 /*
3685 * This really ought to be a well formedness error
3686 * but the XML Core WG decided otherwise c.f. issue
3687 * E26 of the XML erratas.
3688 */
Owen Taylor3473f882001-02-23 17:55:21 +00003689 } else {
3690 if (uri->fragment != NULL) {
3691 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3692 if ((ctxt->sax != NULL) &&
3693 (!ctxt->disableSAX) &&
3694 (ctxt->sax->error != NULL))
3695 ctxt->sax->error(ctxt->userData,
3696 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003697 /*
3698 * Okay this is foolish to block those but not
3699 * invalid URIs.
3700 */
Owen Taylor3473f882001-02-23 17:55:21 +00003701 ctxt->wellFormed = 0;
3702 } else {
3703 if ((ctxt->sax != NULL) &&
3704 (!ctxt->disableSAX) &&
3705 (ctxt->sax->entityDecl != NULL))
3706 ctxt->sax->entityDecl(ctxt->userData, name,
3707 XML_EXTERNAL_PARAMETER_ENTITY,
3708 literal, URI, NULL);
3709 }
3710 xmlFreeURI(uri);
3711 }
3712 }
3713 }
3714 } else {
3715 if ((RAW == '"') || (RAW == '\'')) {
3716 value = xmlParseEntityValue(ctxt, &orig);
3717 if ((ctxt->sax != NULL) &&
3718 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3719 ctxt->sax->entityDecl(ctxt->userData, name,
3720 XML_INTERNAL_GENERAL_ENTITY,
3721 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003722 /*
3723 * For expat compatibility in SAX mode.
3724 */
3725 if ((ctxt->myDoc == NULL) ||
3726 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3727 if (ctxt->myDoc == NULL) {
3728 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3729 }
3730 if (ctxt->myDoc->intSubset == NULL)
3731 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3732 BAD_CAST "fake", NULL, NULL);
3733
3734 entityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3735 NULL, NULL, value);
3736 }
Owen Taylor3473f882001-02-23 17:55:21 +00003737 } else {
3738 URI = xmlParseExternalID(ctxt, &literal, 1);
3739 if ((URI == NULL) && (literal == NULL)) {
3740 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3741 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3742 ctxt->sax->error(ctxt->userData,
3743 "Entity value required\n");
3744 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003745 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003746 }
3747 if (URI) {
3748 xmlURIPtr uri;
3749
3750 uri = xmlParseURI((const char *)URI);
3751 if (uri == NULL) {
3752 ctxt->errNo = XML_ERR_INVALID_URI;
3753 if ((ctxt->sax != NULL) &&
3754 (!ctxt->disableSAX) &&
3755 (ctxt->sax->error != NULL))
3756 ctxt->sax->error(ctxt->userData,
3757 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003758 /*
3759 * This really ought to be a well formedness error
3760 * but the XML Core WG decided otherwise c.f. issue
3761 * E26 of the XML erratas.
3762 */
Owen Taylor3473f882001-02-23 17:55:21 +00003763 } else {
3764 if (uri->fragment != NULL) {
3765 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3766 if ((ctxt->sax != NULL) &&
3767 (!ctxt->disableSAX) &&
3768 (ctxt->sax->error != NULL))
3769 ctxt->sax->error(ctxt->userData,
3770 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003771 /*
3772 * Okay this is foolish to block those but not
3773 * invalid URIs.
3774 */
Owen Taylor3473f882001-02-23 17:55:21 +00003775 ctxt->wellFormed = 0;
3776 }
3777 xmlFreeURI(uri);
3778 }
3779 }
3780 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3781 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3782 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3783 ctxt->sax->error(ctxt->userData,
3784 "Space required before 'NDATA'\n");
3785 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003786 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003787 }
3788 SKIP_BLANKS;
3789 if ((RAW == 'N') && (NXT(1) == 'D') &&
3790 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3791 (NXT(4) == 'A')) {
3792 SKIP(5);
3793 if (!IS_BLANK(CUR)) {
3794 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3795 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3796 ctxt->sax->error(ctxt->userData,
3797 "Space required after 'NDATA'\n");
3798 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003799 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003800 }
3801 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003802 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003803 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3804 (ctxt->sax->unparsedEntityDecl != NULL))
3805 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3806 literal, URI, ndata);
3807 } else {
3808 if ((ctxt->sax != NULL) &&
3809 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3810 ctxt->sax->entityDecl(ctxt->userData, name,
3811 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3812 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003813 /*
3814 * For expat compatibility in SAX mode.
3815 * assuming the entity repalcement was asked for
3816 */
3817 if ((ctxt->replaceEntities != 0) &&
3818 ((ctxt->myDoc == NULL) ||
3819 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
3820 if (ctxt->myDoc == NULL) {
3821 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3822 }
3823
3824 if (ctxt->myDoc->intSubset == NULL)
3825 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3826 BAD_CAST "fake", NULL, NULL);
3827 entityDecl(ctxt, name,
3828 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3829 literal, URI, NULL);
3830 }
Owen Taylor3473f882001-02-23 17:55:21 +00003831 }
3832 }
3833 }
3834 SKIP_BLANKS;
3835 if (RAW != '>') {
3836 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3837 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3838 ctxt->sax->error(ctxt->userData,
3839 "xmlParseEntityDecl: entity %s not terminated\n", name);
3840 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003841 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003842 } else {
3843 if (input != ctxt->input) {
3844 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3845 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3846 ctxt->sax->error(ctxt->userData,
3847"Entity declaration doesn't start and stop in the same entity\n");
3848 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003849 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003850 }
3851 NEXT;
3852 }
3853 if (orig != NULL) {
3854 /*
3855 * Ugly mechanism to save the raw entity value.
3856 */
3857 xmlEntityPtr cur = NULL;
3858
3859 if (isParameter) {
3860 if ((ctxt->sax != NULL) &&
3861 (ctxt->sax->getParameterEntity != NULL))
3862 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3863 } else {
3864 if ((ctxt->sax != NULL) &&
3865 (ctxt->sax->getEntity != NULL))
3866 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003867 if ((cur == NULL) && (ctxt->userData==ctxt)) {
3868 cur = getEntity(ctxt, name);
3869 }
Owen Taylor3473f882001-02-23 17:55:21 +00003870 }
3871 if (cur != NULL) {
3872 if (cur->orig != NULL)
3873 xmlFree(orig);
3874 else
3875 cur->orig = orig;
3876 } else
3877 xmlFree(orig);
3878 }
3879 if (name != NULL) xmlFree(name);
3880 if (value != NULL) xmlFree(value);
3881 if (URI != NULL) xmlFree(URI);
3882 if (literal != NULL) xmlFree(literal);
3883 if (ndata != NULL) xmlFree(ndata);
3884 }
3885}
3886
3887/**
3888 * xmlParseDefaultDecl:
3889 * @ctxt: an XML parser context
3890 * @value: Receive a possible fixed default value for the attribute
3891 *
3892 * Parse an attribute default declaration
3893 *
3894 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3895 *
3896 * [ VC: Required Attribute ]
3897 * if the default declaration is the keyword #REQUIRED, then the
3898 * attribute must be specified for all elements of the type in the
3899 * attribute-list declaration.
3900 *
3901 * [ VC: Attribute Default Legal ]
3902 * The declared default value must meet the lexical constraints of
3903 * the declared attribute type c.f. xmlValidateAttributeDecl()
3904 *
3905 * [ VC: Fixed Attribute Default ]
3906 * if an attribute has a default value declared with the #FIXED
3907 * keyword, instances of that attribute must match the default value.
3908 *
3909 * [ WFC: No < in Attribute Values ]
3910 * handled in xmlParseAttValue()
3911 *
3912 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3913 * or XML_ATTRIBUTE_FIXED.
3914 */
3915
3916int
3917xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3918 int val;
3919 xmlChar *ret;
3920
3921 *value = NULL;
3922 if ((RAW == '#') && (NXT(1) == 'R') &&
3923 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3924 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3925 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3926 (NXT(8) == 'D')) {
3927 SKIP(9);
3928 return(XML_ATTRIBUTE_REQUIRED);
3929 }
3930 if ((RAW == '#') && (NXT(1) == 'I') &&
3931 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3932 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3933 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3934 SKIP(8);
3935 return(XML_ATTRIBUTE_IMPLIED);
3936 }
3937 val = XML_ATTRIBUTE_NONE;
3938 if ((RAW == '#') && (NXT(1) == 'F') &&
3939 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3940 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3941 SKIP(6);
3942 val = XML_ATTRIBUTE_FIXED;
3943 if (!IS_BLANK(CUR)) {
3944 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3945 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3946 ctxt->sax->error(ctxt->userData,
3947 "Space required after '#FIXED'\n");
3948 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003949 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003950 }
3951 SKIP_BLANKS;
3952 }
3953 ret = xmlParseAttValue(ctxt);
3954 ctxt->instate = XML_PARSER_DTD;
3955 if (ret == NULL) {
3956 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3957 ctxt->sax->error(ctxt->userData,
3958 "Attribute default value declaration error\n");
3959 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003960 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003961 } else
3962 *value = ret;
3963 return(val);
3964}
3965
3966/**
3967 * xmlParseNotationType:
3968 * @ctxt: an XML parser context
3969 *
3970 * parse an Notation attribute type.
3971 *
3972 * Note: the leading 'NOTATION' S part has already being parsed...
3973 *
3974 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3975 *
3976 * [ VC: Notation Attributes ]
3977 * Values of this type must match one of the notation names included
3978 * in the declaration; all notation names in the declaration must be declared.
3979 *
3980 * Returns: the notation attribute tree built while parsing
3981 */
3982
3983xmlEnumerationPtr
3984xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3985 xmlChar *name;
3986 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3987
3988 if (RAW != '(') {
3989 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3990 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3991 ctxt->sax->error(ctxt->userData,
3992 "'(' required to start 'NOTATION'\n");
3993 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003994 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003995 return(NULL);
3996 }
3997 SHRINK;
3998 do {
3999 NEXT;
4000 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004001 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004002 if (name == NULL) {
4003 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4004 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4005 ctxt->sax->error(ctxt->userData,
4006 "Name expected in NOTATION declaration\n");
4007 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004008 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004009 return(ret);
4010 }
4011 cur = xmlCreateEnumeration(name);
4012 xmlFree(name);
4013 if (cur == NULL) return(ret);
4014 if (last == NULL) ret = last = cur;
4015 else {
4016 last->next = cur;
4017 last = cur;
4018 }
4019 SKIP_BLANKS;
4020 } while (RAW == '|');
4021 if (RAW != ')') {
4022 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
4023 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4024 ctxt->sax->error(ctxt->userData,
4025 "')' required to finish NOTATION declaration\n");
4026 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004027 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004028 if ((last != NULL) && (last != ret))
4029 xmlFreeEnumeration(last);
4030 return(ret);
4031 }
4032 NEXT;
4033 return(ret);
4034}
4035
4036/**
4037 * xmlParseEnumerationType:
4038 * @ctxt: an XML parser context
4039 *
4040 * parse an Enumeration attribute type.
4041 *
4042 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4043 *
4044 * [ VC: Enumeration ]
4045 * Values of this type must match one of the Nmtoken tokens in
4046 * the declaration
4047 *
4048 * Returns: the enumeration attribute tree built while parsing
4049 */
4050
4051xmlEnumerationPtr
4052xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4053 xmlChar *name;
4054 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4055
4056 if (RAW != '(') {
4057 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
4058 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4059 ctxt->sax->error(ctxt->userData,
4060 "'(' required to start ATTLIST enumeration\n");
4061 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004062 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004063 return(NULL);
4064 }
4065 SHRINK;
4066 do {
4067 NEXT;
4068 SKIP_BLANKS;
4069 name = xmlParseNmtoken(ctxt);
4070 if (name == NULL) {
4071 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
4072 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4073 ctxt->sax->error(ctxt->userData,
4074 "NmToken expected in ATTLIST enumeration\n");
4075 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004076 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004077 return(ret);
4078 }
4079 cur = xmlCreateEnumeration(name);
4080 xmlFree(name);
4081 if (cur == NULL) return(ret);
4082 if (last == NULL) ret = last = cur;
4083 else {
4084 last->next = cur;
4085 last = cur;
4086 }
4087 SKIP_BLANKS;
4088 } while (RAW == '|');
4089 if (RAW != ')') {
4090 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
4091 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4092 ctxt->sax->error(ctxt->userData,
4093 "')' required to finish ATTLIST enumeration\n");
4094 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004095 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004096 return(ret);
4097 }
4098 NEXT;
4099 return(ret);
4100}
4101
4102/**
4103 * xmlParseEnumeratedType:
4104 * @ctxt: an XML parser context
4105 * @tree: the enumeration tree built while parsing
4106 *
4107 * parse an Enumerated attribute type.
4108 *
4109 * [57] EnumeratedType ::= NotationType | Enumeration
4110 *
4111 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4112 *
4113 *
4114 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4115 */
4116
4117int
4118xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4119 if ((RAW == 'N') && (NXT(1) == 'O') &&
4120 (NXT(2) == 'T') && (NXT(3) == 'A') &&
4121 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4122 (NXT(6) == 'O') && (NXT(7) == 'N')) {
4123 SKIP(8);
4124 if (!IS_BLANK(CUR)) {
4125 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4126 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4127 ctxt->sax->error(ctxt->userData,
4128 "Space required after 'NOTATION'\n");
4129 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004130 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004131 return(0);
4132 }
4133 SKIP_BLANKS;
4134 *tree = xmlParseNotationType(ctxt);
4135 if (*tree == NULL) return(0);
4136 return(XML_ATTRIBUTE_NOTATION);
4137 }
4138 *tree = xmlParseEnumerationType(ctxt);
4139 if (*tree == NULL) return(0);
4140 return(XML_ATTRIBUTE_ENUMERATION);
4141}
4142
4143/**
4144 * xmlParseAttributeType:
4145 * @ctxt: an XML parser context
4146 * @tree: the enumeration tree built while parsing
4147 *
4148 * parse the Attribute list def for an element
4149 *
4150 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4151 *
4152 * [55] StringType ::= 'CDATA'
4153 *
4154 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4155 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4156 *
4157 * Validity constraints for attribute values syntax are checked in
4158 * xmlValidateAttributeValue()
4159 *
4160 * [ VC: ID ]
4161 * Values of type ID must match the Name production. A name must not
4162 * appear more than once in an XML document as a value of this type;
4163 * i.e., ID values must uniquely identify the elements which bear them.
4164 *
4165 * [ VC: One ID per Element Type ]
4166 * No element type may have more than one ID attribute specified.
4167 *
4168 * [ VC: ID Attribute Default ]
4169 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4170 *
4171 * [ VC: IDREF ]
4172 * Values of type IDREF must match the Name production, and values
4173 * of type IDREFS must match Names; each IDREF Name must match the value
4174 * of an ID attribute on some element in the XML document; i.e. IDREF
4175 * values must match the value of some ID attribute.
4176 *
4177 * [ VC: Entity Name ]
4178 * Values of type ENTITY must match the Name production, values
4179 * of type ENTITIES must match Names; each Entity Name must match the
4180 * name of an unparsed entity declared in the DTD.
4181 *
4182 * [ VC: Name Token ]
4183 * Values of type NMTOKEN must match the Nmtoken production; values
4184 * of type NMTOKENS must match Nmtokens.
4185 *
4186 * Returns the attribute type
4187 */
4188int
4189xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4190 SHRINK;
4191 if ((RAW == 'C') && (NXT(1) == 'D') &&
4192 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4193 (NXT(4) == 'A')) {
4194 SKIP(5);
4195 return(XML_ATTRIBUTE_CDATA);
4196 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4197 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4198 (NXT(4) == 'F') && (NXT(5) == 'S')) {
4199 SKIP(6);
4200 return(XML_ATTRIBUTE_IDREFS);
4201 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4202 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4203 (NXT(4) == 'F')) {
4204 SKIP(5);
4205 return(XML_ATTRIBUTE_IDREF);
4206 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4207 SKIP(2);
4208 return(XML_ATTRIBUTE_ID);
4209 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4210 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4211 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4212 SKIP(6);
4213 return(XML_ATTRIBUTE_ENTITY);
4214 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4215 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4216 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4217 (NXT(6) == 'E') && (NXT(7) == 'S')) {
4218 SKIP(8);
4219 return(XML_ATTRIBUTE_ENTITIES);
4220 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4221 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4222 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4223 (NXT(6) == 'N') && (NXT(7) == 'S')) {
4224 SKIP(8);
4225 return(XML_ATTRIBUTE_NMTOKENS);
4226 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4227 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4228 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4229 (NXT(6) == 'N')) {
4230 SKIP(7);
4231 return(XML_ATTRIBUTE_NMTOKEN);
4232 }
4233 return(xmlParseEnumeratedType(ctxt, tree));
4234}
4235
4236/**
4237 * xmlParseAttributeListDecl:
4238 * @ctxt: an XML parser context
4239 *
4240 * : parse the Attribute list def for an element
4241 *
4242 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4243 *
4244 * [53] AttDef ::= S Name S AttType S DefaultDecl
4245 *
4246 */
4247void
4248xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
4249 xmlChar *elemName;
4250 xmlChar *attrName;
4251 xmlEnumerationPtr tree;
4252
4253 if ((RAW == '<') && (NXT(1) == '!') &&
4254 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4255 (NXT(4) == 'T') && (NXT(5) == 'L') &&
4256 (NXT(6) == 'I') && (NXT(7) == 'S') &&
4257 (NXT(8) == 'T')) {
4258 xmlParserInputPtr input = ctxt->input;
4259
4260 SKIP(9);
4261 if (!IS_BLANK(CUR)) {
4262 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4263 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4264 ctxt->sax->error(ctxt->userData,
4265 "Space required after '<!ATTLIST'\n");
4266 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004267 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004268 }
4269 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004270 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004271 if (elemName == NULL) {
4272 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4273 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4274 ctxt->sax->error(ctxt->userData,
4275 "ATTLIST: no name for Element\n");
4276 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004277 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004278 return;
4279 }
4280 SKIP_BLANKS;
4281 GROW;
4282 while (RAW != '>') {
4283 const xmlChar *check = CUR_PTR;
4284 int type;
4285 int def;
4286 xmlChar *defaultValue = NULL;
4287
4288 GROW;
4289 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004290 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004291 if (attrName == NULL) {
4292 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4293 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4294 ctxt->sax->error(ctxt->userData,
4295 "ATTLIST: no name for Attribute\n");
4296 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004297 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004298 break;
4299 }
4300 GROW;
4301 if (!IS_BLANK(CUR)) {
4302 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4303 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4304 ctxt->sax->error(ctxt->userData,
4305 "Space required after the attribute name\n");
4306 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004307 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004308 if (attrName != NULL)
4309 xmlFree(attrName);
4310 if (defaultValue != NULL)
4311 xmlFree(defaultValue);
4312 break;
4313 }
4314 SKIP_BLANKS;
4315
4316 type = xmlParseAttributeType(ctxt, &tree);
4317 if (type <= 0) {
4318 if (attrName != NULL)
4319 xmlFree(attrName);
4320 if (defaultValue != NULL)
4321 xmlFree(defaultValue);
4322 break;
4323 }
4324
4325 GROW;
4326 if (!IS_BLANK(CUR)) {
4327 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4328 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4329 ctxt->sax->error(ctxt->userData,
4330 "Space required after the attribute type\n");
4331 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004332 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004333 if (attrName != NULL)
4334 xmlFree(attrName);
4335 if (defaultValue != NULL)
4336 xmlFree(defaultValue);
4337 if (tree != NULL)
4338 xmlFreeEnumeration(tree);
4339 break;
4340 }
4341 SKIP_BLANKS;
4342
4343 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4344 if (def <= 0) {
4345 if (attrName != NULL)
4346 xmlFree(attrName);
4347 if (defaultValue != NULL)
4348 xmlFree(defaultValue);
4349 if (tree != NULL)
4350 xmlFreeEnumeration(tree);
4351 break;
4352 }
4353
4354 GROW;
4355 if (RAW != '>') {
4356 if (!IS_BLANK(CUR)) {
4357 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4358 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4359 ctxt->sax->error(ctxt->userData,
4360 "Space required after the attribute default value\n");
4361 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004362 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004363 if (attrName != NULL)
4364 xmlFree(attrName);
4365 if (defaultValue != NULL)
4366 xmlFree(defaultValue);
4367 if (tree != NULL)
4368 xmlFreeEnumeration(tree);
4369 break;
4370 }
4371 SKIP_BLANKS;
4372 }
4373 if (check == CUR_PTR) {
4374 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4375 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4376 ctxt->sax->error(ctxt->userData,
4377 "xmlParseAttributeListDecl: detected internal error\n");
4378 if (attrName != NULL)
4379 xmlFree(attrName);
4380 if (defaultValue != NULL)
4381 xmlFree(defaultValue);
4382 if (tree != NULL)
4383 xmlFreeEnumeration(tree);
4384 break;
4385 }
4386 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4387 (ctxt->sax->attributeDecl != NULL))
4388 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4389 type, def, defaultValue, tree);
4390 if (attrName != NULL)
4391 xmlFree(attrName);
4392 if (defaultValue != NULL)
4393 xmlFree(defaultValue);
4394 GROW;
4395 }
4396 if (RAW == '>') {
4397 if (input != ctxt->input) {
4398 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4399 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4400 ctxt->sax->error(ctxt->userData,
4401"Attribute list declaration doesn't start and stop in the same entity\n");
4402 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004403 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004404 }
4405 NEXT;
4406 }
4407
4408 xmlFree(elemName);
4409 }
4410}
4411
4412/**
4413 * xmlParseElementMixedContentDecl:
4414 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004415 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004416 *
4417 * parse the declaration for a Mixed Element content
4418 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4419 *
4420 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4421 * '(' S? '#PCDATA' S? ')'
4422 *
4423 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4424 *
4425 * [ VC: No Duplicate Types ]
4426 * The same name must not appear more than once in a single
4427 * mixed-content declaration.
4428 *
4429 * returns: the list of the xmlElementContentPtr describing the element choices
4430 */
4431xmlElementContentPtr
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004432xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004433 xmlElementContentPtr ret = NULL, cur = NULL, n;
4434 xmlChar *elem = NULL;
4435
4436 GROW;
4437 if ((RAW == '#') && (NXT(1) == 'P') &&
4438 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4439 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4440 (NXT(6) == 'A')) {
4441 SKIP(7);
4442 SKIP_BLANKS;
4443 SHRINK;
4444 if (RAW == ')') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004445 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4446 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4447 if (ctxt->vctxt.error != NULL)
4448 ctxt->vctxt.error(ctxt->vctxt.userData,
4449"Element content declaration doesn't start and stop in the same entity\n");
4450 ctxt->valid = 0;
4451 }
Owen Taylor3473f882001-02-23 17:55:21 +00004452 NEXT;
4453 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4454 if (RAW == '*') {
4455 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4456 NEXT;
4457 }
4458 return(ret);
4459 }
4460 if ((RAW == '(') || (RAW == '|')) {
4461 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4462 if (ret == NULL) return(NULL);
4463 }
4464 while (RAW == '|') {
4465 NEXT;
4466 if (elem == NULL) {
4467 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4468 if (ret == NULL) return(NULL);
4469 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004470 if (cur != NULL)
4471 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004472 cur = ret;
4473 } else {
4474 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4475 if (n == NULL) return(NULL);
4476 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004477 if (n->c1 != NULL)
4478 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004479 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004480 if (n != NULL)
4481 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004482 cur = n;
4483 xmlFree(elem);
4484 }
4485 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004486 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004487 if (elem == NULL) {
4488 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4489 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4490 ctxt->sax->error(ctxt->userData,
4491 "xmlParseElementMixedContentDecl : Name expected\n");
4492 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004493 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004494 xmlFreeElementContent(cur);
4495 return(NULL);
4496 }
4497 SKIP_BLANKS;
4498 GROW;
4499 }
4500 if ((RAW == ')') && (NXT(1) == '*')) {
4501 if (elem != NULL) {
4502 cur->c2 = xmlNewElementContent(elem,
4503 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004504 if (cur->c2 != NULL)
4505 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004506 xmlFree(elem);
4507 }
4508 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004509 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4510 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4511 if (ctxt->vctxt.error != NULL)
4512 ctxt->vctxt.error(ctxt->vctxt.userData,
4513"Element content declaration doesn't start and stop in the same entity\n");
4514 ctxt->valid = 0;
4515 }
Owen Taylor3473f882001-02-23 17:55:21 +00004516 SKIP(2);
4517 } else {
4518 if (elem != NULL) xmlFree(elem);
4519 xmlFreeElementContent(ret);
4520 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4521 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4522 ctxt->sax->error(ctxt->userData,
4523 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4524 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004525 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004526 return(NULL);
4527 }
4528
4529 } else {
4530 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4531 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4532 ctxt->sax->error(ctxt->userData,
4533 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4534 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004535 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004536 }
4537 return(ret);
4538}
4539
4540/**
4541 * xmlParseElementChildrenContentDecl:
4542 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004543 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004544 *
 * parse the declaration for an element children content model
4546 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4547 *
4548 *
4549 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4550 *
4551 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4552 *
4553 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4554 *
4555 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4556 *
4557 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4558 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004559 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004560 * opening or closing parentheses in a choice, seq, or Mixed
4561 * construct is contained in the replacement text for a parameter
4562 * entity, both must be contained in the same replacement text. For
4563 * interoperability, if a parameter-entity reference appears in a
4564 * choice, seq, or Mixed construct, its replacement text should not
4565 * be empty, and neither the first nor last non-blank character of
4566 * the replacement text should be a connector (| or ,).
4567 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004568 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004569 * hierarchy.
4570 */
4571xmlElementContentPtr
Owen Taylor3473f882001-02-23 17:55:21 +00004572xmlParseElementChildrenContentDecl
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004573(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004574 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4575 xmlChar *elem;
4576 xmlChar type = 0;
4577
4578 SKIP_BLANKS;
4579 GROW;
4580 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004581 xmlParserInputPtr input = ctxt->input;
4582
Owen Taylor3473f882001-02-23 17:55:21 +00004583 /* Recurse on first child */
4584 NEXT;
4585 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004586 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004587 SKIP_BLANKS;
4588 GROW;
4589 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004590 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004591 if (elem == NULL) {
4592 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4593 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4594 ctxt->sax->error(ctxt->userData,
4595 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4596 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004597 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004598 return(NULL);
4599 }
4600 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004601 if (cur == NULL) {
4602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4603 ctxt->sax->error(ctxt->userData,
4604 "xmlParseElementChildrenContentDecl : out of memory\n");
4605 ctxt->errNo = XML_ERR_NO_MEMORY;
4606 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4607 xmlFree(elem);
4608 return(NULL);
4609 }
Owen Taylor3473f882001-02-23 17:55:21 +00004610 GROW;
4611 if (RAW == '?') {
4612 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4613 NEXT;
4614 } else if (RAW == '*') {
4615 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4616 NEXT;
4617 } else if (RAW == '+') {
4618 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4619 NEXT;
4620 } else {
4621 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4622 }
4623 xmlFree(elem);
4624 GROW;
4625 }
4626 SKIP_BLANKS;
4627 SHRINK;
4628 while (RAW != ')') {
4629 /*
4630 * Each loop we parse one separator and one element.
4631 */
4632 if (RAW == ',') {
4633 if (type == 0) type = CUR;
4634
4635 /*
4636 * Detect "Name | Name , Name" error
4637 */
4638 else if (type != CUR) {
4639 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4640 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4641 ctxt->sax->error(ctxt->userData,
4642 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4643 type);
4644 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004645 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004646 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004647 xmlFreeElementContent(last);
4648 if (ret != NULL)
4649 xmlFreeElementContent(ret);
4650 return(NULL);
4651 }
4652 NEXT;
4653
4654 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4655 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004656 if ((last != NULL) && (last != ret))
4657 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004658 xmlFreeElementContent(ret);
4659 return(NULL);
4660 }
4661 if (last == NULL) {
4662 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004663 if (ret != NULL)
4664 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004665 ret = cur = op;
4666 } else {
4667 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004668 if (op != NULL)
4669 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004670 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004671 if (last != NULL)
4672 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004673 cur =op;
4674 last = NULL;
4675 }
4676 } else if (RAW == '|') {
4677 if (type == 0) type = CUR;
4678
4679 /*
4680 * Detect "Name , Name | Name" error
4681 */
4682 else if (type != CUR) {
4683 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4684 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4685 ctxt->sax->error(ctxt->userData,
4686 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4687 type);
4688 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004689 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004690 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004691 xmlFreeElementContent(last);
4692 if (ret != NULL)
4693 xmlFreeElementContent(ret);
4694 return(NULL);
4695 }
4696 NEXT;
4697
4698 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4699 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004700 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004701 xmlFreeElementContent(last);
4702 if (ret != NULL)
4703 xmlFreeElementContent(ret);
4704 return(NULL);
4705 }
4706 if (last == NULL) {
4707 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004708 if (ret != NULL)
4709 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004710 ret = cur = op;
4711 } else {
4712 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004713 if (op != NULL)
4714 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004715 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004716 if (last != NULL)
4717 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004718 cur =op;
4719 last = NULL;
4720 }
4721 } else {
4722 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4723 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4724 ctxt->sax->error(ctxt->userData,
4725 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4726 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004727 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004728 if (ret != NULL)
4729 xmlFreeElementContent(ret);
4730 return(NULL);
4731 }
4732 GROW;
4733 SKIP_BLANKS;
4734 GROW;
4735 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004736 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004737 /* Recurse on second child */
4738 NEXT;
4739 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004740 last = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004741 SKIP_BLANKS;
4742 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004743 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004744 if (elem == NULL) {
4745 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4746 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4747 ctxt->sax->error(ctxt->userData,
4748 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4749 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004750 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004751 if (ret != NULL)
4752 xmlFreeElementContent(ret);
4753 return(NULL);
4754 }
4755 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4756 xmlFree(elem);
4757 if (RAW == '?') {
4758 last->ocur = XML_ELEMENT_CONTENT_OPT;
4759 NEXT;
4760 } else if (RAW == '*') {
4761 last->ocur = XML_ELEMENT_CONTENT_MULT;
4762 NEXT;
4763 } else if (RAW == '+') {
4764 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4765 NEXT;
4766 } else {
4767 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4768 }
4769 }
4770 SKIP_BLANKS;
4771 GROW;
4772 }
4773 if ((cur != NULL) && (last != NULL)) {
4774 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004775 if (last != NULL)
4776 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004777 }
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004778 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4779 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4780 if (ctxt->vctxt.error != NULL)
4781 ctxt->vctxt.error(ctxt->vctxt.userData,
4782"Element content declaration doesn't start and stop in the same entity\n");
4783 ctxt->valid = 0;
4784 }
Owen Taylor3473f882001-02-23 17:55:21 +00004785 NEXT;
4786 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004787 if (ret != NULL)
4788 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004789 NEXT;
4790 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004791 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004792 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004793 cur = ret;
4794 /*
4795 * Some normalization:
4796 * (a | b* | c?)* == (a | b | c)*
4797 */
4798 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4799 if ((cur->c1 != NULL) &&
4800 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4801 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4802 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4803 if ((cur->c2 != NULL) &&
4804 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4805 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4806 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4807 cur = cur->c2;
4808 }
4809 }
Owen Taylor3473f882001-02-23 17:55:21 +00004810 NEXT;
4811 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004812 if (ret != NULL) {
4813 int found = 0;
4814
Daniel Veillarde470df72001-04-18 21:41:07 +00004815 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004816 /*
4817 * Some normalization:
4818 * (a | b*)+ == (a | b)*
4819 * (a | b?)+ == (a | b)*
4820 */
4821 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4822 if ((cur->c1 != NULL) &&
4823 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4824 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4825 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4826 found = 1;
4827 }
4828 if ((cur->c2 != NULL) &&
4829 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4830 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4831 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4832 found = 1;
4833 }
4834 cur = cur->c2;
4835 }
4836 if (found)
4837 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4838 }
Owen Taylor3473f882001-02-23 17:55:21 +00004839 NEXT;
4840 }
4841 return(ret);
4842}
4843
4844/**
4845 * xmlParseElementContentDecl:
4846 * @ctxt: an XML parser context
4847 * @name: the name of the element being defined.
4848 * @result: the Element Content pointer will be stored here if any
4849 *
4850 * parse the declaration for an Element content either Mixed or Children,
4851 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4852 *
4853 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4854 *
4855 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4856 */
4857
4858int
4859xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4860 xmlElementContentPtr *result) {
4861
4862 xmlElementContentPtr tree = NULL;
4863 xmlParserInputPtr input = ctxt->input;
4864 int res;
4865
4866 *result = NULL;
4867
4868 if (RAW != '(') {
4869 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4870 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4871 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004872 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004873 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004874 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004875 return(-1);
4876 }
4877 NEXT;
4878 GROW;
4879 SKIP_BLANKS;
4880 if ((RAW == '#') && (NXT(1) == 'P') &&
4881 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4882 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4883 (NXT(6) == 'A')) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004884 tree = xmlParseElementMixedContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004885 res = XML_ELEMENT_TYPE_MIXED;
4886 } else {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004887 tree = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004888 res = XML_ELEMENT_TYPE_ELEMENT;
4889 }
Owen Taylor3473f882001-02-23 17:55:21 +00004890 SKIP_BLANKS;
4891 *result = tree;
4892 return(res);
4893}
4894
4895/**
4896 * xmlParseElementDecl:
4897 * @ctxt: an XML parser context
4898 *
4899 * parse an Element declaration.
4900 *
4901 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4902 *
4903 * [ VC: Unique Element Type Declaration ]
4904 * No element type may be declared more than once
4905 *
4906 * Returns the type of the element, or -1 in case of error
4907 */
4908int
4909xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4910 xmlChar *name;
4911 int ret = -1;
4912 xmlElementContentPtr content = NULL;
4913
4914 GROW;
4915 if ((RAW == '<') && (NXT(1) == '!') &&
4916 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4917 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4918 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4919 (NXT(8) == 'T')) {
4920 xmlParserInputPtr input = ctxt->input;
4921
4922 SKIP(9);
4923 if (!IS_BLANK(CUR)) {
4924 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4925 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4926 ctxt->sax->error(ctxt->userData,
4927 "Space required after 'ELEMENT'\n");
4928 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004929 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004930 }
4931 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004932 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004933 if (name == NULL) {
4934 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4935 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4936 ctxt->sax->error(ctxt->userData,
4937 "xmlParseElementDecl: no name for Element\n");
4938 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004939 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004940 return(-1);
4941 }
4942 while ((RAW == 0) && (ctxt->inputNr > 1))
4943 xmlPopInput(ctxt);
4944 if (!IS_BLANK(CUR)) {
4945 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4946 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4947 ctxt->sax->error(ctxt->userData,
4948 "Space required after the element name\n");
4949 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004950 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004951 }
4952 SKIP_BLANKS;
4953 if ((RAW == 'E') && (NXT(1) == 'M') &&
4954 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4955 (NXT(4) == 'Y')) {
4956 SKIP(5);
4957 /*
4958 * Element must always be empty.
4959 */
4960 ret = XML_ELEMENT_TYPE_EMPTY;
4961 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4962 (NXT(2) == 'Y')) {
4963 SKIP(3);
4964 /*
4965 * Element is a generic container.
4966 */
4967 ret = XML_ELEMENT_TYPE_ANY;
4968 } else if (RAW == '(') {
4969 ret = xmlParseElementContentDecl(ctxt, name, &content);
4970 } else {
4971 /*
4972 * [ WFC: PEs in Internal Subset ] error handling.
4973 */
4974 if ((RAW == '%') && (ctxt->external == 0) &&
4975 (ctxt->inputNr == 1)) {
4976 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4977 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4978 ctxt->sax->error(ctxt->userData,
4979 "PEReference: forbidden within markup decl in internal subset\n");
4980 } else {
4981 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4982 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4983 ctxt->sax->error(ctxt->userData,
4984 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4985 }
4986 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004987 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004988 if (name != NULL) xmlFree(name);
4989 return(-1);
4990 }
4991
4992 SKIP_BLANKS;
4993 /*
4994 * Pop-up of finished entities.
4995 */
4996 while ((RAW == 0) && (ctxt->inputNr > 1))
4997 xmlPopInput(ctxt);
4998 SKIP_BLANKS;
4999
5000 if (RAW != '>') {
5001 ctxt->errNo = XML_ERR_GT_REQUIRED;
5002 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5003 ctxt->sax->error(ctxt->userData,
5004 "xmlParseElementDecl: expected '>' at the end\n");
5005 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005006 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005007 } else {
5008 if (input != ctxt->input) {
5009 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5010 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5011 ctxt->sax->error(ctxt->userData,
5012"Element declaration doesn't start and stop in the same entity\n");
5013 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005014 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005015 }
5016
5017 NEXT;
5018 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5019 (ctxt->sax->elementDecl != NULL))
5020 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5021 content);
5022 }
5023 if (content != NULL) {
5024 xmlFreeElementContent(content);
5025 }
5026 if (name != NULL) {
5027 xmlFree(name);
5028 }
5029 }
5030 return(ret);
5031}
5032
5033/**
Owen Taylor3473f882001-02-23 17:55:21 +00005034 * xmlParseConditionalSections
5035 * @ctxt: an XML parser context
5036 *
5037 * [61] conditionalSect ::= includeSect | ignoreSect
5038 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5039 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5040 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5041 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5042 */
5043
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005044static void
Owen Taylor3473f882001-02-23 17:55:21 +00005045xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5046 SKIP(3);
5047 SKIP_BLANKS;
5048 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
5049 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
5050 (NXT(6) == 'E')) {
5051 SKIP(7);
5052 SKIP_BLANKS;
5053 if (RAW != '[') {
5054 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5055 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5056 ctxt->sax->error(ctxt->userData,
5057 "XML conditional section '[' expected\n");
5058 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005059 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005060 } else {
5061 NEXT;
5062 }
5063 if (xmlParserDebugEntities) {
5064 if ((ctxt->input != NULL) && (ctxt->input->filename))
5065 xmlGenericError(xmlGenericErrorContext,
5066 "%s(%d): ", ctxt->input->filename,
5067 ctxt->input->line);
5068 xmlGenericError(xmlGenericErrorContext,
5069 "Entering INCLUDE Conditional Section\n");
5070 }
5071
5072 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5073 (NXT(2) != '>'))) {
5074 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005075 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005076
5077 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5078 xmlParseConditionalSections(ctxt);
5079 } else if (IS_BLANK(CUR)) {
5080 NEXT;
5081 } else if (RAW == '%') {
5082 xmlParsePEReference(ctxt);
5083 } else
5084 xmlParseMarkupDecl(ctxt);
5085
5086 /*
5087 * Pop-up of finished entities.
5088 */
5089 while ((RAW == 0) && (ctxt->inputNr > 1))
5090 xmlPopInput(ctxt);
5091
Daniel Veillardfdc91562002-07-01 21:52:03 +00005092 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005093 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5094 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5095 ctxt->sax->error(ctxt->userData,
5096 "Content error in the external subset\n");
5097 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005098 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005099 break;
5100 }
5101 }
5102 if (xmlParserDebugEntities) {
5103 if ((ctxt->input != NULL) && (ctxt->input->filename))
5104 xmlGenericError(xmlGenericErrorContext,
5105 "%s(%d): ", ctxt->input->filename,
5106 ctxt->input->line);
5107 xmlGenericError(xmlGenericErrorContext,
5108 "Leaving INCLUDE Conditional Section\n");
5109 }
5110
5111 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
5112 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
5113 int state;
5114 int instate;
5115 int depth = 0;
5116
5117 SKIP(6);
5118 SKIP_BLANKS;
5119 if (RAW != '[') {
5120 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5121 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5122 ctxt->sax->error(ctxt->userData,
5123 "XML conditional section '[' expected\n");
5124 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005125 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005126 } else {
5127 NEXT;
5128 }
5129 if (xmlParserDebugEntities) {
5130 if ((ctxt->input != NULL) && (ctxt->input->filename))
5131 xmlGenericError(xmlGenericErrorContext,
5132 "%s(%d): ", ctxt->input->filename,
5133 ctxt->input->line);
5134 xmlGenericError(xmlGenericErrorContext,
5135 "Entering IGNORE Conditional Section\n");
5136 }
5137
5138 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005139 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005140 * But disable SAX event generating DTD building in the meantime
5141 */
5142 state = ctxt->disableSAX;
5143 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005144 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005145 ctxt->instate = XML_PARSER_IGNORE;
5146
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005147 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005148 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5149 depth++;
5150 SKIP(3);
5151 continue;
5152 }
5153 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5154 if (--depth >= 0) SKIP(3);
5155 continue;
5156 }
5157 NEXT;
5158 continue;
5159 }
5160
5161 ctxt->disableSAX = state;
5162 ctxt->instate = instate;
5163
5164 if (xmlParserDebugEntities) {
5165 if ((ctxt->input != NULL) && (ctxt->input->filename))
5166 xmlGenericError(xmlGenericErrorContext,
5167 "%s(%d): ", ctxt->input->filename,
5168 ctxt->input->line);
5169 xmlGenericError(xmlGenericErrorContext,
5170 "Leaving IGNORE Conditional Section\n");
5171 }
5172
5173 } else {
5174 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5175 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5176 ctxt->sax->error(ctxt->userData,
5177 "XML conditional section INCLUDE or IGNORE keyword expected\n");
5178 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005179 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005180 }
5181
5182 if (RAW == 0)
5183 SHRINK;
5184
5185 if (RAW == 0) {
5186 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
5187 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5188 ctxt->sax->error(ctxt->userData,
5189 "XML conditional section not closed\n");
5190 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005191 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005192 } else {
5193 SKIP(3);
5194 }
5195}
5196
5197/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005198 * xmlParseMarkupDecl:
5199 * @ctxt: an XML parser context
5200 *
5201 * parse Markup declarations
5202 *
5203 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5204 * NotationDecl | PI | Comment
5205 *
5206 * [ VC: Proper Declaration/PE Nesting ]
5207 * Parameter-entity replacement text must be properly nested with
5208 * markup declarations. That is to say, if either the first character
5209 * or the last character of a markup declaration (markupdecl above) is
5210 * contained in the replacement text for a parameter-entity reference,
5211 * both must be contained in the same replacement text.
5212 *
5213 * [ WFC: PEs in Internal Subset ]
5214 * In the internal DTD subset, parameter-entity references can occur
5215 * only where markup declarations can occur, not within markup declarations.
5216 * (This does not apply to references that occur in external parameter
5217 * entities or to the external subset.)
5218 */
5219void
5220xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5221 GROW;
5222 xmlParseElementDecl(ctxt);
5223 xmlParseAttributeListDecl(ctxt);
5224 xmlParseEntityDecl(ctxt);
5225 xmlParseNotationDecl(ctxt);
5226 xmlParsePI(ctxt);
5227 xmlParseComment(ctxt);
5228 /*
5229 * This is only for internal subset. On external entities,
5230 * the replacement is done before parsing stage
5231 */
5232 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5233 xmlParsePEReference(ctxt);
5234
5235 /*
5236 * Conditional sections are allowed from entities included
5237 * by PE References in the internal subset.
5238 */
5239 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5240 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5241 xmlParseConditionalSections(ctxt);
5242 }
5243 }
5244
5245 ctxt->instate = XML_PARSER_DTD;
5246}
5247
5248/**
5249 * xmlParseTextDecl:
5250 * @ctxt: an XML parser context
5251 *
5252 * parse an XML declaration header for external entities
5253 *
5254 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5255 *
5256 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5257 */
5258
5259void
5260xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5261 xmlChar *version;
5262
5263 /*
5264 * We know that '<?xml' is here.
5265 */
5266 if ((RAW == '<') && (NXT(1) == '?') &&
5267 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5268 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5269 SKIP(5);
5270 } else {
5271 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
5272 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5273 ctxt->sax->error(ctxt->userData,
5274 "Text declaration '<?xml' required\n");
5275 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005276 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005277
5278 return;
5279 }
5280
5281 if (!IS_BLANK(CUR)) {
5282 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5283 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5284 ctxt->sax->error(ctxt->userData,
5285 "Space needed after '<?xml'\n");
5286 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005287 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005288 }
5289 SKIP_BLANKS;
5290
5291 /*
5292 * We may have the VersionInfo here.
5293 */
5294 version = xmlParseVersionInfo(ctxt);
5295 if (version == NULL)
5296 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005297 else {
5298 if (!IS_BLANK(CUR)) {
5299 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5300 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5301 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5302 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005303 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard401c2112002-01-07 16:54:10 +00005304 }
5305 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005306 ctxt->input->version = version;
5307
5308 /*
5309 * We must have the encoding declaration
5310 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005311 xmlParseEncodingDecl(ctxt);
5312 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5313 /*
5314 * The XML REC instructs us to stop parsing right here
5315 */
5316 return;
5317 }
5318
5319 SKIP_BLANKS;
5320 if ((RAW == '?') && (NXT(1) == '>')) {
5321 SKIP(2);
5322 } else if (RAW == '>') {
5323 /* Deprecated old WD ... */
5324 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5325 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5326 ctxt->sax->error(ctxt->userData,
5327 "XML declaration must end-up with '?>'\n");
5328 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005329 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005330 NEXT;
5331 } else {
5332 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5333 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5334 ctxt->sax->error(ctxt->userData,
5335 "parsing XML declaration: '?>' expected\n");
5336 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005337 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005338 MOVETO_ENDTAG(CUR_PTR);
5339 NEXT;
5340 }
5341}
5342
5343/**
Owen Taylor3473f882001-02-23 17:55:21 +00005344 * xmlParseExternalSubset:
5345 * @ctxt: an XML parser context
5346 * @ExternalID: the external identifier
5347 * @SystemID: the system identifier (or URL)
5348 *
5349 * parse Markup declarations from an external subset
5350 *
5351 * [30] extSubset ::= textDecl? extSubsetDecl
5352 *
5353 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5354 */
5355void
5356xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5357 const xmlChar *SystemID) {
5358 GROW;
5359 if ((RAW == '<') && (NXT(1) == '?') &&
5360 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5361 (NXT(4) == 'l')) {
5362 xmlParseTextDecl(ctxt);
5363 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5364 /*
5365 * The XML REC instructs us to stop parsing right here
5366 */
5367 ctxt->instate = XML_PARSER_EOF;
5368 return;
5369 }
5370 }
5371 if (ctxt->myDoc == NULL) {
5372 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5373 }
5374 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5375 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5376
5377 ctxt->instate = XML_PARSER_DTD;
5378 ctxt->external = 1;
5379 while (((RAW == '<') && (NXT(1) == '?')) ||
5380 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005381 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005382 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005383 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005384
5385 GROW;
5386 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5387 xmlParseConditionalSections(ctxt);
5388 } else if (IS_BLANK(CUR)) {
5389 NEXT;
5390 } else if (RAW == '%') {
5391 xmlParsePEReference(ctxt);
5392 } else
5393 xmlParseMarkupDecl(ctxt);
5394
5395 /*
5396 * Pop-up of finished entities.
5397 */
5398 while ((RAW == 0) && (ctxt->inputNr > 1))
5399 xmlPopInput(ctxt);
5400
Daniel Veillardfdc91562002-07-01 21:52:03 +00005401 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005402 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5403 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5404 ctxt->sax->error(ctxt->userData,
5405 "Content error in the external subset\n");
5406 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005407 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005408 break;
5409 }
5410 }
5411
5412 if (RAW != 0) {
5413 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5414 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5415 ctxt->sax->error(ctxt->userData,
5416 "Extra content at the end of the document\n");
5417 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005418 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005419 }
5420
5421}
5422
5423/**
5424 * xmlParseReference:
5425 * @ctxt: an XML parser context
5426 *
5427 * parse and handle entity references in content, depending on the SAX
5428 * interface, this may end-up in a call to character() if this is a
5429 * CharRef, a predefined entity, if there is no reference() callback.
5430 * or if the parser was asked to switch to that mode.
5431 *
5432 * [67] Reference ::= EntityRef | CharRef
5433 */
5434void
5435xmlParseReference(xmlParserCtxtPtr ctxt) {
5436 xmlEntityPtr ent;
5437 xmlChar *val;
5438 if (RAW != '&') return;
5439
5440 if (NXT(1) == '#') {
5441 int i = 0;
5442 xmlChar out[10];
5443 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005444 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005445
5446 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5447 /*
5448 * So we are using non-UTF-8 buffers
5449 * Check that the char fit on 8bits, if not
5450 * generate a CharRef.
5451 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005452 if (value <= 0xFF) {
5453 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005454 out[1] = 0;
5455 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5456 (!ctxt->disableSAX))
5457 ctxt->sax->characters(ctxt->userData, out, 1);
5458 } else {
5459 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005460 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005461 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005462 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005463 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5464 (!ctxt->disableSAX))
5465 ctxt->sax->reference(ctxt->userData, out);
5466 }
5467 } else {
5468 /*
5469 * Just encode the value in UTF-8
5470 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005471 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005472 out[i] = 0;
5473 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5474 (!ctxt->disableSAX))
5475 ctxt->sax->characters(ctxt->userData, out, i);
5476 }
5477 } else {
5478 ent = xmlParseEntityRef(ctxt);
5479 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005480 if (!ctxt->wellFormed)
5481 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005482 if ((ent->name != NULL) &&
5483 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5484 xmlNodePtr list = NULL;
5485 int ret;
5486
5487
5488 /*
5489 * The first reference to the entity trigger a parsing phase
5490 * where the ent->children is filled with the result from
5491 * the parsing.
5492 */
5493 if (ent->children == NULL) {
5494 xmlChar *value;
5495 value = ent->content;
5496
5497 /*
5498 * Check that this entity is well formed
5499 */
5500 if ((value != NULL) &&
5501 (value[1] == 0) && (value[0] == '<') &&
5502 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5503 /*
5504 * DONE: get definite answer on this !!!
5505 * Lots of entity decls are used to declare a single
5506 * char
5507 * <!ENTITY lt "<">
5508 * Which seems to be valid since
5509 * 2.4: The ampersand character (&) and the left angle
5510 * bracket (<) may appear in their literal form only
5511 * when used ... They are also legal within the literal
5512 * entity value of an internal entity declaration;i
5513 * see "4.3.2 Well-Formed Parsed Entities".
5514 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5515 * Looking at the OASIS test suite and James Clark
5516 * tests, this is broken. However the XML REC uses
5517 * it. Is the XML REC not well-formed ????
5518 * This is a hack to avoid this problem
5519 *
5520 * ANSWER: since lt gt amp .. are already defined,
5521 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005522 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005523 * is lousy but acceptable.
5524 */
5525 list = xmlNewDocText(ctxt->myDoc, value);
5526 if (list != NULL) {
5527 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5528 (ent->children == NULL)) {
5529 ent->children = list;
5530 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005531 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005532 list->parent = (xmlNodePtr) ent;
5533 } else {
5534 xmlFreeNodeList(list);
5535 }
5536 } else if (list != NULL) {
5537 xmlFreeNodeList(list);
5538 }
5539 } else {
5540 /*
5541 * 4.3.2: An internal general parsed entity is well-formed
5542 * if its replacement text matches the production labeled
5543 * content.
5544 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005545
5546 void *user_data;
5547 /*
5548 * This is a bit hackish but this seems the best
5549 * way to make sure both SAX and DOM entity support
5550 * behaves okay.
5551 */
5552 if (ctxt->userData == ctxt)
5553 user_data = NULL;
5554 else
5555 user_data = ctxt->userData;
5556
Owen Taylor3473f882001-02-23 17:55:21 +00005557 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5558 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005559 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5560 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005561 ctxt->depth--;
5562 } else if (ent->etype ==
5563 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5564 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005565 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005566 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005567 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005568 ctxt->depth--;
5569 } else {
5570 ret = -1;
5571 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5572 ctxt->sax->error(ctxt->userData,
5573 "Internal: invalid entity type\n");
5574 }
5575 if (ret == XML_ERR_ENTITY_LOOP) {
5576 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5577 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5578 ctxt->sax->error(ctxt->userData,
5579 "Detected entity reference loop\n");
5580 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005581 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005582 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005583 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005584 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5585 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005586 (ent->children == NULL)) {
5587 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005588 if (ctxt->replaceEntities) {
5589 /*
5590 * Prune it directly in the generated document
5591 * except for single text nodes.
5592 */
5593 if ((list->type == XML_TEXT_NODE) &&
5594 (list->next == NULL)) {
5595 list->parent = (xmlNodePtr) ent;
5596 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005597 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005598 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005599 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005600 while (list != NULL) {
5601 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005602 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005603 if (list->next == NULL)
5604 ent->last = list;
5605 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005606 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005607 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005608 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5609 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005610 }
5611 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005612 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005613 while (list != NULL) {
5614 list->parent = (xmlNodePtr) ent;
5615 if (list->next == NULL)
5616 ent->last = list;
5617 list = list->next;
5618 }
Owen Taylor3473f882001-02-23 17:55:21 +00005619 }
5620 } else {
5621 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005622 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005623 }
5624 } else if (ret > 0) {
5625 ctxt->errNo = ret;
5626 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5627 ctxt->sax->error(ctxt->userData,
5628 "Entity value required\n");
5629 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005630 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005631 } else if (list != NULL) {
5632 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005633 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005634 }
5635 }
5636 }
5637 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5638 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5639 /*
5640 * Create a node.
5641 */
5642 ctxt->sax->reference(ctxt->userData, ent->name);
5643 return;
5644 } else if (ctxt->replaceEntities) {
5645 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5646 /*
5647 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005648 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005649 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005650 */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005651 if ((list == NULL) && (ent->owner == 0)) {
5652 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005653 cur = ent->children;
5654 while (cur != NULL) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005655 nw = xmlCopyNode(cur, 1);
5656 if (nw != NULL) {
5657 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005658 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005659 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005660 }
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005661 xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005662 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005663 if (cur == ent->last)
5664 break;
5665 cur = cur->next;
5666 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005667 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005668 xmlAddEntityReference(ent, firstChild, nw);
5669 } else if (list == NULL) {
5670 xmlNodePtr nw = NULL, cur, next, last,
5671 firstChild = NULL;
5672 /*
5673 * Copy the entity child list and make it the new
5674 * entity child list. The goal is to make sure any
5675 * ID or REF referenced will be the one from the
5676 * document content and not the entity copy.
5677 */
5678 cur = ent->children;
5679 ent->children = NULL;
5680 last = ent->last;
5681 ent->last = NULL;
5682 while (cur != NULL) {
5683 next = cur->next;
5684 cur->next = NULL;
5685 cur->parent = NULL;
5686 nw = xmlCopyNode(cur, 1);
5687 if (nw != NULL) {
5688 nw->_private = cur->_private;
5689 if (firstChild == NULL){
5690 firstChild = cur;
5691 }
5692 xmlAddChild((xmlNodePtr) ent, nw);
5693 xmlAddChild(ctxt->node, cur);
5694 }
5695 if (cur == last)
5696 break;
5697 cur = next;
5698 }
5699 ent->owner = 1;
5700 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5701 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005702 } else {
5703 /*
5704 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005705 * node with a possible previous text one which
5706 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005707 */
5708 if (ent->children->type == XML_TEXT_NODE)
5709 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5710 if ((ent->last != ent->children) &&
5711 (ent->last->type == XML_TEXT_NODE))
5712 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5713 xmlAddChildList(ctxt->node, ent->children);
5714 }
5715
Owen Taylor3473f882001-02-23 17:55:21 +00005716 /*
5717 * This is to avoid a nasty side effect, see
5718 * characters() in SAX.c
5719 */
5720 ctxt->nodemem = 0;
5721 ctxt->nodelen = 0;
5722 return;
5723 } else {
5724 /*
5725 * Probably running in SAX mode
5726 */
5727 xmlParserInputPtr input;
5728
5729 input = xmlNewEntityInputStream(ctxt, ent);
5730 xmlPushInput(ctxt, input);
5731 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5732 (RAW == '<') && (NXT(1) == '?') &&
5733 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5734 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5735 xmlParseTextDecl(ctxt);
5736 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5737 /*
5738 * The XML REC instructs us to stop parsing right here
5739 */
5740 ctxt->instate = XML_PARSER_EOF;
5741 return;
5742 }
5743 if (input->standalone == 1) {
5744 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5745 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5746 ctxt->sax->error(ctxt->userData,
5747 "external parsed entities cannot be standalone\n");
5748 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005749 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005750 }
5751 }
5752 return;
5753 }
5754 }
5755 } else {
5756 val = ent->content;
5757 if (val == NULL) return;
5758 /*
5759 * inline the entity.
5760 */
5761 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5762 (!ctxt->disableSAX))
5763 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5764 }
5765 }
5766}
5767
5768/**
5769 * xmlParseEntityRef:
5770 * @ctxt: an XML parser context
5771 *
5772 * parse ENTITY references declarations
5773 *
5774 * [68] EntityRef ::= '&' Name ';'
5775 *
5776 * [ WFC: Entity Declared ]
5777 * In a document without any DTD, a document with only an internal DTD
5778 * subset which contains no parameter entity references, or a document
5779 * with "standalone='yes'", the Name given in the entity reference
5780 * must match that in an entity declaration, except that well-formed
5781 * documents need not declare any of the following entities: amp, lt,
5782 * gt, apos, quot. The declaration of a parameter entity must precede
5783 * any reference to it. Similarly, the declaration of a general entity
5784 * must precede any reference to it which appears in a default value in an
5785 * attribute-list declaration. Note that if entities are declared in the
5786 * external subset or in external parameter entities, a non-validating
5787 * processor is not obligated to read and process their declarations;
5788 * for such documents, the rule that an entity must be declared is a
5789 * well-formedness constraint only if standalone='yes'.
5790 *
5791 * [ WFC: Parsed Entity ]
5792 * An entity reference must not contain the name of an unparsed entity
5793 *
5794 * Returns the xmlEntityPtr if found, or NULL otherwise.
5795 */
5796xmlEntityPtr
5797xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5798 xmlChar *name;
5799 xmlEntityPtr ent = NULL;
5800
5801 GROW;
5802
5803 if (RAW == '&') {
5804 NEXT;
5805 name = xmlParseName(ctxt);
5806 if (name == NULL) {
5807 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5808 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5809 ctxt->sax->error(ctxt->userData,
5810 "xmlParseEntityRef: no name\n");
5811 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005812 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005813 } else {
5814 if (RAW == ';') {
5815 NEXT;
5816 /*
5817 * Ask first SAX for entity resolution, otherwise try the
5818 * predefined set.
5819 */
5820 if (ctxt->sax != NULL) {
5821 if (ctxt->sax->getEntity != NULL)
5822 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005823 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00005824 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005825 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
5826 (ctxt->userData==ctxt)) {
Daniel Veillard5997aca2002-03-18 18:36:20 +00005827 ent = getEntity(ctxt, name);
5828 }
Owen Taylor3473f882001-02-23 17:55:21 +00005829 }
5830 /*
5831 * [ WFC: Entity Declared ]
5832 * In a document without any DTD, a document with only an
5833 * internal DTD subset which contains no parameter entity
5834 * references, or a document with "standalone='yes'", the
5835 * Name given in the entity reference must match that in an
5836 * entity declaration, except that well-formed documents
5837 * need not declare any of the following entities: amp, lt,
5838 * gt, apos, quot.
5839 * The declaration of a parameter entity must precede any
5840 * reference to it.
5841 * Similarly, the declaration of a general entity must
5842 * precede any reference to it which appears in a default
5843 * value in an attribute-list declaration. Note that if
5844 * entities are declared in the external subset or in
5845 * external parameter entities, a non-validating processor
5846 * is not obligated to read and process their declarations;
5847 * for such documents, the rule that an entity must be
5848 * declared is a well-formedness constraint only if
5849 * standalone='yes'.
5850 */
5851 if (ent == NULL) {
5852 if ((ctxt->standalone == 1) ||
5853 ((ctxt->hasExternalSubset == 0) &&
5854 (ctxt->hasPErefs == 0))) {
5855 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5856 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5857 ctxt->sax->error(ctxt->userData,
5858 "Entity '%s' not defined\n", name);
5859 ctxt->wellFormed = 0;
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005860 ctxt->valid = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005861 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005862 } else {
5863 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005864 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005865 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005866 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005867 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005868 }
5869 }
5870
5871 /*
5872 * [ WFC: Parsed Entity ]
5873 * An entity reference must not contain the name of an
5874 * unparsed entity
5875 */
5876 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5877 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5878 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5879 ctxt->sax->error(ctxt->userData,
5880 "Entity reference to unparsed entity %s\n", name);
5881 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005882 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005883 }
5884
5885 /*
5886 * [ WFC: No External Entity References ]
5887 * Attribute values cannot contain direct or indirect
5888 * entity references to external entities.
5889 */
5890 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5891 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5892 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5893 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5894 ctxt->sax->error(ctxt->userData,
5895 "Attribute references external entity '%s'\n", name);
5896 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005897 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005898 }
5899 /*
5900 * [ WFC: No < in Attribute Values ]
5901 * The replacement text of any entity referred to directly or
5902 * indirectly in an attribute value (other than "&lt;") must
5903 * not contain a <.
5904 */
5905 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5906 (ent != NULL) &&
5907 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5908 (ent->content != NULL) &&
5909 (xmlStrchr(ent->content, '<'))) {
5910 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5911 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5912 ctxt->sax->error(ctxt->userData,
5913 "'<' in entity '%s' is not allowed in attributes values\n", name);
5914 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005915 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005916 }
5917
5918 /*
5919 * Internal check, no parameter entities here ...
5920 */
5921 else {
5922 switch (ent->etype) {
5923 case XML_INTERNAL_PARAMETER_ENTITY:
5924 case XML_EXTERNAL_PARAMETER_ENTITY:
5925 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5926 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5927 ctxt->sax->error(ctxt->userData,
5928 "Attempt to reference the parameter entity '%s'\n", name);
5929 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005930 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005931 break;
5932 default:
5933 break;
5934 }
5935 }
5936
5937 /*
5938 * [ WFC: No Recursion ]
5939 * A parsed entity must not contain a recursive reference
5940 * to itself, either directly or indirectly.
5941 * Done somewhere else
5942 */
5943
5944 } else {
5945 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5946 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5947 ctxt->sax->error(ctxt->userData,
5948 "xmlParseEntityRef: expecting ';'\n");
5949 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005950 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005951 }
5952 xmlFree(name);
5953 }
5954 }
5955 return(ent);
5956}
5957
5958/**
5959 * xmlParseStringEntityRef:
5960 * @ctxt: an XML parser context
5961 * @str: a pointer to an index in the string
5962 *
5963 * parse ENTITY references declarations, but this version parses it from
5964 * a string value.
5965 *
5966 * [68] EntityRef ::= '&' Name ';'
5967 *
5968 * [ WFC: Entity Declared ]
5969 * In a document without any DTD, a document with only an internal DTD
5970 * subset which contains no parameter entity references, or a document
5971 * with "standalone='yes'", the Name given in the entity reference
5972 * must match that in an entity declaration, except that well-formed
5973 * documents need not declare any of the following entities: amp, lt,
5974 * gt, apos, quot. The declaration of a parameter entity must precede
5975 * any reference to it. Similarly, the declaration of a general entity
5976 * must precede any reference to it which appears in a default value in an
5977 * attribute-list declaration. Note that if entities are declared in the
5978 * external subset or in external parameter entities, a non-validating
5979 * processor is not obligated to read and process their declarations;
5980 * for such documents, the rule that an entity must be declared is a
5981 * well-formedness constraint only if standalone='yes'.
5982 *
5983 * [ WFC: Parsed Entity ]
5984 * An entity reference must not contain the name of an unparsed entity
5985 *
5986 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5987 * is updated to the current location in the string.
5988 */
5989xmlEntityPtr
5990xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5991 xmlChar *name;
5992 const xmlChar *ptr;
5993 xmlChar cur;
5994 xmlEntityPtr ent = NULL;
5995
5996 if ((str == NULL) || (*str == NULL))
5997 return(NULL);
5998 ptr = *str;
5999 cur = *ptr;
6000 if (cur == '&') {
6001 ptr++;
6002 cur = *ptr;
6003 name = xmlParseStringName(ctxt, &ptr);
6004 if (name == NULL) {
6005 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6007 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00006008 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006009 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006010 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006011 } else {
6012 if (*ptr == ';') {
6013 ptr++;
6014 /*
6015 * Ask first SAX for entity resolution, otherwise try the
6016 * predefined set.
6017 */
6018 if (ctxt->sax != NULL) {
6019 if (ctxt->sax->getEntity != NULL)
6020 ent = ctxt->sax->getEntity(ctxt->userData, name);
6021 if (ent == NULL)
6022 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006023 if ((ent == NULL) && (ctxt->userData==ctxt)) {
6024 ent = getEntity(ctxt, name);
6025 }
Owen Taylor3473f882001-02-23 17:55:21 +00006026 }
6027 /*
6028 * [ WFC: Entity Declared ]
6029 * In a document without any DTD, a document with only an
6030 * internal DTD subset which contains no parameter entity
6031 * references, or a document with "standalone='yes'", the
6032 * Name given in the entity reference must match that in an
6033 * entity declaration, except that well-formed documents
6034 * need not declare any of the following entities: amp, lt,
6035 * gt, apos, quot.
6036 * The declaration of a parameter entity must precede any
6037 * reference to it.
6038 * Similarly, the declaration of a general entity must
6039 * precede any reference to it which appears in a default
6040 * value in an attribute-list declaration. Note that if
6041 * entities are declared in the external subset or in
6042 * external parameter entities, a non-validating processor
6043 * is not obligated to read and process their declarations;
6044 * for such documents, the rule that an entity must be
6045 * declared is a well-formedness constraint only if
6046 * standalone='yes'.
6047 */
6048 if (ent == NULL) {
6049 if ((ctxt->standalone == 1) ||
6050 ((ctxt->hasExternalSubset == 0) &&
6051 (ctxt->hasPErefs == 0))) {
6052 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6053 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6054 ctxt->sax->error(ctxt->userData,
6055 "Entity '%s' not defined\n", name);
6056 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006057 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006058 } else {
6059 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
6060 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6061 ctxt->sax->warning(ctxt->userData,
6062 "Entity '%s' not defined\n", name);
6063 }
6064 }
6065
6066 /*
6067 * [ WFC: Parsed Entity ]
6068 * An entity reference must not contain the name of an
6069 * unparsed entity
6070 */
6071 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
6072 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
6073 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6074 ctxt->sax->error(ctxt->userData,
6075 "Entity reference to unparsed entity %s\n", name);
6076 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006077 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006078 }
6079
6080 /*
6081 * [ WFC: No External Entity References ]
6082 * Attribute values cannot contain direct or indirect
6083 * entity references to external entities.
6084 */
6085 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6086 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
6087 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
6088 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6089 ctxt->sax->error(ctxt->userData,
6090 "Attribute references external entity '%s'\n", name);
6091 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006092 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006093 }
6094 /*
6095 * [ WFC: No < in Attribute Values ]
6096 * The replacement text of any entity referred to directly or
6097 * indirectly in an attribute value (other than "&lt;") must
6098 * not contain a <.
6099 */
6100 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6101 (ent != NULL) &&
6102 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6103 (ent->content != NULL) &&
6104 (xmlStrchr(ent->content, '<'))) {
6105 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6106 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6107 ctxt->sax->error(ctxt->userData,
6108 "'<' in entity '%s' is not allowed in attributes values\n", name);
6109 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006110 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006111 }
6112
6113 /*
6114 * Internal check, no parameter entities here ...
6115 */
6116 else {
6117 switch (ent->etype) {
6118 case XML_INTERNAL_PARAMETER_ENTITY:
6119 case XML_EXTERNAL_PARAMETER_ENTITY:
6120 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6121 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6122 ctxt->sax->error(ctxt->userData,
6123 "Attempt to reference the parameter entity '%s'\n", name);
6124 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006125 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006126 break;
6127 default:
6128 break;
6129 }
6130 }
6131
6132 /*
6133 * [ WFC: No Recursion ]
6134 * A parsed entity must not contain a recursive reference
6135 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006136 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006137 */
6138
6139 } else {
6140 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6141 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6142 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00006143 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006144 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006145 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006146 }
6147 xmlFree(name);
6148 }
6149 }
6150 *str = ptr;
6151 return(ent);
6152}
6153
6154/**
6155 * xmlParsePEReference:
6156 * @ctxt: an XML parser context
6157 *
6158 * parse PEReference declarations
6159 * The entity content is handled directly by pushing it's content as
6160 * a new input stream.
6161 *
6162 * [69] PEReference ::= '%' Name ';'
6163 *
6164 * [ WFC: No Recursion ]
6165 * A parsed entity must not contain a recursive
6166 * reference to itself, either directly or indirectly.
6167 *
6168 * [ WFC: Entity Declared ]
6169 * In a document without any DTD, a document with only an internal DTD
6170 * subset which contains no parameter entity references, or a document
6171 * with "standalone='yes'", ... ... The declaration of a parameter
6172 * entity must precede any reference to it...
6173 *
6174 * [ VC: Entity Declared ]
6175 * In a document with an external subset or external parameter entities
6176 * with "standalone='no'", ... ... The declaration of a parameter entity
6177 * must precede any reference to it...
6178 *
6179 * [ WFC: In DTD ]
6180 * Parameter-entity references may only appear in the DTD.
6181 * NOTE: misleading but this is handled.
6182 */
6183void
6184xmlParsePEReference(xmlParserCtxtPtr ctxt) {
6185 xmlChar *name;
6186 xmlEntityPtr entity = NULL;
6187 xmlParserInputPtr input;
6188
6189 if (RAW == '%') {
6190 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006191 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006192 if (name == NULL) {
6193 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6194 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6195 ctxt->sax->error(ctxt->userData,
6196 "xmlParsePEReference: no name\n");
6197 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006198 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006199 } else {
6200 if (RAW == ';') {
6201 NEXT;
6202 if ((ctxt->sax != NULL) &&
6203 (ctxt->sax->getParameterEntity != NULL))
6204 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6205 name);
6206 if (entity == NULL) {
6207 /*
6208 * [ WFC: Entity Declared ]
6209 * In a document without any DTD, a document with only an
6210 * internal DTD subset which contains no parameter entity
6211 * references, or a document with "standalone='yes'", ...
6212 * ... The declaration of a parameter entity must precede
6213 * any reference to it...
6214 */
6215 if ((ctxt->standalone == 1) ||
6216 ((ctxt->hasExternalSubset == 0) &&
6217 (ctxt->hasPErefs == 0))) {
6218 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6219 if ((!ctxt->disableSAX) &&
6220 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6221 ctxt->sax->error(ctxt->userData,
6222 "PEReference: %%%s; not found\n", name);
6223 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006224 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006225 } else {
6226 /*
6227 * [ VC: Entity Declared ]
6228 * In a document with an external subset or external
6229 * parameter entities with "standalone='no'", ...
6230 * ... The declaration of a parameter entity must precede
6231 * any reference to it...
6232 */
6233 if ((!ctxt->disableSAX) &&
6234 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6235 ctxt->sax->warning(ctxt->userData,
6236 "PEReference: %%%s; not found\n", name);
6237 ctxt->valid = 0;
6238 }
6239 } else {
6240 /*
6241 * Internal checking in case the entity quest barfed
6242 */
6243 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6244 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6245 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6246 ctxt->sax->warning(ctxt->userData,
6247 "Internal: %%%s; is not a parameter entity\n", name);
Daniel Veillardf5582f12002-06-11 10:08:16 +00006248 } else if (ctxt->input->free != deallocblankswrapper) {
6249 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
6250 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00006251 } else {
6252 /*
6253 * TODO !!!
6254 * handle the extra spaces added before and after
6255 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6256 */
6257 input = xmlNewEntityInputStream(ctxt, entity);
6258 xmlPushInput(ctxt, input);
6259 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6260 (RAW == '<') && (NXT(1) == '?') &&
6261 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6262 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6263 xmlParseTextDecl(ctxt);
6264 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6265 /*
6266 * The XML REC instructs us to stop parsing
6267 * right here
6268 */
6269 ctxt->instate = XML_PARSER_EOF;
6270 xmlFree(name);
6271 return;
6272 }
6273 }
Owen Taylor3473f882001-02-23 17:55:21 +00006274 }
6275 }
6276 ctxt->hasPErefs = 1;
6277 } else {
6278 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6279 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6280 ctxt->sax->error(ctxt->userData,
6281 "xmlParsePEReference: expecting ';'\n");
6282 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006283 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006284 }
6285 xmlFree(name);
6286 }
6287 }
6288}
6289
6290/**
6291 * xmlParseStringPEReference:
6292 * @ctxt: an XML parser context
6293 * @str: a pointer to an index in the string
6294 *
6295 * parse PEReference declarations
6296 *
6297 * [69] PEReference ::= '%' Name ';'
6298 *
6299 * [ WFC: No Recursion ]
6300 * A parsed entity must not contain a recursive
6301 * reference to itself, either directly or indirectly.
6302 *
6303 * [ WFC: Entity Declared ]
6304 * In a document without any DTD, a document with only an internal DTD
6305 * subset which contains no parameter entity references, or a document
6306 * with "standalone='yes'", ... ... The declaration of a parameter
6307 * entity must precede any reference to it...
6308 *
6309 * [ VC: Entity Declared ]
6310 * In a document with an external subset or external parameter entities
6311 * with "standalone='no'", ... ... The declaration of a parameter entity
6312 * must precede any reference to it...
6313 *
6314 * [ WFC: In DTD ]
6315 * Parameter-entity references may only appear in the DTD.
6316 * NOTE: misleading but this is handled.
6317 *
6318 * Returns the string of the entity content.
6319 * str is updated to the current value of the index
6320 */
6321xmlEntityPtr
6322xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6323 const xmlChar *ptr;
6324 xmlChar cur;
6325 xmlChar *name;
6326 xmlEntityPtr entity = NULL;
6327
6328 if ((str == NULL) || (*str == NULL)) return(NULL);
6329 ptr = *str;
6330 cur = *ptr;
6331 if (cur == '%') {
6332 ptr++;
6333 cur = *ptr;
6334 name = xmlParseStringName(ctxt, &ptr);
6335 if (name == NULL) {
6336 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6337 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6338 ctxt->sax->error(ctxt->userData,
6339 "xmlParseStringPEReference: no name\n");
6340 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006341 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006342 } else {
6343 cur = *ptr;
6344 if (cur == ';') {
6345 ptr++;
6346 cur = *ptr;
6347 if ((ctxt->sax != NULL) &&
6348 (ctxt->sax->getParameterEntity != NULL))
6349 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6350 name);
6351 if (entity == NULL) {
6352 /*
6353 * [ WFC: Entity Declared ]
6354 * In a document without any DTD, a document with only an
6355 * internal DTD subset which contains no parameter entity
6356 * references, or a document with "standalone='yes'", ...
6357 * ... The declaration of a parameter entity must precede
6358 * any reference to it...
6359 */
6360 if ((ctxt->standalone == 1) ||
6361 ((ctxt->hasExternalSubset == 0) &&
6362 (ctxt->hasPErefs == 0))) {
6363 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6364 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6365 ctxt->sax->error(ctxt->userData,
6366 "PEReference: %%%s; not found\n", name);
6367 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006368 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006369 } else {
6370 /*
6371 * [ VC: Entity Declared ]
6372 * In a document with an external subset or external
6373 * parameter entities with "standalone='no'", ...
6374 * ... The declaration of a parameter entity must
6375 * precede any reference to it...
6376 */
6377 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6378 ctxt->sax->warning(ctxt->userData,
6379 "PEReference: %%%s; not found\n", name);
6380 ctxt->valid = 0;
6381 }
6382 } else {
6383 /*
6384 * Internal checking in case the entity quest barfed
6385 */
6386 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6387 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6388 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6389 ctxt->sax->warning(ctxt->userData,
6390 "Internal: %%%s; is not a parameter entity\n", name);
6391 }
6392 }
6393 ctxt->hasPErefs = 1;
6394 } else {
6395 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6396 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6397 ctxt->sax->error(ctxt->userData,
6398 "xmlParseStringPEReference: expecting ';'\n");
6399 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006400 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006401 }
6402 xmlFree(name);
6403 }
6404 }
6405 *str = ptr;
6406 return(entity);
6407}
6408
6409/**
6410 * xmlParseDocTypeDecl:
6411 * @ctxt: an XML parser context
6412 *
6413 * parse a DOCTYPE declaration
6414 *
6415 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6416 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6417 *
6418 * [ VC: Root Element Type ]
6419 * The Name in the document type declaration must match the element
6420 * type of the root element.
6421 */
6422
6423void
6424xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6425 xmlChar *name = NULL;
6426 xmlChar *ExternalID = NULL;
6427 xmlChar *URI = NULL;
6428
6429 /*
6430 * We know that '<!DOCTYPE' has been detected.
6431 */
6432 SKIP(9);
6433
6434 SKIP_BLANKS;
6435
6436 /*
6437 * Parse the DOCTYPE name.
6438 */
6439 name = xmlParseName(ctxt);
6440 if (name == NULL) {
6441 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6442 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6443 ctxt->sax->error(ctxt->userData,
6444 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6445 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006446 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006447 }
6448 ctxt->intSubName = name;
6449
6450 SKIP_BLANKS;
6451
6452 /*
6453 * Check for SystemID and ExternalID
6454 */
6455 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6456
6457 if ((URI != NULL) || (ExternalID != NULL)) {
6458 ctxt->hasExternalSubset = 1;
6459 }
6460 ctxt->extSubURI = URI;
6461 ctxt->extSubSystem = ExternalID;
6462
6463 SKIP_BLANKS;
6464
6465 /*
6466 * Create and update the internal subset.
6467 */
6468 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6469 (!ctxt->disableSAX))
6470 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6471
6472 /*
6473 * Is there any internal subset declarations ?
6474 * they are handled separately in xmlParseInternalSubset()
6475 */
6476 if (RAW == '[')
6477 return;
6478
6479 /*
6480 * We should be at the end of the DOCTYPE declaration.
6481 */
6482 if (RAW != '>') {
6483 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6484 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006485 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006486 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006487 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006488 }
6489 NEXT;
6490}
6491
6492/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006493 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006494 * @ctxt: an XML parser context
6495 *
6496 * parse the internal subset declaration
6497 *
6498 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6499 */
6500
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006501static void
Owen Taylor3473f882001-02-23 17:55:21 +00006502xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6503 /*
6504 * Is there any DTD definition ?
6505 */
6506 if (RAW == '[') {
6507 ctxt->instate = XML_PARSER_DTD;
6508 NEXT;
6509 /*
6510 * Parse the succession of Markup declarations and
6511 * PEReferences.
6512 * Subsequence (markupdecl | PEReference | S)*
6513 */
6514 while (RAW != ']') {
6515 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006516 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006517
6518 SKIP_BLANKS;
6519 xmlParseMarkupDecl(ctxt);
6520 xmlParsePEReference(ctxt);
6521
6522 /*
6523 * Pop-up of finished entities.
6524 */
6525 while ((RAW == 0) && (ctxt->inputNr > 1))
6526 xmlPopInput(ctxt);
6527
6528 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6529 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6530 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6531 ctxt->sax->error(ctxt->userData,
6532 "xmlParseInternalSubset: error detected in Markup declaration\n");
6533 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006534 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006535 break;
6536 }
6537 }
6538 if (RAW == ']') {
6539 NEXT;
6540 SKIP_BLANKS;
6541 }
6542 }
6543
6544 /*
6545 * We should be at the end of the DOCTYPE declaration.
6546 */
6547 if (RAW != '>') {
6548 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6549 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006550 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006551 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006552 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006553 }
6554 NEXT;
6555}
6556
6557/**
6558 * xmlParseAttribute:
6559 * @ctxt: an XML parser context
6560 * @value: a xmlChar ** used to store the value of the attribute
6561 *
6562 * parse an attribute
6563 *
6564 * [41] Attribute ::= Name Eq AttValue
6565 *
6566 * [ WFC: No External Entity References ]
6567 * Attribute values cannot contain direct or indirect entity references
6568 * to external entities.
6569 *
6570 * [ WFC: No < in Attribute Values ]
6571 * The replacement text of any entity referred to directly or indirectly in
6572 * an attribute value (other than "&lt;") must not contain a <.
6573 *
6574 * [ VC: Attribute Value Type ]
6575 * The attribute must have been declared; the value must be of the type
6576 * declared for it.
6577 *
6578 * [25] Eq ::= S? '=' S?
6579 *
6580 * With namespace:
6581 *
6582 * [NS 11] Attribute ::= QName Eq AttValue
6583 *
6584 * Also the case QName == xmlns:??? is handled independently as a namespace
6585 * definition.
6586 *
6587 * Returns the attribute name, and the value in *value.
6588 */
6589
6590xmlChar *
6591xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6592 xmlChar *name, *val;
6593
6594 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006595 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006596 name = xmlParseName(ctxt);
6597 if (name == NULL) {
6598 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6599 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6600 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6601 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006602 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006603 return(NULL);
6604 }
6605
6606 /*
6607 * read the value
6608 */
6609 SKIP_BLANKS;
6610 if (RAW == '=') {
6611 NEXT;
6612 SKIP_BLANKS;
6613 val = xmlParseAttValue(ctxt);
6614 ctxt->instate = XML_PARSER_CONTENT;
6615 } else {
6616 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6617 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6618 ctxt->sax->error(ctxt->userData,
6619 "Specification mandate value for attribute %s\n", name);
6620 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006621 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006622 xmlFree(name);
6623 return(NULL);
6624 }
6625
6626 /*
6627 * Check that xml:lang conforms to the specification
6628 * No more registered as an error, just generate a warning now
6629 * since this was deprecated in XML second edition
6630 */
6631 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6632 if (!xmlCheckLanguageID(val)) {
6633 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6634 ctxt->sax->warning(ctxt->userData,
6635 "Malformed value for xml:lang : %s\n", val);
6636 }
6637 }
6638
6639 /*
6640 * Check that xml:space conforms to the specification
6641 */
6642 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6643 if (xmlStrEqual(val, BAD_CAST "default"))
6644 *(ctxt->space) = 0;
6645 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6646 *(ctxt->space) = 1;
6647 else {
6648 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6649 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6650 ctxt->sax->error(ctxt->userData,
Daniel Veillard642104e2003-03-26 16:32:05 +00006651"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Owen Taylor3473f882001-02-23 17:55:21 +00006652 val);
6653 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006654 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006655 }
6656 }
6657
6658 *value = val;
6659 return(name);
6660}
6661
6662/**
6663 * xmlParseStartTag:
6664 * @ctxt: an XML parser context
6665 *
6666 * parse a start of tag either for rule element or
6667 * EmptyElement. In both case we don't parse the tag closing chars.
6668 *
6669 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6670 *
6671 * [ WFC: Unique Att Spec ]
6672 * No attribute name may appear more than once in the same start-tag or
6673 * empty-element tag.
6674 *
6675 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6676 *
6677 * [ WFC: Unique Att Spec ]
6678 * No attribute name may appear more than once in the same start-tag or
6679 * empty-element tag.
6680 *
6681 * With namespace:
6682 *
6683 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6684 *
6685 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6686 *
6687 * Returns the element name parsed
6688 */
6689
6690xmlChar *
6691xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6692 xmlChar *name;
6693 xmlChar *attname;
6694 xmlChar *attvalue;
6695 const xmlChar **atts = NULL;
6696 int nbatts = 0;
6697 int maxatts = 0;
6698 int i;
6699
6700 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006701 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006702
6703 name = xmlParseName(ctxt);
6704 if (name == NULL) {
6705 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6706 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6707 ctxt->sax->error(ctxt->userData,
6708 "xmlParseStartTag: invalid element name\n");
6709 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006710 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006711 return(NULL);
6712 }
6713
6714 /*
6715 * Now parse the attributes, it ends up with the ending
6716 *
6717 * (S Attribute)* S?
6718 */
6719 SKIP_BLANKS;
6720 GROW;
6721
Daniel Veillard21a0f912001-02-25 19:54:14 +00006722 while ((RAW != '>') &&
6723 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard34ba3872003-07-15 13:34:05 +00006724 (IS_CHAR((unsigned int) RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006725 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006726 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006727
6728 attname = xmlParseAttribute(ctxt, &attvalue);
6729 if ((attname != NULL) && (attvalue != NULL)) {
6730 /*
6731 * [ WFC: Unique Att Spec ]
6732 * No attribute name may appear more than once in the same
6733 * start-tag or empty-element tag.
6734 */
6735 for (i = 0; i < nbatts;i += 2) {
6736 if (xmlStrEqual(atts[i], attname)) {
6737 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6738 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6739 ctxt->sax->error(ctxt->userData,
6740 "Attribute %s redefined\n",
6741 attname);
6742 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006743 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006744 xmlFree(attname);
6745 xmlFree(attvalue);
6746 goto failed;
6747 }
6748 }
6749
6750 /*
6751 * Add the pair to atts
6752 */
6753 if (atts == NULL) {
6754 maxatts = 10;
6755 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6756 if (atts == NULL) {
6757 xmlGenericError(xmlGenericErrorContext,
6758 "malloc of %ld byte failed\n",
6759 maxatts * (long)sizeof(xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006760 if (attname != NULL)
6761 xmlFree(attname);
6762 if (attvalue != NULL)
6763 xmlFree(attvalue);
6764 ctxt->errNo = XML_ERR_NO_MEMORY;
6765 ctxt->instate = XML_PARSER_EOF;
6766 ctxt->disableSAX = 1;
6767 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006768 }
6769 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006770 const xmlChar **n;
6771
Owen Taylor3473f882001-02-23 17:55:21 +00006772 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006773 n = (const xmlChar **) xmlRealloc((void *) atts,
Owen Taylor3473f882001-02-23 17:55:21 +00006774 maxatts * sizeof(xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006775 if (n == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00006776 xmlGenericError(xmlGenericErrorContext,
6777 "realloc of %ld byte failed\n",
6778 maxatts * (long)sizeof(xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006779 if (attname != NULL)
6780 xmlFree(attname);
6781 if (attvalue != NULL)
6782 xmlFree(attvalue);
6783 ctxt->errNo = XML_ERR_NO_MEMORY;
6784 ctxt->instate = XML_PARSER_EOF;
6785 ctxt->disableSAX = 1;
6786 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006787 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006788 atts = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006789 }
6790 atts[nbatts++] = attname;
6791 atts[nbatts++] = attvalue;
6792 atts[nbatts] = NULL;
6793 atts[nbatts + 1] = NULL;
6794 } else {
6795 if (attname != NULL)
6796 xmlFree(attname);
6797 if (attvalue != NULL)
6798 xmlFree(attvalue);
6799 }
6800
6801failed:
6802
Daniel Veillard3772de32002-12-17 10:31:45 +00006803 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006804 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6805 break;
6806 if (!IS_BLANK(RAW)) {
6807 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6808 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6809 ctxt->sax->error(ctxt->userData,
6810 "attributes construct error\n");
6811 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006812 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006813 }
6814 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006815 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6816 (attname == NULL) && (attvalue == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006817 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6818 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6819 ctxt->sax->error(ctxt->userData,
6820 "xmlParseStartTag: problem parsing attributes\n");
6821 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006822 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006823 break;
6824 }
6825 GROW;
6826 }
6827
6828 /*
6829 * SAX: Start of Element !
6830 */
6831 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6832 (!ctxt->disableSAX))
6833 ctxt->sax->startElement(ctxt->userData, name, atts);
6834
6835 if (atts != NULL) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006836 for (i = 0;i < nbatts;i++)
6837 if (atts[i] != NULL)
6838 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00006839 xmlFree((void *) atts);
6840 }
6841 return(name);
6842}
6843
6844/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006845 * xmlParseEndTagInternal:
Owen Taylor3473f882001-02-23 17:55:21 +00006846 * @ctxt: an XML parser context
6847 *
6848 * parse an end of tag
6849 *
6850 * [42] ETag ::= '</' Name S? '>'
6851 *
6852 * With namespace
6853 *
6854 * [NS 9] ETag ::= '</' QName S? '>'
6855 */
6856
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006857static void
6858xmlParseEndTagInternal(xmlParserCtxtPtr ctxt, int line) {
Owen Taylor3473f882001-02-23 17:55:21 +00006859 xmlChar *name;
6860 xmlChar *oldname;
6861
6862 GROW;
6863 if ((RAW != '<') || (NXT(1) != '/')) {
6864 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6865 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6866 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6867 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006868 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006869 return;
6870 }
6871 SKIP(2);
6872
Daniel Veillard46de64e2002-05-29 08:21:33 +00006873 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006874
6875 /*
6876 * We should definitely be at the ending "S? '>'" part
6877 */
6878 GROW;
6879 SKIP_BLANKS;
Daniel Veillard34ba3872003-07-15 13:34:05 +00006880 if ((!IS_CHAR((unsigned int) RAW)) || (RAW != '>')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006881 ctxt->errNo = XML_ERR_GT_REQUIRED;
6882 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6883 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6884 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006885 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006886 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006887 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006888
6889 /*
6890 * [ WFC: Element Type Match ]
6891 * The Name in an element's end-tag must match the element type in the
6892 * start-tag.
6893 *
6894 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006895 if (name != (xmlChar*)1) {
Owen Taylor3473f882001-02-23 17:55:21 +00006896 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6897 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00006898 if (name != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00006899 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006900 "Opening and ending tag mismatch: %s line %d and %s\n",
6901 ctxt->name, line, name);
Daniel Veillard46de64e2002-05-29 08:21:33 +00006902 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006903 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006904 "Ending tag error for: %s line %d\n", ctxt->name, line);
Owen Taylor3473f882001-02-23 17:55:21 +00006905 }
6906
6907 }
6908 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006909 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006910 if (name != NULL)
6911 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00006912 }
6913
6914 /*
6915 * SAX: End of Tag
6916 */
6917 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6918 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006919 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006920
Owen Taylor3473f882001-02-23 17:55:21 +00006921 oldname = namePop(ctxt);
6922 spacePop(ctxt);
6923 if (oldname != NULL) {
6924#ifdef DEBUG_STACK
6925 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6926#endif
6927 xmlFree(oldname);
6928 }
6929 return;
6930}
6931
6932/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006933 * xmlParseEndTag:
6934 * @ctxt: an XML parser context
6935 *
6936 * parse an end of tag
6937 *
6938 * [42] ETag ::= '</' Name S? '>'
6939 *
6940 * With namespace
6941 *
6942 * [NS 9] ETag ::= '</' QName S? '>'
6943 */
6944
6945void
6946xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6947 xmlParseEndTagInternal(ctxt, 0);
6948}
6949
6950/**
Owen Taylor3473f882001-02-23 17:55:21 +00006951 * xmlParseCDSect:
6952 * @ctxt: an XML parser context
6953 *
6954 * Parse escaped pure raw content.
6955 *
6956 * [18] CDSect ::= CDStart CData CDEnd
6957 *
6958 * [19] CDStart ::= '<![CDATA['
6959 *
6960 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6961 *
6962 * [21] CDEnd ::= ']]>'
6963 */
6964void
6965xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6966 xmlChar *buf = NULL;
6967 int len = 0;
6968 int size = XML_PARSER_BUFFER_SIZE;
6969 int r, rl;
6970 int s, sl;
6971 int cur, l;
6972 int count = 0;
6973
6974 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6975 (NXT(2) == '[') && (NXT(3) == 'C') &&
6976 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6977 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6978 (NXT(8) == '[')) {
6979 SKIP(9);
6980 } else
6981 return;
6982
6983 ctxt->instate = XML_PARSER_CDATA_SECTION;
6984 r = CUR_CHAR(rl);
6985 if (!IS_CHAR(r)) {
6986 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6987 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6988 ctxt->sax->error(ctxt->userData,
6989 "CData section not finished\n");
6990 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006991 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006992 ctxt->instate = XML_PARSER_CONTENT;
6993 return;
6994 }
6995 NEXTL(rl);
6996 s = CUR_CHAR(sl);
6997 if (!IS_CHAR(s)) {
6998 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6999 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7000 ctxt->sax->error(ctxt->userData,
7001 "CData section not finished\n");
7002 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007003 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007004 ctxt->instate = XML_PARSER_CONTENT;
7005 return;
7006 }
7007 NEXTL(sl);
7008 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007009 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007010 if (buf == NULL) {
7011 xmlGenericError(xmlGenericErrorContext,
7012 "malloc of %d byte failed\n", size);
7013 return;
7014 }
7015 while (IS_CHAR(cur) &&
7016 ((r != ']') || (s != ']') || (cur != '>'))) {
7017 if (len + 5 >= size) {
7018 size *= 2;
7019 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7020 if (buf == NULL) {
7021 xmlGenericError(xmlGenericErrorContext,
7022 "realloc of %d byte failed\n", size);
7023 return;
7024 }
7025 }
7026 COPY_BUF(rl,buf,len,r);
7027 r = s;
7028 rl = sl;
7029 s = cur;
7030 sl = l;
7031 count++;
7032 if (count > 50) {
7033 GROW;
7034 count = 0;
7035 }
7036 NEXTL(l);
7037 cur = CUR_CHAR(l);
7038 }
7039 buf[len] = 0;
7040 ctxt->instate = XML_PARSER_CONTENT;
7041 if (cur != '>') {
7042 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
7043 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7044 ctxt->sax->error(ctxt->userData,
7045 "CData section not finished\n%.50s\n", buf);
7046 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007047 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007048 xmlFree(buf);
7049 return;
7050 }
7051 NEXTL(l);
7052
7053 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007054 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00007055 */
7056 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
7057 if (ctxt->sax->cdataBlock != NULL)
7058 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00007059 else if (ctxt->sax->characters != NULL)
7060 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00007061 }
7062 xmlFree(buf);
7063}
7064
7065/**
7066 * xmlParseContent:
7067 * @ctxt: an XML parser context
7068 *
7069 * Parse a content:
7070 *
7071 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
7072 */
7073
7074void
7075xmlParseContent(xmlParserCtxtPtr ctxt) {
7076 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00007077 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00007078 ((RAW != '<') || (NXT(1) != '/'))) {
7079 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007080 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00007081 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00007082
7083 /*
Owen Taylor3473f882001-02-23 17:55:21 +00007084 * First case : a Processing Instruction.
7085 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00007086 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007087 xmlParsePI(ctxt);
7088 }
7089
7090 /*
7091 * Second case : a CDSection
7092 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007093 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00007094 (NXT(2) == '[') && (NXT(3) == 'C') &&
7095 (NXT(4) == 'D') && (NXT(5) == 'A') &&
7096 (NXT(6) == 'T') && (NXT(7) == 'A') &&
7097 (NXT(8) == '[')) {
7098 xmlParseCDSect(ctxt);
7099 }
7100
7101 /*
7102 * Third case : a comment
7103 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007104 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00007105 (NXT(2) == '-') && (NXT(3) == '-')) {
7106 xmlParseComment(ctxt);
7107 ctxt->instate = XML_PARSER_CONTENT;
7108 }
7109
7110 /*
7111 * Fourth case : a sub-element.
7112 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007113 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00007114 xmlParseElement(ctxt);
7115 }
7116
7117 /*
7118 * Fifth case : a reference. If if has not been resolved,
7119 * parsing returns it's Name, create the node
7120 */
7121
Daniel Veillard21a0f912001-02-25 19:54:14 +00007122 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00007123 xmlParseReference(ctxt);
7124 }
7125
7126 /*
7127 * Last case, text. Note that References are handled directly.
7128 */
7129 else {
7130 xmlParseCharData(ctxt, 0);
7131 }
7132
7133 GROW;
7134 /*
7135 * Pop-up of finished entities.
7136 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00007137 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00007138 xmlPopInput(ctxt);
7139 SHRINK;
7140
Daniel Veillardfdc91562002-07-01 21:52:03 +00007141 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007142 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
7143 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7144 ctxt->sax->error(ctxt->userData,
7145 "detected an error in element content\n");
7146 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007147 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007148 ctxt->instate = XML_PARSER_EOF;
7149 break;
7150 }
7151 }
7152}
7153
7154/**
7155 * xmlParseElement:
7156 * @ctxt: an XML parser context
7157 *
7158 * parse an XML element, this is highly recursive
7159 *
7160 * [39] element ::= EmptyElemTag | STag content ETag
7161 *
7162 * [ WFC: Element Type Match ]
7163 * The Name in an element's end-tag must match the element type in the
7164 * start-tag.
7165 *
7166 * [ VC: Element Valid ]
7167 * An element is valid if there is a declaration matching elementdecl
7168 * where the Name matches the element type and one of the following holds:
7169 * - The declaration matches EMPTY and the element has no content.
7170 * - The declaration matches children and the sequence of child elements
7171 * belongs to the language generated by the regular expression in the
7172 * content model, with optional white space (characters matching the
7173 * nonterminal S) between each pair of child elements.
7174 * - The declaration matches Mixed and the content consists of character
7175 * data and child elements whose types match names in the content model.
7176 * - The declaration matches ANY, and the types of any child elements have
7177 * been declared.
7178 */
7179
7180void
7181xmlParseElement(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00007182 xmlChar *name;
7183 xmlChar *oldname;
7184 xmlParserNodeInfo node_info;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007185 int line;
Owen Taylor3473f882001-02-23 17:55:21 +00007186 xmlNodePtr ret;
7187
7188 /* Capture start position */
7189 if (ctxt->record_info) {
7190 node_info.begin_pos = ctxt->input->consumed +
7191 (CUR_PTR - ctxt->input->base);
7192 node_info.begin_line = ctxt->input->line;
7193 }
7194
7195 if (ctxt->spaceNr == 0)
7196 spacePush(ctxt, -1);
7197 else
7198 spacePush(ctxt, *ctxt->space);
7199
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007200 line = ctxt->input->line;
Owen Taylor3473f882001-02-23 17:55:21 +00007201 name = xmlParseStartTag(ctxt);
7202 if (name == NULL) {
7203 spacePop(ctxt);
7204 return;
7205 }
7206 namePush(ctxt, name);
7207 ret = ctxt->node;
7208
7209 /*
7210 * [ VC: Root Element Type ]
7211 * The Name in the document type declaration must match the element
7212 * type of the root element.
7213 */
7214 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7215 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7216 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7217
7218 /*
7219 * Check for an Empty Element.
7220 */
7221 if ((RAW == '/') && (NXT(1) == '>')) {
7222 SKIP(2);
7223 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7224 (!ctxt->disableSAX))
7225 ctxt->sax->endElement(ctxt->userData, name);
7226 oldname = namePop(ctxt);
7227 spacePop(ctxt);
7228 if (oldname != NULL) {
7229#ifdef DEBUG_STACK
7230 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7231#endif
7232 xmlFree(oldname);
7233 }
7234 if ( ret != NULL && ctxt->record_info ) {
7235 node_info.end_pos = ctxt->input->consumed +
7236 (CUR_PTR - ctxt->input->base);
7237 node_info.end_line = ctxt->input->line;
7238 node_info.node = ret;
7239 xmlParserAddNodeInfo(ctxt, &node_info);
7240 }
7241 return;
7242 }
7243 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00007244 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007245 } else {
7246 ctxt->errNo = XML_ERR_GT_REQUIRED;
7247 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7248 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007249 "Couldn't find end of Start Tag %s line %d\n",
7250 name, line);
Owen Taylor3473f882001-02-23 17:55:21 +00007251 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007252 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007253
7254 /*
7255 * end of parsing of this node.
7256 */
7257 nodePop(ctxt);
7258 oldname = namePop(ctxt);
7259 spacePop(ctxt);
7260 if (oldname != NULL) {
7261#ifdef DEBUG_STACK
7262 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7263#endif
7264 xmlFree(oldname);
7265 }
7266
7267 /*
7268 * Capture end position and add node
7269 */
7270 if ( ret != NULL && ctxt->record_info ) {
7271 node_info.end_pos = ctxt->input->consumed +
7272 (CUR_PTR - ctxt->input->base);
7273 node_info.end_line = ctxt->input->line;
7274 node_info.node = ret;
7275 xmlParserAddNodeInfo(ctxt, &node_info);
7276 }
7277 return;
7278 }
7279
7280 /*
7281 * Parse the content of the element:
7282 */
7283 xmlParseContent(ctxt);
Daniel Veillard34ba3872003-07-15 13:34:05 +00007284 if (!IS_CHAR((unsigned int) RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00007285 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00007286 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7287 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007288 "Premature end of data in tag %s line %d\n", name, line);
Owen Taylor3473f882001-02-23 17:55:21 +00007289 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007290 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007291
7292 /*
7293 * end of parsing of this node.
7294 */
7295 nodePop(ctxt);
7296 oldname = namePop(ctxt);
7297 spacePop(ctxt);
7298 if (oldname != NULL) {
7299#ifdef DEBUG_STACK
7300 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7301#endif
7302 xmlFree(oldname);
7303 }
7304 return;
7305 }
7306
7307 /*
7308 * parse the end of tag: '</' should be here.
7309 */
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007310 xmlParseEndTagInternal(ctxt, line);
Owen Taylor3473f882001-02-23 17:55:21 +00007311
7312 /*
7313 * Capture end position and add node
7314 */
7315 if ( ret != NULL && ctxt->record_info ) {
7316 node_info.end_pos = ctxt->input->consumed +
7317 (CUR_PTR - ctxt->input->base);
7318 node_info.end_line = ctxt->input->line;
7319 node_info.node = ret;
7320 xmlParserAddNodeInfo(ctxt, &node_info);
7321 }
7322}
7323
7324/**
7325 * xmlParseVersionNum:
7326 * @ctxt: an XML parser context
7327 *
7328 * parse the XML version value.
7329 *
7330 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7331 *
7332 * Returns the string giving the XML version number, or NULL
7333 */
7334xmlChar *
7335xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7336 xmlChar *buf = NULL;
7337 int len = 0;
7338 int size = 10;
7339 xmlChar cur;
7340
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007341 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007342 if (buf == NULL) {
7343 xmlGenericError(xmlGenericErrorContext,
7344 "malloc of %d byte failed\n", size);
7345 return(NULL);
7346 }
7347 cur = CUR;
7348 while (((cur >= 'a') && (cur <= 'z')) ||
7349 ((cur >= 'A') && (cur <= 'Z')) ||
7350 ((cur >= '0') && (cur <= '9')) ||
7351 (cur == '_') || (cur == '.') ||
7352 (cur == ':') || (cur == '-')) {
7353 if (len + 1 >= size) {
7354 size *= 2;
7355 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7356 if (buf == NULL) {
7357 xmlGenericError(xmlGenericErrorContext,
7358 "realloc of %d byte failed\n", size);
7359 return(NULL);
7360 }
7361 }
7362 buf[len++] = cur;
7363 NEXT;
7364 cur=CUR;
7365 }
7366 buf[len] = 0;
7367 return(buf);
7368}
7369
7370/**
7371 * xmlParseVersionInfo:
7372 * @ctxt: an XML parser context
7373 *
7374 * parse the XML version.
7375 *
7376 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7377 *
7378 * [25] Eq ::= S? '=' S?
7379 *
7380 * Returns the version string, e.g. "1.0"
7381 */
7382
7383xmlChar *
7384xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7385 xmlChar *version = NULL;
7386 const xmlChar *q;
7387
7388 if ((RAW == 'v') && (NXT(1) == 'e') &&
7389 (NXT(2) == 'r') && (NXT(3) == 's') &&
7390 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7391 (NXT(6) == 'n')) {
7392 SKIP(7);
7393 SKIP_BLANKS;
7394 if (RAW != '=') {
7395 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7396 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7397 ctxt->sax->error(ctxt->userData,
7398 "xmlParseVersionInfo : expected '='\n");
7399 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007400 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007401 return(NULL);
7402 }
7403 NEXT;
7404 SKIP_BLANKS;
7405 if (RAW == '"') {
7406 NEXT;
7407 q = CUR_PTR;
7408 version = xmlParseVersionNum(ctxt);
7409 if (RAW != '"') {
7410 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7411 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7412 ctxt->sax->error(ctxt->userData,
7413 "String not closed\n%.50s\n", q);
7414 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007415 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007416 } else
7417 NEXT;
7418 } else if (RAW == '\''){
7419 NEXT;
7420 q = CUR_PTR;
7421 version = xmlParseVersionNum(ctxt);
7422 if (RAW != '\'') {
7423 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7424 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7425 ctxt->sax->error(ctxt->userData,
7426 "String not closed\n%.50s\n", q);
7427 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007428 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007429 } else
7430 NEXT;
7431 } else {
7432 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7433 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7434 ctxt->sax->error(ctxt->userData,
7435 "xmlParseVersionInfo : expected ' or \"\n");
7436 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007437 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007438 }
7439 }
7440 return(version);
7441}
7442
7443/**
7444 * xmlParseEncName:
7445 * @ctxt: an XML parser context
7446 *
7447 * parse the XML encoding name
7448 *
7449 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7450 *
7451 * Returns the encoding name value or NULL
7452 */
7453xmlChar *
7454xmlParseEncName(xmlParserCtxtPtr ctxt) {
7455 xmlChar *buf = NULL;
7456 int len = 0;
7457 int size = 10;
7458 xmlChar cur;
7459
7460 cur = CUR;
7461 if (((cur >= 'a') && (cur <= 'z')) ||
7462 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007463 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007464 if (buf == NULL) {
7465 xmlGenericError(xmlGenericErrorContext,
7466 "malloc of %d byte failed\n", size);
7467 return(NULL);
7468 }
7469
7470 buf[len++] = cur;
7471 NEXT;
7472 cur = CUR;
7473 while (((cur >= 'a') && (cur <= 'z')) ||
7474 ((cur >= 'A') && (cur <= 'Z')) ||
7475 ((cur >= '0') && (cur <= '9')) ||
7476 (cur == '.') || (cur == '_') ||
7477 (cur == '-')) {
7478 if (len + 1 >= size) {
7479 size *= 2;
7480 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7481 if (buf == NULL) {
7482 xmlGenericError(xmlGenericErrorContext,
7483 "realloc of %d byte failed\n", size);
7484 return(NULL);
7485 }
7486 }
7487 buf[len++] = cur;
7488 NEXT;
7489 cur = CUR;
7490 if (cur == 0) {
7491 SHRINK;
7492 GROW;
7493 cur = CUR;
7494 }
7495 }
7496 buf[len] = 0;
7497 } else {
7498 ctxt->errNo = XML_ERR_ENCODING_NAME;
7499 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7500 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7501 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007502 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007503 }
7504 return(buf);
7505}
7506
7507/**
7508 * xmlParseEncodingDecl:
7509 * @ctxt: an XML parser context
7510 *
7511 * parse the XML encoding declaration
7512 *
7513 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7514 *
7515 * this setups the conversion filters.
7516 *
7517 * Returns the encoding value or NULL
7518 */
7519
7520xmlChar *
7521xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7522 xmlChar *encoding = NULL;
7523 const xmlChar *q;
7524
7525 SKIP_BLANKS;
7526 if ((RAW == 'e') && (NXT(1) == 'n') &&
7527 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7528 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7529 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7530 SKIP(8);
7531 SKIP_BLANKS;
7532 if (RAW != '=') {
7533 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7534 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7535 ctxt->sax->error(ctxt->userData,
7536 "xmlParseEncodingDecl : expected '='\n");
7537 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007538 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007539 return(NULL);
7540 }
7541 NEXT;
7542 SKIP_BLANKS;
7543 if (RAW == '"') {
7544 NEXT;
7545 q = CUR_PTR;
7546 encoding = xmlParseEncName(ctxt);
7547 if (RAW != '"') {
7548 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7549 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7550 ctxt->sax->error(ctxt->userData,
7551 "String not closed\n%.50s\n", q);
7552 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007553 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007554 } else
7555 NEXT;
7556 } else if (RAW == '\''){
7557 NEXT;
7558 q = CUR_PTR;
7559 encoding = xmlParseEncName(ctxt);
7560 if (RAW != '\'') {
7561 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7562 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7563 ctxt->sax->error(ctxt->userData,
7564 "String not closed\n%.50s\n", q);
7565 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007566 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007567 } else
7568 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00007569 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007570 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7571 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7572 ctxt->sax->error(ctxt->userData,
7573 "xmlParseEncodingDecl : expected ' or \"\n");
7574 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007575 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007576 }
7577 if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00007578 xmlCharEncodingHandlerPtr handler;
7579
7580 if (ctxt->input->encoding != NULL)
7581 xmlFree((xmlChar *) ctxt->input->encoding);
7582 ctxt->input->encoding = encoding;
7583
Daniel Veillarda6874ca2003-07-29 16:47:24 +00007584 handler = xmlFindCharEncodingHandler((const char *) encoding);
7585 if (handler != NULL) {
7586 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00007587 } else {
Daniel Veillarda6874ca2003-07-29 16:47:24 +00007588 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7589 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7590 ctxt->sax->error(ctxt->userData,
7591 "Unsupported encoding %s\n", encoding);
7592 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007593 }
7594 }
7595 }
7596 return(encoding);
7597}
7598
7599/**
7600 * xmlParseSDDecl:
7601 * @ctxt: an XML parser context
7602 *
7603 * parse the XML standalone declaration
7604 *
7605 * [32] SDDecl ::= S 'standalone' Eq
7606 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7607 *
7608 * [ VC: Standalone Document Declaration ]
7609 * TODO The standalone document declaration must have the value "no"
7610 * if any external markup declarations contain declarations of:
7611 * - attributes with default values, if elements to which these
7612 * attributes apply appear in the document without specifications
7613 * of values for these attributes, or
7614 * - entities (other than amp, lt, gt, apos, quot), if references
7615 * to those entities appear in the document, or
7616 * - attributes with values subject to normalization, where the
7617 * attribute appears in the document with a value which will change
7618 * as a result of normalization, or
7619 * - element types with element content, if white space occurs directly
7620 * within any instance of those types.
7621 *
7622 * Returns 1 if standalone, 0 otherwise
7623 */
7624
7625int
7626xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7627 int standalone = -1;
7628
7629 SKIP_BLANKS;
7630 if ((RAW == 's') && (NXT(1) == 't') &&
7631 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7632 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7633 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7634 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7635 SKIP(10);
7636 SKIP_BLANKS;
7637 if (RAW != '=') {
7638 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7639 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7640 ctxt->sax->error(ctxt->userData,
7641 "XML standalone declaration : expected '='\n");
7642 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007643 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007644 return(standalone);
7645 }
7646 NEXT;
7647 SKIP_BLANKS;
7648 if (RAW == '\''){
7649 NEXT;
7650 if ((RAW == 'n') && (NXT(1) == 'o')) {
7651 standalone = 0;
7652 SKIP(2);
7653 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7654 (NXT(2) == 's')) {
7655 standalone = 1;
7656 SKIP(3);
7657 } else {
7658 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7659 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7660 ctxt->sax->error(ctxt->userData,
7661 "standalone accepts only 'yes' or 'no'\n");
7662 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007663 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007664 }
7665 if (RAW != '\'') {
7666 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7667 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7668 ctxt->sax->error(ctxt->userData, "String not closed\n");
7669 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007670 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007671 } else
7672 NEXT;
7673 } else if (RAW == '"'){
7674 NEXT;
7675 if ((RAW == 'n') && (NXT(1) == 'o')) {
7676 standalone = 0;
7677 SKIP(2);
7678 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7679 (NXT(2) == 's')) {
7680 standalone = 1;
7681 SKIP(3);
7682 } else {
7683 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7684 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7685 ctxt->sax->error(ctxt->userData,
7686 "standalone accepts only 'yes' or 'no'\n");
7687 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007688 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007689 }
7690 if (RAW != '"') {
7691 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7692 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7693 ctxt->sax->error(ctxt->userData, "String not closed\n");
7694 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007695 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007696 } else
7697 NEXT;
7698 } else {
7699 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7700 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7701 ctxt->sax->error(ctxt->userData,
7702 "Standalone value not found\n");
7703 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007704 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007705 }
7706 }
7707 return(standalone);
7708}
7709
7710/**
7711 * xmlParseXMLDecl:
7712 * @ctxt: an XML parser context
7713 *
7714 * parse an XML declaration header
7715 *
7716 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7717 */
7718
7719void
7720xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7721 xmlChar *version;
7722
7723 /*
7724 * We know that '<?xml' is here.
7725 */
7726 SKIP(5);
7727
7728 if (!IS_BLANK(RAW)) {
7729 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7730 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7731 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7732 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007733 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007734 }
7735 SKIP_BLANKS;
7736
7737 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007738 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007739 */
7740 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007741 if (version == NULL) {
7742 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7743 ctxt->sax->error(ctxt->userData,
7744 "Malformed declaration expecting version\n");
7745 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007746 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard19840942001-11-29 16:11:38 +00007747 } else {
7748 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7749 /*
7750 * TODO: Blueberry should be detected here
7751 */
7752 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7753 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7754 version);
7755 }
7756 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007757 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007758 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007759 }
Owen Taylor3473f882001-02-23 17:55:21 +00007760
7761 /*
7762 * We may have the encoding declaration
7763 */
7764 if (!IS_BLANK(RAW)) {
7765 if ((RAW == '?') && (NXT(1) == '>')) {
7766 SKIP(2);
7767 return;
7768 }
7769 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7770 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7771 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7772 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007773 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007774 }
7775 xmlParseEncodingDecl(ctxt);
7776 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7777 /*
7778 * The XML REC instructs us to stop parsing right here
7779 */
7780 return;
7781 }
7782
7783 /*
7784 * We may have the standalone status.
7785 */
7786 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7787 if ((RAW == '?') && (NXT(1) == '>')) {
7788 SKIP(2);
7789 return;
7790 }
7791 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7792 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7793 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7794 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007795 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007796 }
7797 SKIP_BLANKS;
7798 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7799
7800 SKIP_BLANKS;
7801 if ((RAW == '?') && (NXT(1) == '>')) {
7802 SKIP(2);
7803 } else if (RAW == '>') {
7804 /* Deprecated old WD ... */
7805 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7806 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7807 ctxt->sax->error(ctxt->userData,
7808 "XML declaration must end-up with '?>'\n");
7809 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007810 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007811 NEXT;
7812 } else {
7813 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7814 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7815 ctxt->sax->error(ctxt->userData,
7816 "parsing XML declaration: '?>' expected\n");
7817 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007818 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007819 MOVETO_ENDTAG(CUR_PTR);
7820 NEXT;
7821 }
7822}
7823
7824/**
7825 * xmlParseMisc:
7826 * @ctxt: an XML parser context
7827 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007828 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007829 *
7830 * [27] Misc ::= Comment | PI | S
7831 */
7832
7833void
7834xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00007835 while (((RAW == '<') && (NXT(1) == '?')) ||
7836 ((RAW == '<') && (NXT(1) == '!') &&
7837 (NXT(2) == '-') && (NXT(3) == '-')) ||
7838 IS_BLANK(CUR)) {
7839 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007840 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00007841 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007842 NEXT;
7843 } else
7844 xmlParseComment(ctxt);
7845 }
7846}
7847
7848/**
7849 * xmlParseDocument:
7850 * @ctxt: an XML parser context
7851 *
7852 * parse an XML document (and build a tree if using the standard SAX
7853 * interface).
7854 *
7855 * [1] document ::= prolog element Misc*
7856 *
7857 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7858 *
7859 * Returns 0, -1 in case of error. the parser context is augmented
7860 * as a result of the parsing.
7861 */
7862
7863int
7864xmlParseDocument(xmlParserCtxtPtr ctxt) {
7865 xmlChar start[4];
7866 xmlCharEncoding enc;
7867
7868 xmlInitParser();
7869
7870 GROW;
7871
7872 /*
7873 * SAX: beginning of the document processing.
7874 */
7875 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7876 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7877
Daniel Veillard50f34372001-08-03 12:06:36 +00007878 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007879 /*
7880 * Get the 4 first bytes and decode the charset
7881 * if enc != XML_CHAR_ENCODING_NONE
7882 * plug some encoding conversion routines.
7883 */
7884 start[0] = RAW;
7885 start[1] = NXT(1);
7886 start[2] = NXT(2);
7887 start[3] = NXT(3);
7888 enc = xmlDetectCharEncoding(start, 4);
7889 if (enc != XML_CHAR_ENCODING_NONE) {
7890 xmlSwitchEncoding(ctxt, enc);
7891 }
Owen Taylor3473f882001-02-23 17:55:21 +00007892 }
7893
7894
7895 if (CUR == 0) {
7896 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7897 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7898 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7899 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007900 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007901 }
7902
7903 /*
7904 * Check for the XMLDecl in the Prolog.
7905 */
7906 GROW;
7907 if ((RAW == '<') && (NXT(1) == '?') &&
7908 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7909 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7910
7911 /*
7912 * Note that we will switch encoding on the fly.
7913 */
7914 xmlParseXMLDecl(ctxt);
7915 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7916 /*
7917 * The XML REC instructs us to stop parsing right here
7918 */
7919 return(-1);
7920 }
7921 ctxt->standalone = ctxt->input->standalone;
7922 SKIP_BLANKS;
7923 } else {
7924 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7925 }
7926 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7927 ctxt->sax->startDocument(ctxt->userData);
7928
7929 /*
7930 * The Misc part of the Prolog
7931 */
7932 GROW;
7933 xmlParseMisc(ctxt);
7934
7935 /*
7936 * Then possibly doc type declaration(s) and more Misc
7937 * (doctypedecl Misc*)?
7938 */
7939 GROW;
7940 if ((RAW == '<') && (NXT(1) == '!') &&
7941 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7942 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7943 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7944 (NXT(8) == 'E')) {
7945
7946 ctxt->inSubset = 1;
7947 xmlParseDocTypeDecl(ctxt);
7948 if (RAW == '[') {
7949 ctxt->instate = XML_PARSER_DTD;
7950 xmlParseInternalSubset(ctxt);
7951 }
7952
7953 /*
7954 * Create and update the external subset.
7955 */
7956 ctxt->inSubset = 2;
7957 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7958 (!ctxt->disableSAX))
7959 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7960 ctxt->extSubSystem, ctxt->extSubURI);
7961 ctxt->inSubset = 0;
7962
7963
7964 ctxt->instate = XML_PARSER_PROLOG;
7965 xmlParseMisc(ctxt);
7966 }
7967
7968 /*
7969 * Time to start parsing the tree itself
7970 */
7971 GROW;
7972 if (RAW != '<') {
7973 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7974 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7975 ctxt->sax->error(ctxt->userData,
7976 "Start tag expected, '<' not found\n");
7977 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007978 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007979 ctxt->instate = XML_PARSER_EOF;
7980 } else {
7981 ctxt->instate = XML_PARSER_CONTENT;
7982 xmlParseElement(ctxt);
7983 ctxt->instate = XML_PARSER_EPILOG;
7984
7985
7986 /*
7987 * The Misc part at the end
7988 */
7989 xmlParseMisc(ctxt);
7990
Daniel Veillard561b7f82002-03-20 21:55:57 +00007991 if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00007992 ctxt->errNo = XML_ERR_DOCUMENT_END;
7993 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7994 ctxt->sax->error(ctxt->userData,
7995 "Extra content at the end of the document\n");
7996 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007997 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007998 }
7999 ctxt->instate = XML_PARSER_EOF;
8000 }
8001
8002 /*
8003 * SAX: end of the document processing.
8004 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008005 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008006 ctxt->sax->endDocument(ctxt->userData);
8007
Daniel Veillard5997aca2002-03-18 18:36:20 +00008008 /*
8009 * Remove locally kept entity definitions if the tree was not built
8010 */
8011 if ((ctxt->myDoc != NULL) &&
8012 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
8013 xmlFreeDoc(ctxt->myDoc);
8014 ctxt->myDoc = NULL;
8015 }
8016
Daniel Veillardc7612992002-02-17 22:47:37 +00008017 if (! ctxt->wellFormed) {
8018 ctxt->valid = 0;
8019 return(-1);
8020 }
Owen Taylor3473f882001-02-23 17:55:21 +00008021 return(0);
8022}
8023
8024/**
8025 * xmlParseExtParsedEnt:
8026 * @ctxt: an XML parser context
8027 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008028 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00008029 * An external general parsed entity is well-formed if it matches the
8030 * production labeled extParsedEnt.
8031 *
8032 * [78] extParsedEnt ::= TextDecl? content
8033 *
8034 * Returns 0, -1 in case of error. the parser context is augmented
8035 * as a result of the parsing.
8036 */
8037
8038int
8039xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
8040 xmlChar start[4];
8041 xmlCharEncoding enc;
8042
8043 xmlDefaultSAXHandlerInit();
8044
8045 GROW;
8046
8047 /*
8048 * SAX: beginning of the document processing.
8049 */
8050 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8051 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8052
8053 /*
8054 * Get the 4 first bytes and decode the charset
8055 * if enc != XML_CHAR_ENCODING_NONE
8056 * plug some encoding conversion routines.
8057 */
8058 start[0] = RAW;
8059 start[1] = NXT(1);
8060 start[2] = NXT(2);
8061 start[3] = NXT(3);
8062 enc = xmlDetectCharEncoding(start, 4);
8063 if (enc != XML_CHAR_ENCODING_NONE) {
8064 xmlSwitchEncoding(ctxt, enc);
8065 }
8066
8067
8068 if (CUR == 0) {
8069 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8070 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8071 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8072 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008073 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008074 }
8075
8076 /*
8077 * Check for the XMLDecl in the Prolog.
8078 */
8079 GROW;
8080 if ((RAW == '<') && (NXT(1) == '?') &&
8081 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8082 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8083
8084 /*
8085 * Note that we will switch encoding on the fly.
8086 */
8087 xmlParseXMLDecl(ctxt);
8088 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8089 /*
8090 * The XML REC instructs us to stop parsing right here
8091 */
8092 return(-1);
8093 }
8094 SKIP_BLANKS;
8095 } else {
8096 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8097 }
8098 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8099 ctxt->sax->startDocument(ctxt->userData);
8100
8101 /*
8102 * Doing validity checking on chunk doesn't make sense
8103 */
8104 ctxt->instate = XML_PARSER_CONTENT;
8105 ctxt->validate = 0;
8106 ctxt->loadsubset = 0;
8107 ctxt->depth = 0;
8108
8109 xmlParseContent(ctxt);
8110
8111 if ((RAW == '<') && (NXT(1) == '/')) {
8112 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8113 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8114 ctxt->sax->error(ctxt->userData,
8115 "chunk is not well balanced\n");
8116 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008117 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008118 } else if (RAW != 0) {
8119 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8120 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8121 ctxt->sax->error(ctxt->userData,
8122 "extra content at the end of well balanced chunk\n");
8123 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008124 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008125 }
8126
8127 /*
8128 * SAX: end of the document processing.
8129 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008130 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008131 ctxt->sax->endDocument(ctxt->userData);
8132
8133 if (! ctxt->wellFormed) return(-1);
8134 return(0);
8135}
8136
8137/************************************************************************
8138 * *
8139 * Progressive parsing interfaces *
8140 * *
8141 ************************************************************************/
8142
8143/**
8144 * xmlParseLookupSequence:
8145 * @ctxt: an XML parser context
8146 * @first: the first char to lookup
8147 * @next: the next char to lookup or zero
8148 * @third: the next char to lookup or zero
8149 *
8150 * Try to find if a sequence (first, next, third) or just (first next) or
8151 * (first) is available in the input stream.
8152 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8153 * to avoid rescanning sequences of bytes, it DOES change the state of the
8154 * parser, do not use liberally.
8155 *
8156 * Returns the index to the current parsing point if the full sequence
8157 * is available, -1 otherwise.
8158 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008159static int
Owen Taylor3473f882001-02-23 17:55:21 +00008160xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8161 xmlChar next, xmlChar third) {
8162 int base, len;
8163 xmlParserInputPtr in;
8164 const xmlChar *buf;
8165
8166 in = ctxt->input;
8167 if (in == NULL) return(-1);
8168 base = in->cur - in->base;
8169 if (base < 0) return(-1);
8170 if (ctxt->checkIndex > base)
8171 base = ctxt->checkIndex;
8172 if (in->buf == NULL) {
8173 buf = in->base;
8174 len = in->length;
8175 } else {
8176 buf = in->buf->buffer->content;
8177 len = in->buf->buffer->use;
8178 }
8179 /* take into account the sequence length */
8180 if (third) len -= 2;
8181 else if (next) len --;
8182 for (;base < len;base++) {
8183 if (buf[base] == first) {
8184 if (third != 0) {
8185 if ((buf[base + 1] != next) ||
8186 (buf[base + 2] != third)) continue;
8187 } else if (next != 0) {
8188 if (buf[base + 1] != next) continue;
8189 }
8190 ctxt->checkIndex = 0;
8191#ifdef DEBUG_PUSH
8192 if (next == 0)
8193 xmlGenericError(xmlGenericErrorContext,
8194 "PP: lookup '%c' found at %d\n",
8195 first, base);
8196 else if (third == 0)
8197 xmlGenericError(xmlGenericErrorContext,
8198 "PP: lookup '%c%c' found at %d\n",
8199 first, next, base);
8200 else
8201 xmlGenericError(xmlGenericErrorContext,
8202 "PP: lookup '%c%c%c' found at %d\n",
8203 first, next, third, base);
8204#endif
8205 return(base - (in->cur - in->base));
8206 }
8207 }
8208 ctxt->checkIndex = base;
8209#ifdef DEBUG_PUSH
8210 if (next == 0)
8211 xmlGenericError(xmlGenericErrorContext,
8212 "PP: lookup '%c' failed\n", first);
8213 else if (third == 0)
8214 xmlGenericError(xmlGenericErrorContext,
8215 "PP: lookup '%c%c' failed\n", first, next);
8216 else
8217 xmlGenericError(xmlGenericErrorContext,
8218 "PP: lookup '%c%c%c' failed\n", first, next, third);
8219#endif
8220 return(-1);
8221}
8222
8223/**
Daniel Veillarda880b122003-04-21 21:36:41 +00008224 * xmlParseGetLasts:
8225 * @ctxt: an XML parser context
8226 * @lastlt: pointer to store the last '<' from the input
8227 * @lastgt: pointer to store the last '>' from the input
8228 *
8229 * Lookup the last < and > in the current chunk
8230 */
8231static void
8232xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
8233 const xmlChar **lastgt) {
8234 const xmlChar *tmp;
8235
8236 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
8237 xmlGenericError(xmlGenericErrorContext,
8238 "Internal error: xmlParseGetLasts\n");
8239 return;
8240 }
8241 if ((ctxt->progressive == 1) && (ctxt->inputNr == 1)) {
8242 tmp = ctxt->input->end;
8243 tmp--;
8244 while ((tmp >= ctxt->input->base) && (*tmp != '<') &&
8245 (*tmp != '>')) tmp--;
8246 if (tmp < ctxt->input->base) {
8247 *lastlt = NULL;
8248 *lastgt = NULL;
8249 } else if (*tmp == '<') {
8250 *lastlt = tmp;
8251 tmp--;
8252 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
8253 if (tmp < ctxt->input->base)
8254 *lastgt = NULL;
8255 else
8256 *lastgt = tmp;
8257 } else {
8258 *lastgt = tmp;
8259 tmp--;
8260 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
8261 if (tmp < ctxt->input->base)
8262 *lastlt = NULL;
8263 else
8264 *lastlt = tmp;
8265 }
8266
8267 } else {
8268 *lastlt = NULL;
8269 *lastgt = NULL;
8270 }
8271}
8272/**
Owen Taylor3473f882001-02-23 17:55:21 +00008273 * xmlParseTryOrFinish:
8274 * @ctxt: an XML parser context
8275 * @terminate: last chunk indicator
8276 *
8277 * Try to progress on parsing
8278 *
8279 * Returns zero if no parsing was possible
8280 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008281static int
Owen Taylor3473f882001-02-23 17:55:21 +00008282xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8283 int ret = 0;
8284 int avail;
8285 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00008286 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00008287
8288#ifdef DEBUG_PUSH
8289 switch (ctxt->instate) {
8290 case XML_PARSER_EOF:
8291 xmlGenericError(xmlGenericErrorContext,
8292 "PP: try EOF\n"); break;
8293 case XML_PARSER_START:
8294 xmlGenericError(xmlGenericErrorContext,
8295 "PP: try START\n"); break;
8296 case XML_PARSER_MISC:
8297 xmlGenericError(xmlGenericErrorContext,
8298 "PP: try MISC\n");break;
8299 case XML_PARSER_COMMENT:
8300 xmlGenericError(xmlGenericErrorContext,
8301 "PP: try COMMENT\n");break;
8302 case XML_PARSER_PROLOG:
8303 xmlGenericError(xmlGenericErrorContext,
8304 "PP: try PROLOG\n");break;
8305 case XML_PARSER_START_TAG:
8306 xmlGenericError(xmlGenericErrorContext,
8307 "PP: try START_TAG\n");break;
8308 case XML_PARSER_CONTENT:
8309 xmlGenericError(xmlGenericErrorContext,
8310 "PP: try CONTENT\n");break;
8311 case XML_PARSER_CDATA_SECTION:
8312 xmlGenericError(xmlGenericErrorContext,
8313 "PP: try CDATA_SECTION\n");break;
8314 case XML_PARSER_END_TAG:
8315 xmlGenericError(xmlGenericErrorContext,
8316 "PP: try END_TAG\n");break;
8317 case XML_PARSER_ENTITY_DECL:
8318 xmlGenericError(xmlGenericErrorContext,
8319 "PP: try ENTITY_DECL\n");break;
8320 case XML_PARSER_ENTITY_VALUE:
8321 xmlGenericError(xmlGenericErrorContext,
8322 "PP: try ENTITY_VALUE\n");break;
8323 case XML_PARSER_ATTRIBUTE_VALUE:
8324 xmlGenericError(xmlGenericErrorContext,
8325 "PP: try ATTRIBUTE_VALUE\n");break;
8326 case XML_PARSER_DTD:
8327 xmlGenericError(xmlGenericErrorContext,
8328 "PP: try DTD\n");break;
8329 case XML_PARSER_EPILOG:
8330 xmlGenericError(xmlGenericErrorContext,
8331 "PP: try EPILOG\n");break;
8332 case XML_PARSER_PI:
8333 xmlGenericError(xmlGenericErrorContext,
8334 "PP: try PI\n");break;
8335 case XML_PARSER_IGNORE:
8336 xmlGenericError(xmlGenericErrorContext,
8337 "PP: try IGNORE\n");break;
8338 }
8339#endif
8340
Daniel Veillarda880b122003-04-21 21:36:41 +00008341 if (ctxt->input->cur - ctxt->input->base > 4096) {
8342 xmlSHRINK(ctxt);
8343 ctxt->checkIndex = 0;
8344 }
8345 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00008346
Daniel Veillarda880b122003-04-21 21:36:41 +00008347 while (1) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008348 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
8349 return(0);
8350
8351
Owen Taylor3473f882001-02-23 17:55:21 +00008352 /*
8353 * Pop-up of finished entities.
8354 */
8355 while ((RAW == 0) && (ctxt->inputNr > 1))
8356 xmlPopInput(ctxt);
8357
8358 if (ctxt->input ==NULL) break;
8359 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00008360 avail = ctxt->input->length -
8361 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00008362 else {
8363 /*
8364 * If we are operating on converted input, try to flush
8365 * remainng chars to avoid them stalling in the non-converted
8366 * buffer.
8367 */
8368 if ((ctxt->input->buf->raw != NULL) &&
8369 (ctxt->input->buf->raw->use > 0)) {
8370 int base = ctxt->input->base -
8371 ctxt->input->buf->buffer->content;
8372 int current = ctxt->input->cur - ctxt->input->base;
8373
8374 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8375 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8376 ctxt->input->cur = ctxt->input->base + current;
8377 ctxt->input->end =
8378 &ctxt->input->buf->buffer->content[
8379 ctxt->input->buf->buffer->use];
8380 }
8381 avail = ctxt->input->buf->buffer->use -
8382 (ctxt->input->cur - ctxt->input->base);
8383 }
Owen Taylor3473f882001-02-23 17:55:21 +00008384 if (avail < 1)
8385 goto done;
8386 switch (ctxt->instate) {
8387 case XML_PARSER_EOF:
8388 /*
8389 * Document parsing is done !
8390 */
8391 goto done;
8392 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008393 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8394 xmlChar start[4];
8395 xmlCharEncoding enc;
8396
8397 /*
8398 * Very first chars read from the document flow.
8399 */
8400 if (avail < 4)
8401 goto done;
8402
8403 /*
8404 * Get the 4 first bytes and decode the charset
8405 * if enc != XML_CHAR_ENCODING_NONE
8406 * plug some encoding conversion routines.
8407 */
8408 start[0] = RAW;
8409 start[1] = NXT(1);
8410 start[2] = NXT(2);
8411 start[3] = NXT(3);
8412 enc = xmlDetectCharEncoding(start, 4);
8413 if (enc != XML_CHAR_ENCODING_NONE) {
8414 xmlSwitchEncoding(ctxt, enc);
8415 }
8416 break;
8417 }
Owen Taylor3473f882001-02-23 17:55:21 +00008418
8419 cur = ctxt->input->cur[0];
8420 next = ctxt->input->cur[1];
8421 if (cur == 0) {
8422 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8423 ctxt->sax->setDocumentLocator(ctxt->userData,
8424 &xmlDefaultSAXLocator);
8425 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8426 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8427 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8428 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008429 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008430 ctxt->instate = XML_PARSER_EOF;
8431#ifdef DEBUG_PUSH
8432 xmlGenericError(xmlGenericErrorContext,
8433 "PP: entering EOF\n");
8434#endif
8435 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8436 ctxt->sax->endDocument(ctxt->userData);
8437 goto done;
8438 }
8439 if ((cur == '<') && (next == '?')) {
8440 /* PI or XML decl */
8441 if (avail < 5) return(ret);
8442 if ((!terminate) &&
8443 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8444 return(ret);
8445 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8446 ctxt->sax->setDocumentLocator(ctxt->userData,
8447 &xmlDefaultSAXLocator);
8448 if ((ctxt->input->cur[2] == 'x') &&
8449 (ctxt->input->cur[3] == 'm') &&
8450 (ctxt->input->cur[4] == 'l') &&
8451 (IS_BLANK(ctxt->input->cur[5]))) {
8452 ret += 5;
8453#ifdef DEBUG_PUSH
8454 xmlGenericError(xmlGenericErrorContext,
8455 "PP: Parsing XML Decl\n");
8456#endif
8457 xmlParseXMLDecl(ctxt);
8458 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8459 /*
8460 * The XML REC instructs us to stop parsing right
8461 * here
8462 */
8463 ctxt->instate = XML_PARSER_EOF;
8464 return(0);
8465 }
8466 ctxt->standalone = ctxt->input->standalone;
8467 if ((ctxt->encoding == NULL) &&
8468 (ctxt->input->encoding != NULL))
8469 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8470 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8471 (!ctxt->disableSAX))
8472 ctxt->sax->startDocument(ctxt->userData);
8473 ctxt->instate = XML_PARSER_MISC;
8474#ifdef DEBUG_PUSH
8475 xmlGenericError(xmlGenericErrorContext,
8476 "PP: entering MISC\n");
8477#endif
8478 } else {
8479 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8480 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8481 (!ctxt->disableSAX))
8482 ctxt->sax->startDocument(ctxt->userData);
8483 ctxt->instate = XML_PARSER_MISC;
8484#ifdef DEBUG_PUSH
8485 xmlGenericError(xmlGenericErrorContext,
8486 "PP: entering MISC\n");
8487#endif
8488 }
8489 } else {
8490 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8491 ctxt->sax->setDocumentLocator(ctxt->userData,
8492 &xmlDefaultSAXLocator);
8493 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8494 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8495 (!ctxt->disableSAX))
8496 ctxt->sax->startDocument(ctxt->userData);
8497 ctxt->instate = XML_PARSER_MISC;
8498#ifdef DEBUG_PUSH
8499 xmlGenericError(xmlGenericErrorContext,
8500 "PP: entering MISC\n");
8501#endif
8502 }
8503 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00008504 case XML_PARSER_START_TAG: {
8505 xmlChar *name, *oldname;
8506
8507 if ((avail < 2) && (ctxt->inputNr == 1))
8508 goto done;
8509 cur = ctxt->input->cur[0];
8510 if (cur != '<') {
8511 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8512 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8513 ctxt->sax->error(ctxt->userData,
8514 "Start tag expect, '<' not found\n");
8515 ctxt->wellFormed = 0;
8516 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8517 ctxt->instate = XML_PARSER_EOF;
8518#ifdef DEBUG_PUSH
8519 xmlGenericError(xmlGenericErrorContext,
8520 "PP: entering EOF\n");
8521#endif
8522 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8523 ctxt->sax->endDocument(ctxt->userData);
8524 goto done;
8525 }
8526 if (!terminate) {
8527 if (ctxt->progressive) {
8528 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
8529 goto done;
8530 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
8531 goto done;
8532 }
8533 }
8534 if (ctxt->spaceNr == 0)
8535 spacePush(ctxt, -1);
8536 else
8537 spacePush(ctxt, *ctxt->space);
8538 name = xmlParseStartTag(ctxt);
8539 if (name == NULL) {
8540 spacePop(ctxt);
8541 ctxt->instate = XML_PARSER_EOF;
8542#ifdef DEBUG_PUSH
8543 xmlGenericError(xmlGenericErrorContext,
8544 "PP: entering EOF\n");
8545#endif
8546 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8547 ctxt->sax->endDocument(ctxt->userData);
8548 goto done;
8549 }
8550 namePush(ctxt, name);
8551
8552 /*
8553 * [ VC: Root Element Type ]
8554 * The Name in the document type declaration must match
8555 * the element type of the root element.
8556 */
8557 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8558 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8559 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8560
8561 /*
8562 * Check for an Empty Element.
8563 */
8564 if ((RAW == '/') && (NXT(1) == '>')) {
8565 SKIP(2);
8566 if ((ctxt->sax != NULL) &&
8567 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8568 ctxt->sax->endElement(ctxt->userData, name);
8569 oldname = namePop(ctxt);
8570 spacePop(ctxt);
8571 if (oldname != NULL) {
8572#ifdef DEBUG_STACK
8573 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8574#endif
8575 xmlFree(oldname);
8576 }
8577 if (ctxt->name == NULL) {
8578 ctxt->instate = XML_PARSER_EPILOG;
8579#ifdef DEBUG_PUSH
8580 xmlGenericError(xmlGenericErrorContext,
8581 "PP: entering EPILOG\n");
8582#endif
8583 } else {
8584 ctxt->instate = XML_PARSER_CONTENT;
8585#ifdef DEBUG_PUSH
8586 xmlGenericError(xmlGenericErrorContext,
8587 "PP: entering CONTENT\n");
8588#endif
8589 }
8590 break;
8591 }
8592 if (RAW == '>') {
8593 NEXT;
8594 } else {
8595 ctxt->errNo = XML_ERR_GT_REQUIRED;
8596 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8597 ctxt->sax->error(ctxt->userData,
8598 "Couldn't find end of Start Tag %s\n",
8599 name);
8600 ctxt->wellFormed = 0;
8601 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8602
8603 /*
8604 * end of parsing of this node.
8605 */
8606 nodePop(ctxt);
8607 oldname = namePop(ctxt);
8608 spacePop(ctxt);
8609 if (oldname != NULL) {
8610#ifdef DEBUG_STACK
8611 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8612#endif
8613 xmlFree(oldname);
8614 }
8615 }
8616 ctxt->instate = XML_PARSER_CONTENT;
8617#ifdef DEBUG_PUSH
8618 xmlGenericError(xmlGenericErrorContext,
8619 "PP: entering CONTENT\n");
8620#endif
8621 break;
8622 }
8623 case XML_PARSER_CONTENT: {
8624 const xmlChar *test;
8625 unsigned int cons;
8626 if ((avail < 2) && (ctxt->inputNr == 1))
8627 goto done;
8628 cur = ctxt->input->cur[0];
8629 next = ctxt->input->cur[1];
8630
8631 test = CUR_PTR;
8632 cons = ctxt->input->consumed;
8633 if ((cur == '<') && (next == '/')) {
8634 ctxt->instate = XML_PARSER_END_TAG;
8635#ifdef DEBUG_PUSH
8636 xmlGenericError(xmlGenericErrorContext,
8637 "PP: entering END_TAG\n");
8638#endif
8639 break;
8640 } else if ((cur == '<') && (next == '?')) {
8641 if ((!terminate) &&
8642 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8643 goto done;
8644#ifdef DEBUG_PUSH
8645 xmlGenericError(xmlGenericErrorContext,
8646 "PP: Parsing PI\n");
8647#endif
8648 xmlParsePI(ctxt);
8649 } else if ((cur == '<') && (next != '!')) {
8650 ctxt->instate = XML_PARSER_START_TAG;
8651#ifdef DEBUG_PUSH
8652 xmlGenericError(xmlGenericErrorContext,
8653 "PP: entering START_TAG\n");
8654#endif
8655 break;
8656 } else if ((cur == '<') && (next == '!') &&
8657 (ctxt->input->cur[2] == '-') &&
8658 (ctxt->input->cur[3] == '-')) {
8659 if ((!terminate) &&
8660 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8661 goto done;
8662#ifdef DEBUG_PUSH
8663 xmlGenericError(xmlGenericErrorContext,
8664 "PP: Parsing Comment\n");
8665#endif
8666 xmlParseComment(ctxt);
8667 ctxt->instate = XML_PARSER_CONTENT;
8668 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8669 (ctxt->input->cur[2] == '[') &&
8670 (ctxt->input->cur[3] == 'C') &&
8671 (ctxt->input->cur[4] == 'D') &&
8672 (ctxt->input->cur[5] == 'A') &&
8673 (ctxt->input->cur[6] == 'T') &&
8674 (ctxt->input->cur[7] == 'A') &&
8675 (ctxt->input->cur[8] == '[')) {
8676 SKIP(9);
8677 ctxt->instate = XML_PARSER_CDATA_SECTION;
8678#ifdef DEBUG_PUSH
8679 xmlGenericError(xmlGenericErrorContext,
8680 "PP: entering CDATA_SECTION\n");
8681#endif
8682 break;
8683 } else if ((cur == '<') && (next == '!') &&
8684 (avail < 9)) {
8685 goto done;
8686 } else if (cur == '&') {
8687 if ((!terminate) &&
8688 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8689 goto done;
8690#ifdef DEBUG_PUSH
8691 xmlGenericError(xmlGenericErrorContext,
8692 "PP: Parsing Reference\n");
8693#endif
8694 xmlParseReference(ctxt);
8695 } else {
8696 /* TODO Avoid the extra copy, handle directly !!! */
8697 /*
8698 * Goal of the following test is:
8699 * - minimize calls to the SAX 'character' callback
8700 * when they are mergeable
8701 * - handle an problem for isBlank when we only parse
8702 * a sequence of blank chars and the next one is
8703 * not available to check against '<' presence.
8704 * - tries to homogenize the differences in SAX
8705 * callbacks between the push and pull versions
8706 * of the parser.
8707 */
8708 if ((ctxt->inputNr == 1) &&
8709 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8710 if (!terminate) {
8711 if (ctxt->progressive) {
8712 if ((lastlt == NULL) ||
8713 (ctxt->input->cur > lastlt))
8714 goto done;
8715 } else if (xmlParseLookupSequence(ctxt,
8716 '<', 0, 0) < 0) {
8717 goto done;
8718 }
8719 }
8720 }
8721 ctxt->checkIndex = 0;
8722#ifdef DEBUG_PUSH
8723 xmlGenericError(xmlGenericErrorContext,
8724 "PP: Parsing char data\n");
8725#endif
8726 xmlParseCharData(ctxt, 0);
8727 }
8728 /*
8729 * Pop-up of finished entities.
8730 */
8731 while ((RAW == 0) && (ctxt->inputNr > 1))
8732 xmlPopInput(ctxt);
8733 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
8734 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8735 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8736 ctxt->sax->error(ctxt->userData,
8737 "detected an error in element content\n");
8738 ctxt->wellFormed = 0;
8739 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8740 ctxt->instate = XML_PARSER_EOF;
8741 break;
8742 }
8743 break;
8744 }
8745 case XML_PARSER_END_TAG:
8746 if (avail < 2)
8747 goto done;
8748 if (!terminate) {
8749 if (ctxt->progressive) {
8750 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
8751 goto done;
8752 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
8753 goto done;
8754 }
8755 }
8756 xmlParseEndTag(ctxt);
8757 if (ctxt->name == NULL) {
8758 ctxt->instate = XML_PARSER_EPILOG;
8759#ifdef DEBUG_PUSH
8760 xmlGenericError(xmlGenericErrorContext,
8761 "PP: entering EPILOG\n");
8762#endif
8763 } else {
8764 ctxt->instate = XML_PARSER_CONTENT;
8765#ifdef DEBUG_PUSH
8766 xmlGenericError(xmlGenericErrorContext,
8767 "PP: entering CONTENT\n");
8768#endif
8769 }
8770 break;
8771 case XML_PARSER_CDATA_SECTION: {
8772 /*
8773 * The Push mode need to have the SAX callback for
8774 * cdataBlock merge back contiguous callbacks.
8775 */
8776 int base;
8777
8778 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8779 if (base < 0) {
8780 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8781 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8782 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00008783 ctxt->sax->cdataBlock(ctxt->userData,
8784 ctxt->input->cur,
8785 XML_PARSER_BIG_BUFFER_SIZE);
8786 else if (ctxt->sax->characters != NULL)
8787 ctxt->sax->characters(ctxt->userData,
8788 ctxt->input->cur,
Daniel Veillarda880b122003-04-21 21:36:41 +00008789 XML_PARSER_BIG_BUFFER_SIZE);
8790 }
8791 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8792 ctxt->checkIndex = 0;
8793 }
8794 goto done;
8795 } else {
8796 if ((ctxt->sax != NULL) && (base > 0) &&
8797 (!ctxt->disableSAX)) {
8798 if (ctxt->sax->cdataBlock != NULL)
8799 ctxt->sax->cdataBlock(ctxt->userData,
8800 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00008801 else if (ctxt->sax->characters != NULL)
8802 ctxt->sax->characters(ctxt->userData,
8803 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00008804 }
8805 SKIP(base + 3);
8806 ctxt->checkIndex = 0;
8807 ctxt->instate = XML_PARSER_CONTENT;
8808#ifdef DEBUG_PUSH
8809 xmlGenericError(xmlGenericErrorContext,
8810 "PP: entering CONTENT\n");
8811#endif
8812 }
8813 break;
8814 }
Owen Taylor3473f882001-02-23 17:55:21 +00008815 case XML_PARSER_MISC:
8816 SKIP_BLANKS;
8817 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00008818 avail = ctxt->input->length -
8819 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00008820 else
Daniel Veillarda880b122003-04-21 21:36:41 +00008821 avail = ctxt->input->buf->buffer->use -
8822 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00008823 if (avail < 2)
8824 goto done;
8825 cur = ctxt->input->cur[0];
8826 next = ctxt->input->cur[1];
8827 if ((cur == '<') && (next == '?')) {
8828 if ((!terminate) &&
8829 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8830 goto done;
8831#ifdef DEBUG_PUSH
8832 xmlGenericError(xmlGenericErrorContext,
8833 "PP: Parsing PI\n");
8834#endif
8835 xmlParsePI(ctxt);
8836 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00008837 (ctxt->input->cur[2] == '-') &&
8838 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008839 if ((!terminate) &&
8840 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8841 goto done;
8842#ifdef DEBUG_PUSH
8843 xmlGenericError(xmlGenericErrorContext,
8844 "PP: Parsing Comment\n");
8845#endif
8846 xmlParseComment(ctxt);
8847 ctxt->instate = XML_PARSER_MISC;
8848 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00008849 (ctxt->input->cur[2] == 'D') &&
8850 (ctxt->input->cur[3] == 'O') &&
8851 (ctxt->input->cur[4] == 'C') &&
8852 (ctxt->input->cur[5] == 'T') &&
8853 (ctxt->input->cur[6] == 'Y') &&
8854 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008855 (ctxt->input->cur[8] == 'E')) {
8856 if ((!terminate) &&
8857 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8858 goto done;
8859#ifdef DEBUG_PUSH
8860 xmlGenericError(xmlGenericErrorContext,
8861 "PP: Parsing internal subset\n");
8862#endif
8863 ctxt->inSubset = 1;
8864 xmlParseDocTypeDecl(ctxt);
8865 if (RAW == '[') {
8866 ctxt->instate = XML_PARSER_DTD;
8867#ifdef DEBUG_PUSH
8868 xmlGenericError(xmlGenericErrorContext,
8869 "PP: entering DTD\n");
8870#endif
8871 } else {
8872 /*
8873 * Create and update the external subset.
8874 */
8875 ctxt->inSubset = 2;
8876 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8877 (ctxt->sax->externalSubset != NULL))
8878 ctxt->sax->externalSubset(ctxt->userData,
8879 ctxt->intSubName, ctxt->extSubSystem,
8880 ctxt->extSubURI);
8881 ctxt->inSubset = 0;
8882 ctxt->instate = XML_PARSER_PROLOG;
8883#ifdef DEBUG_PUSH
8884 xmlGenericError(xmlGenericErrorContext,
8885 "PP: entering PROLOG\n");
8886#endif
8887 }
8888 } else if ((cur == '<') && (next == '!') &&
8889 (avail < 9)) {
8890 goto done;
8891 } else {
8892 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00008893 ctxt->progressive = 1;
8894 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00008895#ifdef DEBUG_PUSH
8896 xmlGenericError(xmlGenericErrorContext,
8897 "PP: entering START_TAG\n");
8898#endif
8899 }
8900 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008901 case XML_PARSER_PROLOG:
8902 SKIP_BLANKS;
8903 if (ctxt->input->buf == NULL)
8904 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8905 else
8906 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8907 if (avail < 2)
8908 goto done;
8909 cur = ctxt->input->cur[0];
8910 next = ctxt->input->cur[1];
8911 if ((cur == '<') && (next == '?')) {
8912 if ((!terminate) &&
8913 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8914 goto done;
8915#ifdef DEBUG_PUSH
8916 xmlGenericError(xmlGenericErrorContext,
8917 "PP: Parsing PI\n");
8918#endif
8919 xmlParsePI(ctxt);
8920 } else if ((cur == '<') && (next == '!') &&
8921 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8922 if ((!terminate) &&
8923 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8924 goto done;
8925#ifdef DEBUG_PUSH
8926 xmlGenericError(xmlGenericErrorContext,
8927 "PP: Parsing Comment\n");
8928#endif
8929 xmlParseComment(ctxt);
8930 ctxt->instate = XML_PARSER_PROLOG;
8931 } else if ((cur == '<') && (next == '!') &&
8932 (avail < 4)) {
8933 goto done;
8934 } else {
8935 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00008936 ctxt->progressive = 1;
8937 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00008938#ifdef DEBUG_PUSH
8939 xmlGenericError(xmlGenericErrorContext,
8940 "PP: entering START_TAG\n");
8941#endif
8942 }
8943 break;
8944 case XML_PARSER_EPILOG:
8945 SKIP_BLANKS;
8946 if (ctxt->input->buf == NULL)
8947 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8948 else
8949 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8950 if (avail < 2)
8951 goto done;
8952 cur = ctxt->input->cur[0];
8953 next = ctxt->input->cur[1];
8954 if ((cur == '<') && (next == '?')) {
8955 if ((!terminate) &&
8956 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8957 goto done;
8958#ifdef DEBUG_PUSH
8959 xmlGenericError(xmlGenericErrorContext,
8960 "PP: Parsing PI\n");
8961#endif
8962 xmlParsePI(ctxt);
8963 ctxt->instate = XML_PARSER_EPILOG;
8964 } else if ((cur == '<') && (next == '!') &&
8965 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8966 if ((!terminate) &&
8967 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8968 goto done;
8969#ifdef DEBUG_PUSH
8970 xmlGenericError(xmlGenericErrorContext,
8971 "PP: Parsing Comment\n");
8972#endif
8973 xmlParseComment(ctxt);
8974 ctxt->instate = XML_PARSER_EPILOG;
8975 } else if ((cur == '<') && (next == '!') &&
8976 (avail < 4)) {
8977 goto done;
8978 } else {
8979 ctxt->errNo = XML_ERR_DOCUMENT_END;
8980 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8981 ctxt->sax->error(ctxt->userData,
8982 "Extra content at the end of the document\n");
8983 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008984 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008985 ctxt->instate = XML_PARSER_EOF;
8986#ifdef DEBUG_PUSH
8987 xmlGenericError(xmlGenericErrorContext,
8988 "PP: entering EOF\n");
8989#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008990 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008991 ctxt->sax->endDocument(ctxt->userData);
8992 goto done;
8993 }
8994 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008995 case XML_PARSER_DTD: {
8996 /*
8997 * Sorry but progressive parsing of the internal subset
8998 * is not expected to be supported. We first check that
8999 * the full content of the internal subset is available and
9000 * the parsing is launched only at that point.
9001 * Internal subset ends up with "']' S? '>'" in an unescaped
9002 * section and not in a ']]>' sequence which are conditional
9003 * sections (whoever argued to keep that crap in XML deserve
9004 * a place in hell !).
9005 */
9006 int base, i;
9007 xmlChar *buf;
9008 xmlChar quote = 0;
9009
9010 base = ctxt->input->cur - ctxt->input->base;
9011 if (base < 0) return(0);
9012 if (ctxt->checkIndex > base)
9013 base = ctxt->checkIndex;
9014 buf = ctxt->input->buf->buffer->content;
9015 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9016 base++) {
9017 if (quote != 0) {
9018 if (buf[base] == quote)
9019 quote = 0;
9020 continue;
9021 }
9022 if (buf[base] == '"') {
9023 quote = '"';
9024 continue;
9025 }
9026 if (buf[base] == '\'') {
9027 quote = '\'';
9028 continue;
9029 }
9030 if (buf[base] == ']') {
9031 if ((unsigned int) base +1 >=
9032 ctxt->input->buf->buffer->use)
9033 break;
9034 if (buf[base + 1] == ']') {
9035 /* conditional crap, skip both ']' ! */
9036 base++;
9037 continue;
9038 }
9039 for (i = 0;
9040 (unsigned int) base + i < ctxt->input->buf->buffer->use;
9041 i++) {
9042 if (buf[base + i] == '>')
9043 goto found_end_int_subset;
9044 }
9045 break;
9046 }
9047 }
9048 /*
9049 * We didn't found the end of the Internal subset
9050 */
9051 if (quote == 0)
9052 ctxt->checkIndex = base;
9053#ifdef DEBUG_PUSH
9054 if (next == 0)
9055 xmlGenericError(xmlGenericErrorContext,
9056 "PP: lookup of int subset end filed\n");
9057#endif
9058 goto done;
9059
9060found_end_int_subset:
9061 xmlParseInternalSubset(ctxt);
9062 ctxt->inSubset = 2;
9063 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9064 (ctxt->sax->externalSubset != NULL))
9065 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9066 ctxt->extSubSystem, ctxt->extSubURI);
9067 ctxt->inSubset = 0;
9068 ctxt->instate = XML_PARSER_PROLOG;
9069 ctxt->checkIndex = 0;
9070#ifdef DEBUG_PUSH
9071 xmlGenericError(xmlGenericErrorContext,
9072 "PP: entering PROLOG\n");
9073#endif
9074 break;
9075 }
9076 case XML_PARSER_COMMENT:
9077 xmlGenericError(xmlGenericErrorContext,
9078 "PP: internal error, state == COMMENT\n");
9079 ctxt->instate = XML_PARSER_CONTENT;
9080#ifdef DEBUG_PUSH
9081 xmlGenericError(xmlGenericErrorContext,
9082 "PP: entering CONTENT\n");
9083#endif
9084 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009085 case XML_PARSER_IGNORE:
9086 xmlGenericError(xmlGenericErrorContext,
9087 "PP: internal error, state == IGNORE");
9088 ctxt->instate = XML_PARSER_DTD;
9089#ifdef DEBUG_PUSH
9090 xmlGenericError(xmlGenericErrorContext,
9091 "PP: entering DTD\n");
9092#endif
9093 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009094 case XML_PARSER_PI:
9095 xmlGenericError(xmlGenericErrorContext,
9096 "PP: internal error, state == PI\n");
9097 ctxt->instate = XML_PARSER_CONTENT;
9098#ifdef DEBUG_PUSH
9099 xmlGenericError(xmlGenericErrorContext,
9100 "PP: entering CONTENT\n");
9101#endif
9102 break;
9103 case XML_PARSER_ENTITY_DECL:
9104 xmlGenericError(xmlGenericErrorContext,
9105 "PP: internal error, state == ENTITY_DECL\n");
9106 ctxt->instate = XML_PARSER_DTD;
9107#ifdef DEBUG_PUSH
9108 xmlGenericError(xmlGenericErrorContext,
9109 "PP: entering DTD\n");
9110#endif
9111 break;
9112 case XML_PARSER_ENTITY_VALUE:
9113 xmlGenericError(xmlGenericErrorContext,
9114 "PP: internal error, state == ENTITY_VALUE\n");
9115 ctxt->instate = XML_PARSER_CONTENT;
9116#ifdef DEBUG_PUSH
9117 xmlGenericError(xmlGenericErrorContext,
9118 "PP: entering DTD\n");
9119#endif
9120 break;
9121 case XML_PARSER_ATTRIBUTE_VALUE:
9122 xmlGenericError(xmlGenericErrorContext,
9123 "PP: internal error, state == ATTRIBUTE_VALUE\n");
9124 ctxt->instate = XML_PARSER_START_TAG;
9125#ifdef DEBUG_PUSH
9126 xmlGenericError(xmlGenericErrorContext,
9127 "PP: entering START_TAG\n");
9128#endif
9129 break;
9130 case XML_PARSER_SYSTEM_LITERAL:
9131 xmlGenericError(xmlGenericErrorContext,
9132 "PP: internal error, state == SYSTEM_LITERAL\n");
9133 ctxt->instate = XML_PARSER_START_TAG;
9134#ifdef DEBUG_PUSH
9135 xmlGenericError(xmlGenericErrorContext,
9136 "PP: entering START_TAG\n");
9137#endif
9138 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00009139 case XML_PARSER_PUBLIC_LITERAL:
9140 xmlGenericError(xmlGenericErrorContext,
9141 "PP: internal error, state == PUBLIC_LITERAL\n");
9142 ctxt->instate = XML_PARSER_START_TAG;
9143#ifdef DEBUG_PUSH
9144 xmlGenericError(xmlGenericErrorContext,
9145 "PP: entering START_TAG\n");
9146#endif
9147 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009148 }
9149 }
9150done:
9151#ifdef DEBUG_PUSH
9152 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
9153#endif
9154 return(ret);
9155}
9156
9157/**
Owen Taylor3473f882001-02-23 17:55:21 +00009158 * xmlParseChunk:
9159 * @ctxt: an XML parser context
9160 * @chunk: an char array
9161 * @size: the size in byte of the chunk
9162 * @terminate: last chunk indicator
9163 *
9164 * Parse a Chunk of memory
9165 *
9166 * Returns zero if no error, the xmlParserErrors otherwise.
9167 */
9168int
9169xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9170 int terminate) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009171 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9172 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +00009173 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9174 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9175 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9176 int cur = ctxt->input->cur - ctxt->input->base;
9177
9178 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9179 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9180 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009181 ctxt->input->end =
9182 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009183#ifdef DEBUG_PUSH
9184 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9185#endif
9186
Daniel Veillarda880b122003-04-21 21:36:41 +00009187#if 0
Owen Taylor3473f882001-02-23 17:55:21 +00009188 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
9189 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda880b122003-04-21 21:36:41 +00009190#endif
Owen Taylor3473f882001-02-23 17:55:21 +00009191 } else if (ctxt->instate != XML_PARSER_EOF) {
9192 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
9193 xmlParserInputBufferPtr in = ctxt->input->buf;
9194 if ((in->encoder != NULL) && (in->buffer != NULL) &&
9195 (in->raw != NULL)) {
9196 int nbchars;
9197
9198 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
9199 if (nbchars < 0) {
9200 xmlGenericError(xmlGenericErrorContext,
9201 "xmlParseChunk: encoder error\n");
9202 return(XML_ERR_INVALID_ENCODING);
9203 }
9204 }
9205 }
9206 }
9207 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009208 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9209 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +00009210 if (terminate) {
9211 /*
9212 * Check for termination
9213 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009214 int avail = 0;
9215 if (ctxt->input->buf == NULL)
9216 avail = ctxt->input->length -
9217 (ctxt->input->cur - ctxt->input->base);
9218 else
9219 avail = ctxt->input->buf->buffer->use -
9220 (ctxt->input->cur - ctxt->input->base);
9221
Owen Taylor3473f882001-02-23 17:55:21 +00009222 if ((ctxt->instate != XML_PARSER_EOF) &&
9223 (ctxt->instate != XML_PARSER_EPILOG)) {
9224 ctxt->errNo = XML_ERR_DOCUMENT_END;
9225 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9226 ctxt->sax->error(ctxt->userData,
9227 "Extra content at the end of the document\n");
9228 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009229 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009230 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009231 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
9232 ctxt->errNo = XML_ERR_DOCUMENT_END;
9233 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9234 ctxt->sax->error(ctxt->userData,
9235 "Extra content at the end of the document\n");
9236 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009237 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009238
9239 }
Owen Taylor3473f882001-02-23 17:55:21 +00009240 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009241 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009242 ctxt->sax->endDocument(ctxt->userData);
9243 }
9244 ctxt->instate = XML_PARSER_EOF;
9245 }
9246 return((xmlParserErrors) ctxt->errNo);
9247}
9248
9249/************************************************************************
9250 * *
9251 * I/O front end functions to the parser *
9252 * *
9253 ************************************************************************/
9254
9255/**
9256 * xmlStopParser:
9257 * @ctxt: an XML parser context
9258 *
9259 * Blocks further parser processing
9260 */
9261void
9262xmlStopParser(xmlParserCtxtPtr ctxt) {
9263 ctxt->instate = XML_PARSER_EOF;
9264 if (ctxt->input != NULL)
9265 ctxt->input->cur = BAD_CAST"";
9266}
9267
9268/**
9269 * xmlCreatePushParserCtxt:
9270 * @sax: a SAX handler
9271 * @user_data: The user data returned on SAX callbacks
9272 * @chunk: a pointer to an array of chars
9273 * @size: number of chars in the array
9274 * @filename: an optional file name or URI
9275 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00009276 * Create a parser context for using the XML parser in push mode.
9277 * If @buffer and @size are non-NULL, the data is used to detect
9278 * the encoding. The remaining characters will be parsed so they
9279 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00009280 * To allow content encoding detection, @size should be >= 4
9281 * The value of @filename is used for fetching external entities
9282 * and error/warning reports.
9283 *
9284 * Returns the new parser context or NULL
9285 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00009286
Owen Taylor3473f882001-02-23 17:55:21 +00009287xmlParserCtxtPtr
9288xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9289 const char *chunk, int size, const char *filename) {
9290 xmlParserCtxtPtr ctxt;
9291 xmlParserInputPtr inputStream;
9292 xmlParserInputBufferPtr buf;
9293 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9294
9295 /*
9296 * plug some encoding conversion routines
9297 */
9298 if ((chunk != NULL) && (size >= 4))
9299 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9300
9301 buf = xmlAllocParserInputBuffer(enc);
9302 if (buf == NULL) return(NULL);
9303
9304 ctxt = xmlNewParserCtxt();
9305 if (ctxt == NULL) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009306 xmlGenericError(xmlGenericErrorContext,
9307 "xml parser: out of memory\n");
9308 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009309 return(NULL);
9310 }
9311 if (sax != NULL) {
9312 if (ctxt->sax != &xmlDefaultSAXHandler)
9313 xmlFree(ctxt->sax);
9314 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9315 if (ctxt->sax == NULL) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009316 xmlGenericError(xmlGenericErrorContext,
9317 "xml parser: out of memory\n");
9318 xmlFreeParserInputBuffer(buf);
9319 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009320 return(NULL);
9321 }
9322 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9323 if (user_data != NULL)
9324 ctxt->userData = user_data;
9325 }
9326 if (filename == NULL) {
9327 ctxt->directory = NULL;
9328 } else {
9329 ctxt->directory = xmlParserGetDirectory(filename);
9330 }
9331
9332 inputStream = xmlNewInputStream(ctxt);
9333 if (inputStream == NULL) {
9334 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009335 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009336 return(NULL);
9337 }
9338
9339 if (filename == NULL)
9340 inputStream->filename = NULL;
9341 else
Daniel Veillardf4862f02002-09-10 11:13:43 +00009342 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +00009343 xmlCanonicPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +00009344 inputStream->buf = buf;
9345 inputStream->base = inputStream->buf->buffer->content;
9346 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009347 inputStream->end =
9348 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009349
9350 inputPush(ctxt, inputStream);
9351
9352 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9353 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009354 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9355 int cur = ctxt->input->cur - ctxt->input->base;
9356
Owen Taylor3473f882001-02-23 17:55:21 +00009357 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009358
9359 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9360 ctxt->input->cur = ctxt->input->base + cur;
9361 ctxt->input->end =
9362 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009363#ifdef DEBUG_PUSH
9364 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9365#endif
9366 }
9367
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009368 if (enc != XML_CHAR_ENCODING_NONE) {
9369 xmlSwitchEncoding(ctxt, enc);
9370 }
9371
Owen Taylor3473f882001-02-23 17:55:21 +00009372 return(ctxt);
9373}
9374
9375/**
9376 * xmlCreateIOParserCtxt:
9377 * @sax: a SAX handler
9378 * @user_data: The user data returned on SAX callbacks
9379 * @ioread: an I/O read function
9380 * @ioclose: an I/O close function
9381 * @ioctx: an I/O handler
9382 * @enc: the charset encoding if known
9383 *
9384 * Create a parser context for using the XML parser with an existing
9385 * I/O stream
9386 *
9387 * Returns the new parser context or NULL
9388 */
9389xmlParserCtxtPtr
9390xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9391 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9392 void *ioctx, xmlCharEncoding enc) {
9393 xmlParserCtxtPtr ctxt;
9394 xmlParserInputPtr inputStream;
9395 xmlParserInputBufferPtr buf;
9396
9397 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9398 if (buf == NULL) return(NULL);
9399
9400 ctxt = xmlNewParserCtxt();
9401 if (ctxt == NULL) {
9402 xmlFree(buf);
9403 return(NULL);
9404 }
9405 if (sax != NULL) {
9406 if (ctxt->sax != &xmlDefaultSAXHandler)
9407 xmlFree(ctxt->sax);
9408 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9409 if (ctxt->sax == NULL) {
9410 xmlFree(buf);
9411 xmlFree(ctxt);
9412 return(NULL);
9413 }
9414 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9415 if (user_data != NULL)
9416 ctxt->userData = user_data;
9417 }
9418
9419 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9420 if (inputStream == NULL) {
9421 xmlFreeParserCtxt(ctxt);
9422 return(NULL);
9423 }
9424 inputPush(ctxt, inputStream);
9425
9426 return(ctxt);
9427}
9428
9429/************************************************************************
9430 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009431 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00009432 * *
9433 ************************************************************************/
9434
9435/**
9436 * xmlIOParseDTD:
9437 * @sax: the SAX handler block or NULL
9438 * @input: an Input Buffer
9439 * @enc: the charset encoding if known
9440 *
9441 * Load and parse a DTD
9442 *
9443 * Returns the resulting xmlDtdPtr or NULL in case of error.
9444 * @input will be freed at parsing end.
9445 */
9446
9447xmlDtdPtr
9448xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9449 xmlCharEncoding enc) {
9450 xmlDtdPtr ret = NULL;
9451 xmlParserCtxtPtr ctxt;
9452 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009453 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00009454
9455 if (input == NULL)
9456 return(NULL);
9457
9458 ctxt = xmlNewParserCtxt();
9459 if (ctxt == NULL) {
9460 return(NULL);
9461 }
9462
9463 /*
9464 * Set-up the SAX context
9465 */
9466 if (sax != NULL) {
9467 if (ctxt->sax != NULL)
9468 xmlFree(ctxt->sax);
9469 ctxt->sax = sax;
9470 ctxt->userData = NULL;
9471 }
9472
9473 /*
9474 * generate a parser input from the I/O handler
9475 */
9476
9477 pinput = xmlNewIOInputStream(ctxt, input, enc);
9478 if (pinput == NULL) {
9479 if (sax != NULL) ctxt->sax = NULL;
9480 xmlFreeParserCtxt(ctxt);
9481 return(NULL);
9482 }
9483
9484 /*
9485 * plug some encoding conversion routines here.
9486 */
9487 xmlPushInput(ctxt, pinput);
9488
9489 pinput->filename = NULL;
9490 pinput->line = 1;
9491 pinput->col = 1;
9492 pinput->base = ctxt->input->cur;
9493 pinput->cur = ctxt->input->cur;
9494 pinput->free = NULL;
9495
9496 /*
9497 * let's parse that entity knowing it's an external subset.
9498 */
9499 ctxt->inSubset = 2;
9500 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9501 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9502 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009503
9504 if (enc == XML_CHAR_ENCODING_NONE) {
9505 /*
9506 * Get the 4 first bytes and decode the charset
9507 * if enc != XML_CHAR_ENCODING_NONE
9508 * plug some encoding conversion routines.
9509 */
9510 start[0] = RAW;
9511 start[1] = NXT(1);
9512 start[2] = NXT(2);
9513 start[3] = NXT(3);
9514 enc = xmlDetectCharEncoding(start, 4);
9515 if (enc != XML_CHAR_ENCODING_NONE) {
9516 xmlSwitchEncoding(ctxt, enc);
9517 }
9518 }
9519
Owen Taylor3473f882001-02-23 17:55:21 +00009520 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9521
9522 if (ctxt->myDoc != NULL) {
9523 if (ctxt->wellFormed) {
9524 ret = ctxt->myDoc->extSubset;
9525 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +00009526 if (ret != NULL) {
9527 xmlNodePtr tmp;
9528
9529 ret->doc = NULL;
9530 tmp = ret->children;
9531 while (tmp != NULL) {
9532 tmp->doc = NULL;
9533 tmp = tmp->next;
9534 }
9535 }
Owen Taylor3473f882001-02-23 17:55:21 +00009536 } else {
9537 ret = NULL;
9538 }
9539 xmlFreeDoc(ctxt->myDoc);
9540 ctxt->myDoc = NULL;
9541 }
9542 if (sax != NULL) ctxt->sax = NULL;
9543 xmlFreeParserCtxt(ctxt);
9544
9545 return(ret);
9546}
9547
9548/**
9549 * xmlSAXParseDTD:
9550 * @sax: the SAX handler block
9551 * @ExternalID: a NAME* containing the External ID of the DTD
9552 * @SystemID: a NAME* containing the URL to the DTD
9553 *
9554 * Load and parse an external subset.
9555 *
9556 * Returns the resulting xmlDtdPtr or NULL in case of error.
9557 */
9558
9559xmlDtdPtr
9560xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9561 const xmlChar *SystemID) {
9562 xmlDtdPtr ret = NULL;
9563 xmlParserCtxtPtr ctxt;
9564 xmlParserInputPtr input = NULL;
9565 xmlCharEncoding enc;
9566
9567 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9568
9569 ctxt = xmlNewParserCtxt();
9570 if (ctxt == NULL) {
9571 return(NULL);
9572 }
9573
9574 /*
9575 * Set-up the SAX context
9576 */
9577 if (sax != NULL) {
9578 if (ctxt->sax != NULL)
9579 xmlFree(ctxt->sax);
9580 ctxt->sax = sax;
9581 ctxt->userData = NULL;
9582 }
9583
9584 /*
9585 * Ask the Entity resolver to load the damn thing
9586 */
9587
9588 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillardc6abc3d2003-04-26 13:27:30 +00009589 input = ctxt->sax->resolveEntity(ctxt, ExternalID, SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +00009590 if (input == NULL) {
9591 if (sax != NULL) ctxt->sax = NULL;
9592 xmlFreeParserCtxt(ctxt);
9593 return(NULL);
9594 }
9595
9596 /*
9597 * plug some encoding conversion routines here.
9598 */
9599 xmlPushInput(ctxt, input);
9600 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9601 xmlSwitchEncoding(ctxt, enc);
9602
9603 if (input->filename == NULL)
Daniel Veillard85095e22003-04-23 13:56:44 +00009604 input->filename = (char *) xmlCanonicPath(SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +00009605 input->line = 1;
9606 input->col = 1;
9607 input->base = ctxt->input->cur;
9608 input->cur = ctxt->input->cur;
9609 input->free = NULL;
9610
9611 /*
9612 * let's parse that entity knowing it's an external subset.
9613 */
9614 ctxt->inSubset = 2;
9615 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9616 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9617 ExternalID, SystemID);
9618 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9619
9620 if (ctxt->myDoc != NULL) {
9621 if (ctxt->wellFormed) {
9622 ret = ctxt->myDoc->extSubset;
9623 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +00009624 if (ret != NULL) {
9625 xmlNodePtr tmp;
9626
9627 ret->doc = NULL;
9628 tmp = ret->children;
9629 while (tmp != NULL) {
9630 tmp->doc = NULL;
9631 tmp = tmp->next;
9632 }
9633 }
Owen Taylor3473f882001-02-23 17:55:21 +00009634 } else {
9635 ret = NULL;
9636 }
9637 xmlFreeDoc(ctxt->myDoc);
9638 ctxt->myDoc = NULL;
9639 }
9640 if (sax != NULL) ctxt->sax = NULL;
9641 xmlFreeParserCtxt(ctxt);
9642
9643 return(ret);
9644}
9645
9646/**
9647 * xmlParseDTD:
9648 * @ExternalID: a NAME* containing the External ID of the DTD
9649 * @SystemID: a NAME* containing the URL to the DTD
9650 *
9651 * Load and parse an external subset.
9652 *
9653 * Returns the resulting xmlDtdPtr or NULL in case of error.
9654 */
9655
9656xmlDtdPtr
9657xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9658 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9659}
9660
9661/************************************************************************
9662 * *
9663 * Front ends when parsing an Entity *
9664 * *
9665 ************************************************************************/
9666
9667/**
Owen Taylor3473f882001-02-23 17:55:21 +00009668 * xmlParseCtxtExternalEntity:
9669 * @ctx: the existing parsing context
9670 * @URL: the URL for the entity to load
9671 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009672 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009673 *
9674 * Parse an external general entity within an existing parsing context
9675 * An external general parsed entity is well-formed if it matches the
9676 * production labeled extParsedEnt.
9677 *
9678 * [78] extParsedEnt ::= TextDecl? content
9679 *
9680 * Returns 0 if the entity is well formed, -1 in case of args problem and
9681 * the parser error code otherwise
9682 */
9683
9684int
9685xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009686 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009687 xmlParserCtxtPtr ctxt;
9688 xmlDocPtr newDoc;
9689 xmlSAXHandlerPtr oldsax = NULL;
9690 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009691 xmlChar start[4];
9692 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009693
9694 if (ctx->depth > 40) {
9695 return(XML_ERR_ENTITY_LOOP);
9696 }
9697
Daniel Veillardcda96922001-08-21 10:56:31 +00009698 if (lst != NULL)
9699 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009700 if ((URL == NULL) && (ID == NULL))
9701 return(-1);
9702 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9703 return(-1);
9704
9705
9706 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9707 if (ctxt == NULL) return(-1);
9708 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +00009709 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +00009710 oldsax = ctxt->sax;
9711 ctxt->sax = ctx->sax;
9712 newDoc = xmlNewDoc(BAD_CAST "1.0");
9713 if (newDoc == NULL) {
9714 xmlFreeParserCtxt(ctxt);
9715 return(-1);
9716 }
9717 if (ctx->myDoc != NULL) {
9718 newDoc->intSubset = ctx->myDoc->intSubset;
9719 newDoc->extSubset = ctx->myDoc->extSubset;
9720 }
9721 if (ctx->myDoc->URL != NULL) {
9722 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9723 }
9724 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9725 if (newDoc->children == NULL) {
9726 ctxt->sax = oldsax;
9727 xmlFreeParserCtxt(ctxt);
9728 newDoc->intSubset = NULL;
9729 newDoc->extSubset = NULL;
9730 xmlFreeDoc(newDoc);
9731 return(-1);
9732 }
9733 nodePush(ctxt, newDoc->children);
9734 if (ctx->myDoc == NULL) {
9735 ctxt->myDoc = newDoc;
9736 } else {
9737 ctxt->myDoc = ctx->myDoc;
9738 newDoc->children->doc = ctx->myDoc;
9739 }
9740
Daniel Veillard87a764e2001-06-20 17:41:10 +00009741 /*
9742 * Get the 4 first bytes and decode the charset
9743 * if enc != XML_CHAR_ENCODING_NONE
9744 * plug some encoding conversion routines.
9745 */
9746 GROW
9747 start[0] = RAW;
9748 start[1] = NXT(1);
9749 start[2] = NXT(2);
9750 start[3] = NXT(3);
9751 enc = xmlDetectCharEncoding(start, 4);
9752 if (enc != XML_CHAR_ENCODING_NONE) {
9753 xmlSwitchEncoding(ctxt, enc);
9754 }
9755
Owen Taylor3473f882001-02-23 17:55:21 +00009756 /*
9757 * Parse a possible text declaration first
9758 */
Owen Taylor3473f882001-02-23 17:55:21 +00009759 if ((RAW == '<') && (NXT(1) == '?') &&
9760 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9761 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9762 xmlParseTextDecl(ctxt);
9763 }
9764
9765 /*
9766 * Doing validity checking on chunk doesn't make sense
9767 */
9768 ctxt->instate = XML_PARSER_CONTENT;
9769 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +00009770 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +00009771 ctxt->loadsubset = ctx->loadsubset;
9772 ctxt->depth = ctx->depth + 1;
9773 ctxt->replaceEntities = ctx->replaceEntities;
9774 if (ctxt->validate) {
9775 ctxt->vctxt.error = ctx->vctxt.error;
9776 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009777 } else {
9778 ctxt->vctxt.error = NULL;
9779 ctxt->vctxt.warning = NULL;
9780 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009781 ctxt->vctxt.nodeTab = NULL;
9782 ctxt->vctxt.nodeNr = 0;
9783 ctxt->vctxt.nodeMax = 0;
9784 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009785
9786 xmlParseContent(ctxt);
9787
Daniel Veillard5f8d1a32003-03-23 21:02:00 +00009788 ctx->validate = ctxt->validate;
9789 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +00009790 if ((RAW == '<') && (NXT(1) == '/')) {
9791 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9792 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9793 ctxt->sax->error(ctxt->userData,
9794 "chunk is not well balanced\n");
9795 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009796 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009797 } else if (RAW != 0) {
9798 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9799 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9800 ctxt->sax->error(ctxt->userData,
9801 "extra content at the end of well balanced chunk\n");
9802 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009803 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009804 }
9805 if (ctxt->node != newDoc->children) {
9806 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9807 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9808 ctxt->sax->error(ctxt->userData,
9809 "chunk is not well balanced\n");
9810 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009811 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009812 }
9813
9814 if (!ctxt->wellFormed) {
9815 if (ctxt->errNo == 0)
9816 ret = 1;
9817 else
9818 ret = ctxt->errNo;
9819 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009820 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009821 xmlNodePtr cur;
9822
9823 /*
9824 * Return the newly created nodeset after unlinking it from
9825 * they pseudo parent.
9826 */
9827 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009828 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009829 while (cur != NULL) {
9830 cur->parent = NULL;
9831 cur = cur->next;
9832 }
9833 newDoc->children->children = NULL;
9834 }
9835 ret = 0;
9836 }
9837 ctxt->sax = oldsax;
9838 xmlFreeParserCtxt(ctxt);
9839 newDoc->intSubset = NULL;
9840 newDoc->extSubset = NULL;
9841 xmlFreeDoc(newDoc);
9842
9843 return(ret);
9844}
9845
9846/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009847 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009848 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009849 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009850 * @sax: the SAX handler bloc (possibly NULL)
9851 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9852 * @depth: Used for loop detection, use 0
9853 * @URL: the URL for the entity to load
9854 * @ID: the System ID for the entity to load
9855 * @list: the return value for the set of parsed nodes
9856 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009857 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009858 *
9859 * Returns 0 if the entity is well formed, -1 in case of args problem and
9860 * the parser error code otherwise
9861 */
9862
Daniel Veillard257d9102001-05-08 10:41:44 +00009863static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009864xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9865 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009866 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009867 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009868 xmlParserCtxtPtr ctxt;
9869 xmlDocPtr newDoc;
9870 xmlSAXHandlerPtr oldsax = NULL;
9871 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009872 xmlChar start[4];
9873 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009874
9875 if (depth > 40) {
9876 return(XML_ERR_ENTITY_LOOP);
9877 }
9878
9879
9880
9881 if (list != NULL)
9882 *list = NULL;
9883 if ((URL == NULL) && (ID == NULL))
9884 return(-1);
9885 if (doc == NULL) /* @@ relax but check for dereferences */
9886 return(-1);
9887
9888
9889 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9890 if (ctxt == NULL) return(-1);
9891 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009892 if (oldctxt != NULL) {
9893 ctxt->_private = oldctxt->_private;
9894 ctxt->loadsubset = oldctxt->loadsubset;
9895 ctxt->validate = oldctxt->validate;
9896 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009897 ctxt->record_info = oldctxt->record_info;
9898 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
9899 ctxt->node_seq.length = oldctxt->node_seq.length;
9900 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009901 } else {
9902 /*
9903 * Doing validity checking on chunk without context
9904 * doesn't make sense
9905 */
9906 ctxt->_private = NULL;
9907 ctxt->validate = 0;
9908 ctxt->external = 2;
9909 ctxt->loadsubset = 0;
9910 }
Owen Taylor3473f882001-02-23 17:55:21 +00009911 if (sax != NULL) {
9912 oldsax = ctxt->sax;
9913 ctxt->sax = sax;
9914 if (user_data != NULL)
9915 ctxt->userData = user_data;
9916 }
9917 newDoc = xmlNewDoc(BAD_CAST "1.0");
9918 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009919 ctxt->node_seq.maximum = 0;
9920 ctxt->node_seq.length = 0;
9921 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009922 xmlFreeParserCtxt(ctxt);
9923 return(-1);
9924 }
9925 if (doc != NULL) {
9926 newDoc->intSubset = doc->intSubset;
9927 newDoc->extSubset = doc->extSubset;
9928 }
9929 if (doc->URL != NULL) {
9930 newDoc->URL = xmlStrdup(doc->URL);
9931 }
9932 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9933 if (newDoc->children == NULL) {
9934 if (sax != NULL)
9935 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009936 ctxt->node_seq.maximum = 0;
9937 ctxt->node_seq.length = 0;
9938 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009939 xmlFreeParserCtxt(ctxt);
9940 newDoc->intSubset = NULL;
9941 newDoc->extSubset = NULL;
9942 xmlFreeDoc(newDoc);
9943 return(-1);
9944 }
9945 nodePush(ctxt, newDoc->children);
9946 if (doc == NULL) {
9947 ctxt->myDoc = newDoc;
9948 } else {
9949 ctxt->myDoc = doc;
9950 newDoc->children->doc = doc;
9951 }
9952
Daniel Veillard87a764e2001-06-20 17:41:10 +00009953 /*
9954 * Get the 4 first bytes and decode the charset
9955 * if enc != XML_CHAR_ENCODING_NONE
9956 * plug some encoding conversion routines.
9957 */
9958 GROW;
9959 start[0] = RAW;
9960 start[1] = NXT(1);
9961 start[2] = NXT(2);
9962 start[3] = NXT(3);
9963 enc = xmlDetectCharEncoding(start, 4);
9964 if (enc != XML_CHAR_ENCODING_NONE) {
9965 xmlSwitchEncoding(ctxt, enc);
9966 }
9967
Owen Taylor3473f882001-02-23 17:55:21 +00009968 /*
9969 * Parse a possible text declaration first
9970 */
Owen Taylor3473f882001-02-23 17:55:21 +00009971 if ((RAW == '<') && (NXT(1) == '?') &&
9972 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9973 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9974 xmlParseTextDecl(ctxt);
9975 }
9976
Owen Taylor3473f882001-02-23 17:55:21 +00009977 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009978 ctxt->depth = depth;
9979
9980 xmlParseContent(ctxt);
9981
Daniel Veillard561b7f82002-03-20 21:55:57 +00009982 if ((RAW == '<') && (NXT(1) == '/')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009983 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9984 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9985 ctxt->sax->error(ctxt->userData,
9986 "chunk is not well balanced\n");
9987 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009988 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00009989 } else if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00009990 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9991 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9992 ctxt->sax->error(ctxt->userData,
9993 "extra content at the end of well balanced chunk\n");
9994 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009995 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009996 }
9997 if (ctxt->node != newDoc->children) {
9998 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9999 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10000 ctxt->sax->error(ctxt->userData,
10001 "chunk is not well balanced\n");
10002 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010003 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010004 }
10005
10006 if (!ctxt->wellFormed) {
10007 if (ctxt->errNo == 0)
10008 ret = 1;
10009 else
10010 ret = ctxt->errNo;
10011 } else {
10012 if (list != NULL) {
10013 xmlNodePtr cur;
10014
10015 /*
10016 * Return the newly created nodeset after unlinking it from
10017 * they pseudo parent.
10018 */
10019 cur = newDoc->children->children;
10020 *list = cur;
10021 while (cur != NULL) {
10022 cur->parent = NULL;
10023 cur = cur->next;
10024 }
10025 newDoc->children->children = NULL;
10026 }
10027 ret = 0;
10028 }
10029 if (sax != NULL)
10030 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000010031 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
10032 oldctxt->node_seq.length = ctxt->node_seq.length;
10033 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010034 ctxt->node_seq.maximum = 0;
10035 ctxt->node_seq.length = 0;
10036 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010037 xmlFreeParserCtxt(ctxt);
10038 newDoc->intSubset = NULL;
10039 newDoc->extSubset = NULL;
10040 xmlFreeDoc(newDoc);
10041
10042 return(ret);
10043}
10044
10045/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010046 * xmlParseExternalEntity:
10047 * @doc: the document the chunk pertains to
10048 * @sax: the SAX handler bloc (possibly NULL)
10049 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10050 * @depth: Used for loop detection, use 0
10051 * @URL: the URL for the entity to load
10052 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010053 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000010054 *
10055 * Parse an external general entity
10056 * An external general parsed entity is well-formed if it matches the
10057 * production labeled extParsedEnt.
10058 *
10059 * [78] extParsedEnt ::= TextDecl? content
10060 *
10061 * Returns 0 if the entity is well formed, -1 in case of args problem and
10062 * the parser error code otherwise
10063 */
10064
10065int
10066xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000010067 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010068 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010069 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000010070}
10071
10072/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000010073 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000010074 * @doc: the document the chunk pertains to
10075 * @sax: the SAX handler bloc (possibly NULL)
10076 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10077 * @depth: Used for loop detection, use 0
10078 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000010079 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010080 *
10081 * Parse a well-balanced chunk of an XML document
10082 * called by the parser
10083 * The allowed sequence for the Well Balanced Chunk is the one defined by
10084 * the content production in the XML grammar:
10085 *
10086 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10087 *
10088 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10089 * the parser error code otherwise
10090 */
10091
10092int
10093xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000010094 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010095 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
10096 depth, string, lst, 0 );
10097}
10098
10099/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000010100 * xmlParseBalancedChunkMemoryInternal:
10101 * @oldctxt: the existing parsing context
10102 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10103 * @user_data: the user data field for the parser context
10104 * @lst: the return value for the set of parsed nodes
10105 *
10106 *
10107 * Parse a well-balanced chunk of an XML document
10108 * called by the parser
10109 * The allowed sequence for the Well Balanced Chunk is the one defined by
10110 * the content production in the XML grammar:
10111 *
10112 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10113 *
10114 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10115 * the parser error code otherwise
10116 *
10117 * In case recover is set to 1, the nodelist will not be empty even if
10118 * the parsed chunk is not well balanced.
10119 */
10120static int
10121xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
10122 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
10123 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010124 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010125 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010126 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010127 int size;
10128 int ret = 0;
10129
10130 if (oldctxt->depth > 40) {
10131 return(XML_ERR_ENTITY_LOOP);
10132 }
10133
10134
10135 if (lst != NULL)
10136 *lst = NULL;
10137 if (string == NULL)
10138 return(-1);
10139
10140 size = xmlStrlen(string);
10141
10142 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10143 if (ctxt == NULL) return(-1);
10144 if (user_data != NULL)
10145 ctxt->userData = user_data;
10146 else
10147 ctxt->userData = ctxt;
10148
10149 oldsax = ctxt->sax;
10150 ctxt->sax = oldctxt->sax;
Daniel Veillarde1ca5032002-12-09 14:13:43 +000010151 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010152 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010153 newDoc = xmlNewDoc(BAD_CAST "1.0");
10154 if (newDoc == NULL) {
10155 ctxt->sax = oldsax;
10156 xmlFreeParserCtxt(ctxt);
10157 return(-1);
10158 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010159 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010160 } else {
10161 ctxt->myDoc = oldctxt->myDoc;
10162 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010163 }
Daniel Veillard9bc53102002-11-25 13:20:04 +000010164 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +000010165 BAD_CAST "pseudoroot", NULL);
10166 if (ctxt->myDoc->children == NULL) {
10167 ctxt->sax = oldsax;
10168 xmlFreeParserCtxt(ctxt);
10169 if (newDoc != NULL)
10170 xmlFreeDoc(newDoc);
10171 return(-1);
10172 }
10173 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010174 ctxt->instate = XML_PARSER_CONTENT;
10175 ctxt->depth = oldctxt->depth + 1;
10176
Daniel Veillard328f48c2002-11-15 15:24:34 +000010177 ctxt->validate = 0;
10178 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000010179 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
10180 /*
10181 * ID/IDREF registration will be done in xmlValidateElement below
10182 */
10183 ctxt->loadsubset |= XML_SKIP_IDS;
10184 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010185
Daniel Veillard68e9e742002-11-16 15:35:11 +000010186 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010187 if ((RAW == '<') && (NXT(1) == '/')) {
10188 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10189 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10190 ctxt->sax->error(ctxt->userData,
10191 "chunk is not well balanced\n");
10192 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010193 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010194 } else if (RAW != 0) {
10195 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10196 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10197 ctxt->sax->error(ctxt->userData,
10198 "extra content at the end of well balanced chunk\n");
10199 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010200 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010201 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010202 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000010203 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10204 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10205 ctxt->sax->error(ctxt->userData,
10206 "chunk is not well balanced\n");
10207 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010208 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010209 }
10210
10211 if (!ctxt->wellFormed) {
10212 if (ctxt->errNo == 0)
10213 ret = 1;
10214 else
10215 ret = ctxt->errNo;
10216 } else {
10217 ret = 0;
10218 }
10219
10220 if ((lst != NULL) && (ret == 0)) {
10221 xmlNodePtr cur;
10222
10223 /*
10224 * Return the newly created nodeset after unlinking it from
10225 * they pseudo parent.
10226 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000010227 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010228 *lst = cur;
10229 while (cur != NULL) {
Daniel Veillard8d589042003-02-04 15:07:21 +000010230 if (oldctxt->validate && oldctxt->wellFormed &&
10231 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
10232 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
10233 oldctxt->myDoc, cur);
10234 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010235 cur->parent = NULL;
10236 cur = cur->next;
10237 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010238 ctxt->myDoc->children->children = NULL;
10239 }
10240 if (ctxt->myDoc != NULL) {
10241 xmlFreeNode(ctxt->myDoc->children);
10242 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010243 }
10244
10245 ctxt->sax = oldsax;
10246 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010247 if (newDoc != NULL)
10248 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010249
10250 return(ret);
10251}
10252
10253/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000010254 * xmlParseBalancedChunkMemoryRecover:
10255 * @doc: the document the chunk pertains to
10256 * @sax: the SAX handler bloc (possibly NULL)
10257 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10258 * @depth: Used for loop detection, use 0
10259 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10260 * @lst: the return value for the set of parsed nodes
10261 * @recover: return nodes even if the data is broken (use 0)
10262 *
10263 *
10264 * Parse a well-balanced chunk of an XML document
10265 * called by the parser
10266 * The allowed sequence for the Well Balanced Chunk is the one defined by
10267 * the content production in the XML grammar:
10268 *
10269 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10270 *
10271 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10272 * the parser error code otherwise
10273 *
10274 * In case recover is set to 1, the nodelist will not be empty even if
10275 * the parsed chunk is not well balanced.
10276 */
10277int
10278xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
10279 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
10280 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000010281 xmlParserCtxtPtr ctxt;
10282 xmlDocPtr newDoc;
10283 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000010284 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000010285 int size;
10286 int ret = 0;
10287
10288 if (depth > 40) {
10289 return(XML_ERR_ENTITY_LOOP);
10290 }
10291
10292
Daniel Veillardcda96922001-08-21 10:56:31 +000010293 if (lst != NULL)
10294 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010295 if (string == NULL)
10296 return(-1);
10297
10298 size = xmlStrlen(string);
10299
10300 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10301 if (ctxt == NULL) return(-1);
10302 ctxt->userData = ctxt;
10303 if (sax != NULL) {
10304 oldsax = ctxt->sax;
10305 ctxt->sax = sax;
10306 if (user_data != NULL)
10307 ctxt->userData = user_data;
10308 }
10309 newDoc = xmlNewDoc(BAD_CAST "1.0");
10310 if (newDoc == NULL) {
10311 xmlFreeParserCtxt(ctxt);
10312 return(-1);
10313 }
10314 if (doc != NULL) {
10315 newDoc->intSubset = doc->intSubset;
10316 newDoc->extSubset = doc->extSubset;
10317 }
10318 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10319 if (newDoc->children == NULL) {
10320 if (sax != NULL)
10321 ctxt->sax = oldsax;
10322 xmlFreeParserCtxt(ctxt);
10323 newDoc->intSubset = NULL;
10324 newDoc->extSubset = NULL;
10325 xmlFreeDoc(newDoc);
10326 return(-1);
10327 }
10328 nodePush(ctxt, newDoc->children);
10329 if (doc == NULL) {
10330 ctxt->myDoc = newDoc;
10331 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000010332 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000010333 newDoc->children->doc = doc;
10334 }
10335 ctxt->instate = XML_PARSER_CONTENT;
10336 ctxt->depth = depth;
10337
10338 /*
10339 * Doing validity checking on chunk doesn't make sense
10340 */
10341 ctxt->validate = 0;
10342 ctxt->loadsubset = 0;
10343
Daniel Veillardb39bc392002-10-26 19:29:51 +000010344 if ( doc != NULL ){
10345 content = doc->children;
10346 doc->children = NULL;
10347 xmlParseContent(ctxt);
10348 doc->children = content;
10349 }
10350 else {
10351 xmlParseContent(ctxt);
10352 }
Owen Taylor3473f882001-02-23 17:55:21 +000010353 if ((RAW == '<') && (NXT(1) == '/')) {
10354 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10355 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10356 ctxt->sax->error(ctxt->userData,
10357 "chunk is not well balanced\n");
10358 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010359 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010360 } else if (RAW != 0) {
10361 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10362 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10363 ctxt->sax->error(ctxt->userData,
10364 "extra content at the end of well balanced chunk\n");
10365 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010366 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010367 }
10368 if (ctxt->node != newDoc->children) {
10369 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10370 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10371 ctxt->sax->error(ctxt->userData,
10372 "chunk is not well balanced\n");
10373 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010374 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010375 }
10376
10377 if (!ctxt->wellFormed) {
10378 if (ctxt->errNo == 0)
10379 ret = 1;
10380 else
10381 ret = ctxt->errNo;
10382 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010383 ret = 0;
10384 }
10385
10386 if (lst != NULL && (ret == 0 || recover == 1)) {
10387 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010388
10389 /*
10390 * Return the newly created nodeset after unlinking it from
10391 * they pseudo parent.
10392 */
10393 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010394 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010395 while (cur != NULL) {
10396 cur->parent = NULL;
10397 cur = cur->next;
10398 }
10399 newDoc->children->children = NULL;
10400 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000010401
Owen Taylor3473f882001-02-23 17:55:21 +000010402 if (sax != NULL)
10403 ctxt->sax = oldsax;
10404 xmlFreeParserCtxt(ctxt);
10405 newDoc->intSubset = NULL;
10406 newDoc->extSubset = NULL;
10407 xmlFreeDoc(newDoc);
10408
10409 return(ret);
10410}
10411
10412/**
10413 * xmlSAXParseEntity:
10414 * @sax: the SAX handler block
10415 * @filename: the filename
10416 *
10417 * parse an XML external entity out of context and build a tree.
10418 * It use the given SAX function block to handle the parsing callback.
10419 * If sax is NULL, fallback to the default DOM tree building routines.
10420 *
10421 * [78] extParsedEnt ::= TextDecl? content
10422 *
10423 * This correspond to a "Well Balanced" chunk
10424 *
10425 * Returns the resulting document tree
10426 */
10427
10428xmlDocPtr
10429xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
10430 xmlDocPtr ret;
10431 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010432
10433 ctxt = xmlCreateFileParserCtxt(filename);
10434 if (ctxt == NULL) {
10435 return(NULL);
10436 }
10437 if (sax != NULL) {
10438 if (ctxt->sax != NULL)
10439 xmlFree(ctxt->sax);
10440 ctxt->sax = sax;
10441 ctxt->userData = NULL;
10442 }
10443
Owen Taylor3473f882001-02-23 17:55:21 +000010444 xmlParseExtParsedEnt(ctxt);
10445
10446 if (ctxt->wellFormed)
10447 ret = ctxt->myDoc;
10448 else {
10449 ret = NULL;
10450 xmlFreeDoc(ctxt->myDoc);
10451 ctxt->myDoc = NULL;
10452 }
10453 if (sax != NULL)
10454 ctxt->sax = NULL;
10455 xmlFreeParserCtxt(ctxt);
10456
10457 return(ret);
10458}
10459
10460/**
10461 * xmlParseEntity:
10462 * @filename: the filename
10463 *
10464 * parse an XML external entity out of context and build a tree.
10465 *
10466 * [78] extParsedEnt ::= TextDecl? content
10467 *
10468 * This correspond to a "Well Balanced" chunk
10469 *
10470 * Returns the resulting document tree
10471 */
10472
10473xmlDocPtr
10474xmlParseEntity(const char *filename) {
10475 return(xmlSAXParseEntity(NULL, filename));
10476}
10477
10478/**
10479 * xmlCreateEntityParserCtxt:
10480 * @URL: the entity URL
10481 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010482 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000010483 *
10484 * Create a parser context for an external entity
10485 * Automatic support for ZLIB/Compress compressed document is provided
10486 * by default if found at compile-time.
10487 *
10488 * Returns the new parser context or NULL
10489 */
10490xmlParserCtxtPtr
10491xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10492 const xmlChar *base) {
10493 xmlParserCtxtPtr ctxt;
10494 xmlParserInputPtr inputStream;
10495 char *directory = NULL;
10496 xmlChar *uri;
10497
10498 ctxt = xmlNewParserCtxt();
10499 if (ctxt == NULL) {
10500 return(NULL);
10501 }
10502
10503 uri = xmlBuildURI(URL, base);
10504
10505 if (uri == NULL) {
10506 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
10507 if (inputStream == NULL) {
10508 xmlFreeParserCtxt(ctxt);
10509 return(NULL);
10510 }
10511
10512 inputPush(ctxt, inputStream);
10513
10514 if ((ctxt->directory == NULL) && (directory == NULL))
10515 directory = xmlParserGetDirectory((char *)URL);
10516 if ((ctxt->directory == NULL) && (directory != NULL))
10517 ctxt->directory = directory;
10518 } else {
10519 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
10520 if (inputStream == NULL) {
10521 xmlFree(uri);
10522 xmlFreeParserCtxt(ctxt);
10523 return(NULL);
10524 }
10525
10526 inputPush(ctxt, inputStream);
10527
10528 if ((ctxt->directory == NULL) && (directory == NULL))
10529 directory = xmlParserGetDirectory((char *)uri);
10530 if ((ctxt->directory == NULL) && (directory != NULL))
10531 ctxt->directory = directory;
10532 xmlFree(uri);
10533 }
10534
10535 return(ctxt);
10536}
10537
10538/************************************************************************
10539 * *
10540 * Front ends when parsing from a file *
10541 * *
10542 ************************************************************************/
10543
10544/**
10545 * xmlCreateFileParserCtxt:
10546 * @filename: the filename
10547 *
10548 * Create a parser context for a file content.
10549 * Automatic support for ZLIB/Compress compressed document is provided
10550 * by default if found at compile-time.
10551 *
10552 * Returns the new parser context or NULL
10553 */
10554xmlParserCtxtPtr
10555xmlCreateFileParserCtxt(const char *filename)
10556{
10557 xmlParserCtxtPtr ctxt;
10558 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000010559 char *directory = NULL;
10560
Owen Taylor3473f882001-02-23 17:55:21 +000010561 ctxt = xmlNewParserCtxt();
10562 if (ctxt == NULL) {
10563 if (xmlDefaultSAXHandler.error != NULL) {
10564 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
10565 }
10566 return(NULL);
10567 }
10568
Igor Zlatkovicce076162003-02-23 13:39:39 +000010569
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000010570 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010571 if (inputStream == NULL) {
10572 xmlFreeParserCtxt(ctxt);
10573 return(NULL);
10574 }
10575
Owen Taylor3473f882001-02-23 17:55:21 +000010576 inputPush(ctxt, inputStream);
10577 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010578 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000010579 if ((ctxt->directory == NULL) && (directory != NULL))
10580 ctxt->directory = directory;
10581
10582 return(ctxt);
10583}
10584
10585/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010586 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000010587 * @sax: the SAX handler block
10588 * @filename: the filename
10589 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10590 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000010591 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000010592 *
10593 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10594 * compressed document is provided by default if found at compile-time.
10595 * It use the given SAX function block to handle the parsing callback.
10596 * If sax is NULL, fallback to the default DOM tree building routines.
10597 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000010598 * User data (void *) is stored within the parser context in the
10599 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000010600 *
Owen Taylor3473f882001-02-23 17:55:21 +000010601 * Returns the resulting document tree
10602 */
10603
10604xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000010605xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
10606 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000010607 xmlDocPtr ret;
10608 xmlParserCtxtPtr ctxt;
10609 char *directory = NULL;
10610
Daniel Veillard635ef722001-10-29 11:48:19 +000010611 xmlInitParser();
10612
Owen Taylor3473f882001-02-23 17:55:21 +000010613 ctxt = xmlCreateFileParserCtxt(filename);
10614 if (ctxt == NULL) {
10615 return(NULL);
10616 }
10617 if (sax != NULL) {
10618 if (ctxt->sax != NULL)
10619 xmlFree(ctxt->sax);
10620 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010621 }
Daniel Veillarda293c322001-10-02 13:54:14 +000010622 if (data!=NULL) {
10623 ctxt->_private=data;
10624 }
Owen Taylor3473f882001-02-23 17:55:21 +000010625
10626 if ((ctxt->directory == NULL) && (directory == NULL))
10627 directory = xmlParserGetDirectory(filename);
10628 if ((ctxt->directory == NULL) && (directory != NULL))
10629 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
10630
Daniel Veillarddad3f682002-11-17 16:47:27 +000010631 ctxt->recovery = recovery;
10632
Owen Taylor3473f882001-02-23 17:55:21 +000010633 xmlParseDocument(ctxt);
10634
10635 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10636 else {
10637 ret = NULL;
10638 xmlFreeDoc(ctxt->myDoc);
10639 ctxt->myDoc = NULL;
10640 }
10641 if (sax != NULL)
10642 ctxt->sax = NULL;
10643 xmlFreeParserCtxt(ctxt);
10644
10645 return(ret);
10646}
10647
10648/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010649 * xmlSAXParseFile:
10650 * @sax: the SAX handler block
10651 * @filename: the filename
10652 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10653 * documents
10654 *
10655 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10656 * compressed document is provided by default if found at compile-time.
10657 * It use the given SAX function block to handle the parsing callback.
10658 * If sax is NULL, fallback to the default DOM tree building routines.
10659 *
10660 * Returns the resulting document tree
10661 */
10662
10663xmlDocPtr
10664xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
10665 int recovery) {
10666 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
10667}
10668
10669/**
Owen Taylor3473f882001-02-23 17:55:21 +000010670 * xmlRecoverDoc:
10671 * @cur: a pointer to an array of xmlChar
10672 *
10673 * parse an XML in-memory document and build a tree.
10674 * In the case the document is not Well Formed, a tree is built anyway
10675 *
10676 * Returns the resulting document tree
10677 */
10678
10679xmlDocPtr
10680xmlRecoverDoc(xmlChar *cur) {
10681 return(xmlSAXParseDoc(NULL, cur, 1));
10682}
10683
10684/**
10685 * xmlParseFile:
10686 * @filename: the filename
10687 *
10688 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10689 * compressed document is provided by default if found at compile-time.
10690 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000010691 * Returns the resulting document tree if the file was wellformed,
10692 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000010693 */
10694
10695xmlDocPtr
10696xmlParseFile(const char *filename) {
10697 return(xmlSAXParseFile(NULL, filename, 0));
10698}
10699
10700/**
10701 * xmlRecoverFile:
10702 * @filename: the filename
10703 *
10704 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10705 * compressed document is provided by default if found at compile-time.
10706 * In the case the document is not Well Formed, a tree is built anyway
10707 *
10708 * Returns the resulting document tree
10709 */
10710
10711xmlDocPtr
10712xmlRecoverFile(const char *filename) {
10713 return(xmlSAXParseFile(NULL, filename, 1));
10714}
10715
10716
10717/**
10718 * xmlSetupParserForBuffer:
10719 * @ctxt: an XML parser context
10720 * @buffer: a xmlChar * buffer
10721 * @filename: a file name
10722 *
10723 * Setup the parser context to parse a new buffer; Clears any prior
10724 * contents from the parser context. The buffer parameter must not be
10725 * NULL, but the filename parameter can be
10726 */
10727void
10728xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
10729 const char* filename)
10730{
10731 xmlParserInputPtr input;
10732
10733 input = xmlNewInputStream(ctxt);
10734 if (input == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +000010735 xmlGenericError(xmlGenericErrorContext,
10736 "malloc");
Owen Taylor3473f882001-02-23 17:55:21 +000010737 xmlFree(ctxt);
10738 return;
10739 }
10740
10741 xmlClearParserCtxt(ctxt);
10742 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000010743 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000010744 input->base = buffer;
10745 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010746 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000010747 inputPush(ctxt, input);
10748}
10749
10750/**
10751 * xmlSAXUserParseFile:
10752 * @sax: a SAX handler
10753 * @user_data: The user data returned on SAX callbacks
10754 * @filename: a file name
10755 *
10756 * parse an XML file and call the given SAX handler routines.
10757 * Automatic support for ZLIB/Compress compressed document is provided
10758 *
10759 * Returns 0 in case of success or a error number otherwise
10760 */
10761int
10762xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10763 const char *filename) {
10764 int ret = 0;
10765 xmlParserCtxtPtr ctxt;
10766
10767 ctxt = xmlCreateFileParserCtxt(filename);
10768 if (ctxt == NULL) return -1;
10769 if (ctxt->sax != &xmlDefaultSAXHandler)
10770 xmlFree(ctxt->sax);
10771 ctxt->sax = sax;
10772 if (user_data != NULL)
10773 ctxt->userData = user_data;
10774
10775 xmlParseDocument(ctxt);
10776
10777 if (ctxt->wellFormed)
10778 ret = 0;
10779 else {
10780 if (ctxt->errNo != 0)
10781 ret = ctxt->errNo;
10782 else
10783 ret = -1;
10784 }
10785 if (sax != NULL)
10786 ctxt->sax = NULL;
10787 xmlFreeParserCtxt(ctxt);
10788
10789 return ret;
10790}
10791
10792/************************************************************************
10793 * *
10794 * Front ends when parsing from memory *
10795 * *
10796 ************************************************************************/
10797
10798/**
10799 * xmlCreateMemoryParserCtxt:
10800 * @buffer: a pointer to a char array
10801 * @size: the size of the array
10802 *
10803 * Create a parser context for an XML in-memory document.
10804 *
10805 * Returns the new parser context or NULL
10806 */
10807xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010808xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010809 xmlParserCtxtPtr ctxt;
10810 xmlParserInputPtr input;
10811 xmlParserInputBufferPtr buf;
10812
10813 if (buffer == NULL)
10814 return(NULL);
10815 if (size <= 0)
10816 return(NULL);
10817
10818 ctxt = xmlNewParserCtxt();
10819 if (ctxt == NULL)
10820 return(NULL);
10821
10822 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010823 if (buf == NULL) {
10824 xmlFreeParserCtxt(ctxt);
10825 return(NULL);
10826 }
Owen Taylor3473f882001-02-23 17:55:21 +000010827
10828 input = xmlNewInputStream(ctxt);
10829 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010830 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010831 xmlFreeParserCtxt(ctxt);
10832 return(NULL);
10833 }
10834
10835 input->filename = NULL;
10836 input->buf = buf;
10837 input->base = input->buf->buffer->content;
10838 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010839 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010840
10841 inputPush(ctxt, input);
10842 return(ctxt);
10843}
10844
10845/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010846 * xmlSAXParseMemoryWithData:
10847 * @sax: the SAX handler block
10848 * @buffer: an pointer to a char array
10849 * @size: the size of the array
10850 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10851 * documents
10852 * @data: the userdata
10853 *
10854 * parse an XML in-memory block and use the given SAX function block
10855 * to handle the parsing callback. If sax is NULL, fallback to the default
10856 * DOM tree building routines.
10857 *
10858 * User data (void *) is stored within the parser context in the
10859 * context's _private member, so it is available nearly everywhere in libxml
10860 *
10861 * Returns the resulting document tree
10862 */
10863
10864xmlDocPtr
10865xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
10866 int size, int recovery, void *data) {
10867 xmlDocPtr ret;
10868 xmlParserCtxtPtr ctxt;
10869
10870 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10871 if (ctxt == NULL) return(NULL);
10872 if (sax != NULL) {
10873 if (ctxt->sax != NULL)
10874 xmlFree(ctxt->sax);
10875 ctxt->sax = sax;
10876 }
10877 if (data!=NULL) {
10878 ctxt->_private=data;
10879 }
10880
Daniel Veillardadba5f12003-04-04 16:09:01 +000010881 ctxt->recovery = recovery;
10882
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010883 xmlParseDocument(ctxt);
10884
10885 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10886 else {
10887 ret = NULL;
10888 xmlFreeDoc(ctxt->myDoc);
10889 ctxt->myDoc = NULL;
10890 }
10891 if (sax != NULL)
10892 ctxt->sax = NULL;
10893 xmlFreeParserCtxt(ctxt);
10894
10895 return(ret);
10896}
10897
10898/**
Owen Taylor3473f882001-02-23 17:55:21 +000010899 * xmlSAXParseMemory:
10900 * @sax: the SAX handler block
10901 * @buffer: an pointer to a char array
10902 * @size: the size of the array
10903 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10904 * documents
10905 *
10906 * parse an XML in-memory block and use the given SAX function block
10907 * to handle the parsing callback. If sax is NULL, fallback to the default
10908 * DOM tree building routines.
10909 *
10910 * Returns the resulting document tree
10911 */
10912xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010913xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10914 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010915 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010916}
10917
10918/**
10919 * xmlParseMemory:
10920 * @buffer: an pointer to a char array
10921 * @size: the size of the array
10922 *
10923 * parse an XML in-memory block and build a tree.
10924 *
10925 * Returns the resulting document tree
10926 */
10927
Daniel Veillard50822cb2001-07-26 20:05:51 +000010928xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010929 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10930}
10931
10932/**
10933 * xmlRecoverMemory:
10934 * @buffer: an pointer to a char array
10935 * @size: the size of the array
10936 *
10937 * parse an XML in-memory block and build a tree.
10938 * In the case the document is not Well Formed, a tree is built anyway
10939 *
10940 * Returns the resulting document tree
10941 */
10942
Daniel Veillard50822cb2001-07-26 20:05:51 +000010943xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010944 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10945}
10946
10947/**
10948 * xmlSAXUserParseMemory:
10949 * @sax: a SAX handler
10950 * @user_data: The user data returned on SAX callbacks
10951 * @buffer: an in-memory XML document input
10952 * @size: the length of the XML document in bytes
10953 *
10954 * A better SAX parsing routine.
10955 * parse an XML in-memory buffer and call the given SAX handler routines.
10956 *
10957 * Returns 0 in case of success or a error number otherwise
10958 */
10959int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010960 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010961 int ret = 0;
10962 xmlParserCtxtPtr ctxt;
10963 xmlSAXHandlerPtr oldsax = NULL;
10964
Daniel Veillard9e923512002-08-14 08:48:52 +000010965 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000010966 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10967 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000010968 oldsax = ctxt->sax;
10969 ctxt->sax = sax;
Daniel Veillard30211a02001-04-26 09:33:18 +000010970 if (user_data != NULL)
10971 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010972
10973 xmlParseDocument(ctxt);
10974
10975 if (ctxt->wellFormed)
10976 ret = 0;
10977 else {
10978 if (ctxt->errNo != 0)
10979 ret = ctxt->errNo;
10980 else
10981 ret = -1;
10982 }
Daniel Veillard9e923512002-08-14 08:48:52 +000010983 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000010984 xmlFreeParserCtxt(ctxt);
10985
10986 return ret;
10987}
10988
10989/**
10990 * xmlCreateDocParserCtxt:
10991 * @cur: a pointer to an array of xmlChar
10992 *
10993 * Creates a parser context for an XML in-memory document.
10994 *
10995 * Returns the new parser context or NULL
10996 */
10997xmlParserCtxtPtr
10998xmlCreateDocParserCtxt(xmlChar *cur) {
10999 int len;
11000
11001 if (cur == NULL)
11002 return(NULL);
11003 len = xmlStrlen(cur);
11004 return(xmlCreateMemoryParserCtxt((char *)cur, len));
11005}
11006
11007/**
11008 * xmlSAXParseDoc:
11009 * @sax: the SAX handler block
11010 * @cur: a pointer to an array of xmlChar
11011 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11012 * documents
11013 *
11014 * parse an XML in-memory document and build a tree.
11015 * It use the given SAX function block to handle the parsing callback.
11016 * If sax is NULL, fallback to the default DOM tree building routines.
11017 *
11018 * Returns the resulting document tree
11019 */
11020
11021xmlDocPtr
11022xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
11023 xmlDocPtr ret;
11024 xmlParserCtxtPtr ctxt;
11025
11026 if (cur == NULL) return(NULL);
11027
11028
11029 ctxt = xmlCreateDocParserCtxt(cur);
11030 if (ctxt == NULL) return(NULL);
11031 if (sax != NULL) {
11032 ctxt->sax = sax;
11033 ctxt->userData = NULL;
11034 }
11035
11036 xmlParseDocument(ctxt);
11037 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11038 else {
11039 ret = NULL;
11040 xmlFreeDoc(ctxt->myDoc);
11041 ctxt->myDoc = NULL;
11042 }
11043 if (sax != NULL)
11044 ctxt->sax = NULL;
11045 xmlFreeParserCtxt(ctxt);
11046
11047 return(ret);
11048}
11049
11050/**
11051 * xmlParseDoc:
11052 * @cur: a pointer to an array of xmlChar
11053 *
11054 * parse an XML in-memory document and build a tree.
11055 *
11056 * Returns the resulting document tree
11057 */
11058
11059xmlDocPtr
11060xmlParseDoc(xmlChar *cur) {
11061 return(xmlSAXParseDoc(NULL, cur, 0));
11062}
11063
Daniel Veillard8107a222002-01-13 14:10:10 +000011064/************************************************************************
11065 * *
11066 * Specific function to keep track of entities references *
11067 * and used by the XSLT debugger *
11068 * *
11069 ************************************************************************/
11070
11071static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
11072
11073/**
11074 * xmlAddEntityReference:
11075 * @ent : A valid entity
11076 * @firstNode : A valid first node for children of entity
11077 * @lastNode : A valid last node of children entity
11078 *
11079 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
11080 */
11081static void
11082xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
11083 xmlNodePtr lastNode)
11084{
11085 if (xmlEntityRefFunc != NULL) {
11086 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
11087 }
11088}
11089
11090
11091/**
11092 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000011093 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000011094 *
11095 * Set the function to call call back when a xml reference has been made
11096 */
11097void
11098xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
11099{
11100 xmlEntityRefFunc = func;
11101}
Owen Taylor3473f882001-02-23 17:55:21 +000011102
11103/************************************************************************
11104 * *
11105 * Miscellaneous *
11106 * *
11107 ************************************************************************/
11108
11109#ifdef LIBXML_XPATH_ENABLED
11110#include <libxml/xpath.h>
11111#endif
11112
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011113extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000011114static int xmlParserInitialized = 0;
11115
11116/**
11117 * xmlInitParser:
11118 *
11119 * Initialization function for the XML parser.
11120 * This is not reentrant. Call once before processing in case of
11121 * use in multithreaded programs.
11122 */
11123
11124void
11125xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000011126 if (xmlParserInitialized != 0)
11127 return;
Owen Taylor3473f882001-02-23 17:55:21 +000011128
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011129 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
11130 (xmlGenericError == NULL))
11131 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011132 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000011133 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000011134 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000011135 xmlInitCharEncodingHandlers();
11136 xmlInitializePredefinedEntities();
11137 xmlDefaultSAXHandlerInit();
11138 xmlRegisterDefaultInputCallbacks();
11139 xmlRegisterDefaultOutputCallbacks();
11140#ifdef LIBXML_HTML_ENABLED
11141 htmlInitAutoClose();
11142 htmlDefaultSAXHandlerInit();
11143#endif
11144#ifdef LIBXML_XPATH_ENABLED
11145 xmlXPathInit();
11146#endif
11147 xmlParserInitialized = 1;
11148}
11149
11150/**
11151 * xmlCleanupParser:
11152 *
11153 * Cleanup function for the XML parser. It tries to reclaim all
11154 * parsing related global memory allocated for the parser processing.
11155 * It doesn't deallocate any document related memory. Calling this
11156 * function should not prevent reusing the parser.
Daniel Veillard7424eb62003-01-24 14:14:52 +000011157 * One should call xmlCleanupParser() only when the process has
11158 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000011159 */
11160
11161void
11162xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000011163 xmlCleanupCharEncodingHandlers();
11164 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000011165#ifdef LIBXML_CATALOG_ENABLED
11166 xmlCatalogCleanup();
11167#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000011168 xmlCleanupThreads();
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011169 xmlCleanupGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000011170 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011171}