blob: 1a8ba3650d1600e7c90c243ca67d2d70cf332fe9 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * This is a big file: the parser core is around 7000 lines, followed by
 * the progressive parser top routines, then the various high level APIs
 * to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
44#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000045#include <libxml/threads.h>
46#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000056#ifdef LIBXML_CATALOG_ENABLED
57#include <libxml/catalog.h>
58#endif
Owen Taylor3473f882001-02-23 17:55:21 +000059
60#ifdef HAVE_CTYPE_H
61#include <ctype.h>
62#endif
63#ifdef HAVE_STDLIB_H
64#include <stdlib.h>
65#endif
66#ifdef HAVE_SYS_STAT_H
67#include <sys/stat.h>
68#endif
69#ifdef HAVE_FCNTL_H
70#include <fcntl.h>
71#endif
72#ifdef HAVE_UNISTD_H
73#include <unistd.h>
74#endif
75#ifdef HAVE_ZLIB_H
76#include <zlib.h>
77#endif
78
/**
 * MAX_DEPTH:
 *
 * Arbitrary limit on the nesting depth of the XML documents we accept
 * to process. It is a safety boundary, not a limitation of the parser.
 * Kept as a macro because the code tests it with #ifdef.
 */
#define MAX_DEPTH 1024

/* Sizes used for the parser's internal working buffers. */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/* Marker string identifying a document built in SAX compatibility mode. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
92
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets that the W3C specifications allow.
 */
static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
101
102/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000103xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
104 const xmlChar **str);
105
Daniel Veillard257d9102001-05-08 10:41:44 +0000106static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000107xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
108 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000109 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000110 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000111
Daniel Veillard8107a222002-01-13 14:10:10 +0000112static void
113xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
114 xmlNodePtr lastNode);
115
Daniel Veillard328f48c2002-11-15 15:24:34 +0000116static int
117xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
118 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Owen Taylor3473f882001-02-23 17:55:21 +0000119/************************************************************************
120 * *
121 * Parser stacks related functions and macros *
122 * *
123 ************************************************************************/
124
125xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
126 const xmlChar ** str);
127
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000128/**
129 * inputPush:
130 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000131 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000132 *
133 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000134 *
135 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000136 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000137extern int
138inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
139{
140 if (ctxt->inputNr >= ctxt->inputMax) {
141 ctxt->inputMax *= 2;
142 ctxt->inputTab =
143 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
144 ctxt->inputMax *
145 sizeof(ctxt->inputTab[0]));
146 if (ctxt->inputTab == NULL) {
147 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
148 return (0);
149 }
150 }
151 ctxt->inputTab[ctxt->inputNr] = value;
152 ctxt->input = value;
153 return (ctxt->inputNr++);
154}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000155/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000156 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000157 * @ctxt: an XML parser context
158 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000159 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000160 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000161 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000162 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000163extern xmlParserInputPtr
164inputPop(xmlParserCtxtPtr ctxt)
165{
166 xmlParserInputPtr ret;
167
168 if (ctxt->inputNr <= 0)
169 return (0);
170 ctxt->inputNr--;
171 if (ctxt->inputNr > 0)
172 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
173 else
174 ctxt->input = NULL;
175 ret = ctxt->inputTab[ctxt->inputNr];
176 ctxt->inputTab[ctxt->inputNr] = 0;
177 return (ret);
178}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000179/**
180 * nodePush:
181 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000182 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000183 *
184 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000185 *
186 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000187 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000188extern int
189nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
190{
191 if (ctxt->nodeNr >= ctxt->nodeMax) {
192 ctxt->nodeMax *= 2;
193 ctxt->nodeTab =
194 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
195 ctxt->nodeMax *
196 sizeof(ctxt->nodeTab[0]));
197 if (ctxt->nodeTab == NULL) {
198 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
199 return (0);
200 }
201 }
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000202#ifdef MAX_DEPTH
203 if (ctxt->nodeNr > MAX_DEPTH) {
204 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
205 ctxt->sax->error(ctxt->userData,
206 "Excessive depth in document: change MAX_DEPTH = %d\n",
207 MAX_DEPTH);
208 ctxt->wellFormed = 0;
209 ctxt->instate = XML_PARSER_EOF;
210 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
211 return(0);
212 }
213#endif
Daniel Veillard1c732d22002-11-30 11:22:59 +0000214 ctxt->nodeTab[ctxt->nodeNr] = value;
215 ctxt->node = value;
216 return (ctxt->nodeNr++);
217}
218/**
219 * nodePop:
220 * @ctxt: an XML parser context
221 *
222 * Pops the top element node from the node stack
223 *
224 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +0000225 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000226extern xmlNodePtr
227nodePop(xmlParserCtxtPtr ctxt)
228{
229 xmlNodePtr ret;
230
231 if (ctxt->nodeNr <= 0)
232 return (0);
233 ctxt->nodeNr--;
234 if (ctxt->nodeNr > 0)
235 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
236 else
237 ctxt->node = NULL;
238 ret = ctxt->nodeTab[ctxt->nodeNr];
239 ctxt->nodeTab[ctxt->nodeNr] = 0;
240 return (ret);
241}
242/**
243 * namePush:
244 * @ctxt: an XML parser context
245 * @value: the element name
246 *
247 * Pushes a new element name on top of the name stack
248 *
249 * Returns 0 in case of error, the index in the stack otherwise
250 */
251extern int
252namePush(xmlParserCtxtPtr ctxt, xmlChar * value)
253{
254 if (ctxt->nameNr >= ctxt->nameMax) {
255 ctxt->nameMax *= 2;
256 ctxt->nameTab =
257 (xmlChar * *)xmlRealloc(ctxt->nameTab,
258 ctxt->nameMax *
259 sizeof(ctxt->nameTab[0]));
260 if (ctxt->nameTab == NULL) {
261 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
262 return (0);
263 }
264 }
265 ctxt->nameTab[ctxt->nameNr] = value;
266 ctxt->name = value;
267 return (ctxt->nameNr++);
268}
269/**
270 * namePop:
271 * @ctxt: an XML parser context
272 *
273 * Pops the top element name from the name stack
274 *
275 * Returns the name just removed
276 */
277extern xmlChar *
278namePop(xmlParserCtxtPtr ctxt)
279{
280 xmlChar *ret;
281
282 if (ctxt->nameNr <= 0)
283 return (0);
284 ctxt->nameNr--;
285 if (ctxt->nameNr > 0)
286 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
287 else
288 ctxt->name = NULL;
289 ret = ctxt->nameTab[ctxt->nameNr];
290 ctxt->nameTab[ctxt->nameNr] = 0;
291 return (ret);
292}
Owen Taylor3473f882001-02-23 17:55:21 +0000293
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000294static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000295 if (ctxt->spaceNr >= ctxt->spaceMax) {
296 ctxt->spaceMax *= 2;
297 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
298 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
299 if (ctxt->spaceTab == NULL) {
300 xmlGenericError(xmlGenericErrorContext,
301 "realloc failed !\n");
302 return(0);
303 }
304 }
305 ctxt->spaceTab[ctxt->spaceNr] = val;
306 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
307 return(ctxt->spaceNr++);
308}
309
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000310static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000311 int ret;
312 if (ctxt->spaceNr <= 0) return(0);
313 ctxt->spaceNr--;
314 if (ctxt->spaceNr > 0)
315 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
316 else
317 ctxt->space = NULL;
318 ret = ctxt->spaceTab[ctxt->spaceNr];
319 ctxt->spaceTab[ctxt->spaceNr] = -1;
320 return(ret);
321}
322
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8-bit value.
 *           This should be used internally by the parser only to compare
 *           against ASCII values, otherwise it would break when running
 *           with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Like CUR, it should be used only
 *           to compare against ASCII based substrings.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops finished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
357
/* Direct accessors on the current input; all expect a local named ctxt. */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * Advance by val xmlChars, updating the character and column counters,
 * then handle a '%' found at the new position and, when the current
 * input is exhausted and cannot be grown, pop it.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
370
Daniel Veillarda880b122003-04-21 21:36:41 +0000371#define SHRINK if ((ctxt->progressive == 0) && \
372 (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK))\
Daniel Veillard46de64e2002-05-29 08:21:33 +0000373 xmlSHRINK (ctxt);
374
375static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
376 xmlParserInputShrink(ctxt->input);
377 if ((*ctxt->input->cur == 0) &&
378 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
379 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000380 }
Owen Taylor3473f882001-02-23 17:55:21 +0000381
Daniel Veillarda880b122003-04-21 21:36:41 +0000382#define GROW if ((ctxt->progressive == 0) && \
383 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +0000384 xmlGROW (ctxt);
385
386static void xmlGROW (xmlParserCtxtPtr ctxt) {
387 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
388 if ((*ctxt->input->cur == 0) &&
389 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
390 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +0000391}
Owen Taylor3473f882001-02-23 17:55:21 +0000392
/* Thin wrappers; both expect a local named ctxt. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * Step over a single xmlChar, bumping the column and character counters,
 * and try to read more data when the new position holds a 0 byte.
 */
#define NEXT1 {								\
	ctxt->nbChars++;						\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * Step over the current character, which is l xmlChars long, keeping
 * the line/column position up to date, then handle a '%' found at the
 * new position.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++;						\
	ctxt->input->col = 1;						\
    } else								\
	ctxt->input->col++;						\
    ctxt->input->cur += l;						\
    if (*ctxt->input->cur == '%')					\
	xmlParserHandlePEReference(ctxt);				\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * Append character v to buffer b at index i: a direct store when its
 * encoded length l is 1, otherwise through xmlCopyCharMultiByte.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1)								\
	b[i++] = (xmlChar) v;						\
    else								\
	i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +0000419
420/**
421 * xmlSkipBlankChars:
422 * @ctxt: the XML parser context
423 *
424 * skip all blanks character found at that point in the input streams.
425 * It pops up finished entities in the process if allowable at that point.
426 *
427 * Returns the number of space chars skipped
428 */
429
430int
431xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000432 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000433
434 /*
435 * It's Okay to use CUR/NEXT here since all the blanks are on
436 * the ASCII range.
437 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000438 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
439 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000440 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000441 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000442 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000443 cur = ctxt->input->cur;
444 while (IS_BLANK(*cur)) {
445 if (*cur == '\n') {
446 ctxt->input->line++; ctxt->input->col = 1;
447 }
448 cur++;
449 res++;
450 if (*cur == 0) {
451 ctxt->input->cur = cur;
452 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
453 cur = ctxt->input->cur;
454 }
455 }
456 ctxt->input->cur = cur;
457 } else {
458 int cur;
459 do {
460 cur = CUR;
461 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
462 NEXT;
463 cur = CUR;
464 res++;
465 }
466 while ((cur == 0) && (ctxt->inputNr > 1) &&
467 (ctxt->instate != XML_PARSER_COMMENT)) {
468 xmlPopInput(ctxt);
469 cur = CUR;
470 }
471 /*
472 * Need to handle support of entities branching here
473 */
474 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
475 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
476 }
Owen Taylor3473f882001-02-23 17:55:21 +0000477 return(res);
478}
479
480/************************************************************************
481 * *
482 * Commodity functions to handle entities *
483 * *
484 ************************************************************************/
485
486/**
487 * xmlPopInput:
488 * @ctxt: an XML parser context
489 *
490 * xmlPopInput: the current input pointed by ctxt->input came to an end
491 * pop it and return the next char.
492 *
493 * Returns the current xmlChar in the parser context
494 */
495xmlChar
496xmlPopInput(xmlParserCtxtPtr ctxt) {
497 if (ctxt->inputNr == 1) return(0); /* End of main Input */
498 if (xmlParserDebugEntities)
499 xmlGenericError(xmlGenericErrorContext,
500 "Popping input %d\n", ctxt->inputNr);
501 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +0000502 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000503 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
504 return(xmlPopInput(ctxt));
505 return(CUR);
506}
507
508/**
509 * xmlPushInput:
510 * @ctxt: an XML parser context
511 * @input: an XML parser input fragment (entity, XML fragment ...).
512 *
513 * xmlPushInput: switch to a new input stream which is stacked on top
514 * of the previous one(s).
515 */
516void
517xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
518 if (input == NULL) return;
519
520 if (xmlParserDebugEntities) {
521 if ((ctxt->input != NULL) && (ctxt->input->filename))
522 xmlGenericError(xmlGenericErrorContext,
523 "%s(%d): ", ctxt->input->filename,
524 ctxt->input->line);
525 xmlGenericError(xmlGenericErrorContext,
526 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
527 }
528 inputPush(ctxt, input);
529 GROW;
530}
531
532/**
533 * xmlParseCharRef:
534 * @ctxt: an XML parser context
535 *
536 * parse Reference declarations
537 *
538 * [66] CharRef ::= '&#' [0-9]+ ';' |
539 * '&#x' [0-9a-fA-F]+ ';'
540 *
541 * [ WFC: Legal Character ]
542 * Characters referred to using character references must match the
543 * production for Char.
544 *
545 * Returns the value parsed (as an int), 0 in case of error
546 */
547int
548xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000549 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000550 int count = 0;
551
Owen Taylor3473f882001-02-23 17:55:21 +0000552 /*
553 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
554 */
Daniel Veillard561b7f82002-03-20 21:55:57 +0000555 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +0000556 (NXT(2) == 'x')) {
557 SKIP(3);
558 GROW;
559 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000560 if (count++ > 20) {
561 count = 0;
562 GROW;
563 }
564 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000565 val = val * 16 + (CUR - '0');
566 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
567 val = val * 16 + (CUR - 'a') + 10;
568 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
569 val = val * 16 + (CUR - 'A') + 10;
570 else {
571 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
572 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
573 ctxt->sax->error(ctxt->userData,
574 "xmlParseCharRef: invalid hexadecimal value\n");
575 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000576 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000577 val = 0;
578 break;
579 }
580 NEXT;
581 count++;
582 }
583 if (RAW == ';') {
584 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +0000585 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +0000586 ctxt->nbChars ++;
587 ctxt->input->cur++;
588 }
Daniel Veillard561b7f82002-03-20 21:55:57 +0000589 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +0000590 SKIP(2);
591 GROW;
592 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000593 if (count++ > 20) {
594 count = 0;
595 GROW;
596 }
597 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000598 val = val * 10 + (CUR - '0');
599 else {
600 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
602 ctxt->sax->error(ctxt->userData,
603 "xmlParseCharRef: invalid decimal value\n");
604 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000605 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000606 val = 0;
607 break;
608 }
609 NEXT;
610 count++;
611 }
612 if (RAW == ';') {
613 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +0000614 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +0000615 ctxt->nbChars ++;
616 ctxt->input->cur++;
617 }
618 } else {
619 ctxt->errNo = XML_ERR_INVALID_CHARREF;
620 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
621 ctxt->sax->error(ctxt->userData,
622 "xmlParseCharRef: invalid value\n");
623 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000624 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000625 }
626
627 /*
628 * [ WFC: Legal Character ]
629 * Characters referred to using character references must match the
630 * production for Char.
631 */
632 if (IS_CHAR(val)) {
633 return(val);
634 } else {
635 ctxt->errNo = XML_ERR_INVALID_CHAR;
636 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000637 ctxt->sax->error(ctxt->userData,
638 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000639 val);
640 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000641 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000642 }
643 return(0);
644}
645
646/**
647 * xmlParseStringCharRef:
648 * @ctxt: an XML parser context
649 * @str: a pointer to an index in the string
650 *
651 * parse Reference declarations, variant parsing from a string rather
652 * than an an input flow.
653 *
654 * [66] CharRef ::= '&#' [0-9]+ ';' |
655 * '&#x' [0-9a-fA-F]+ ';'
656 *
657 * [ WFC: Legal Character ]
658 * Characters referred to using character references must match the
659 * production for Char.
660 *
661 * Returns the value parsed (as an int), 0 in case of error, str will be
662 * updated to the current value of the index
663 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000664static int
Owen Taylor3473f882001-02-23 17:55:21 +0000665xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
666 const xmlChar *ptr;
667 xmlChar cur;
668 int val = 0;
669
670 if ((str == NULL) || (*str == NULL)) return(0);
671 ptr = *str;
672 cur = *ptr;
673 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
674 ptr += 3;
675 cur = *ptr;
676 while (cur != ';') { /* Non input consuming loop */
677 if ((cur >= '0') && (cur <= '9'))
678 val = val * 16 + (cur - '0');
679 else if ((cur >= 'a') && (cur <= 'f'))
680 val = val * 16 + (cur - 'a') + 10;
681 else if ((cur >= 'A') && (cur <= 'F'))
682 val = val * 16 + (cur - 'A') + 10;
683 else {
684 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
685 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
686 ctxt->sax->error(ctxt->userData,
687 "xmlParseStringCharRef: invalid hexadecimal value\n");
688 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000689 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000690 val = 0;
691 break;
692 }
693 ptr++;
694 cur = *ptr;
695 }
696 if (cur == ';')
697 ptr++;
698 } else if ((cur == '&') && (ptr[1] == '#')){
699 ptr += 2;
700 cur = *ptr;
701 while (cur != ';') { /* Non input consuming loops */
702 if ((cur >= '0') && (cur <= '9'))
703 val = val * 10 + (cur - '0');
704 else {
705 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
706 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
707 ctxt->sax->error(ctxt->userData,
708 "xmlParseStringCharRef: invalid decimal value\n");
709 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000710 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000711 val = 0;
712 break;
713 }
714 ptr++;
715 cur = *ptr;
716 }
717 if (cur == ';')
718 ptr++;
719 } else {
720 ctxt->errNo = XML_ERR_INVALID_CHARREF;
721 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
722 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000723 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000724 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000725 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000726 return(0);
727 }
728 *str = ptr;
729
730 /*
731 * [ WFC: Legal Character ]
732 * Characters referred to using character references must match the
733 * production for Char.
734 */
735 if (IS_CHAR(val)) {
736 return(val);
737 } else {
738 ctxt->errNo = XML_ERR_INVALID_CHAR;
739 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
740 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000741 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000742 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000743 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000744 }
745 return(0);
746}
747
748/**
Daniel Veillardf5582f12002-06-11 10:08:16 +0000749 * xmlNewBlanksWrapperInputStream:
750 * @ctxt: an XML parser context
751 * @entity: an Entity pointer
752 *
753 * Create a new input stream for wrapping
754 * blanks around a PEReference
755 *
756 * Returns the new input stream or NULL
757 */
758
759static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
760
Daniel Veillardf4862f02002-09-10 11:13:43 +0000761static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +0000762xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
763 xmlParserInputPtr input;
764 xmlChar *buffer;
765 size_t length;
766 if (entity == NULL) {
767 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
768 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
769 ctxt->sax->error(ctxt->userData,
770 "internal: xmlNewBlanksWrapperInputStream entity = NULL\n");
771 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
772 return(NULL);
773 }
774 if (xmlParserDebugEntities)
775 xmlGenericError(xmlGenericErrorContext,
776 "new blanks wrapper for entity: %s\n", entity->name);
777 input = xmlNewInputStream(ctxt);
778 if (input == NULL) {
779 return(NULL);
780 }
781 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +0000782 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +0000783 if (buffer == NULL) {
784 return(NULL);
785 }
786 buffer [0] = ' ';
787 buffer [1] = '%';
788 buffer [length-3] = ';';
789 buffer [length-2] = ' ';
790 buffer [length-1] = 0;
791 memcpy(buffer + 2, entity->name, length - 5);
792 input->free = deallocblankswrapper;
793 input->base = buffer;
794 input->cur = buffer;
795 input->length = length;
796 input->end = &buffer[length];
797 return(input);
798}
799
800/**
Owen Taylor3473f882001-02-23 17:55:21 +0000801 * xmlParserHandlePEReference:
802 * @ctxt: the parser context
803 *
804 * [69] PEReference ::= '%' Name ';'
805 *
806 * [ WFC: No Recursion ]
807 * A parsed entity must not contain a recursive
808 * reference to itself, either directly or indirectly.
809 *
810 * [ WFC: Entity Declared ]
811 * In a document without any DTD, a document with only an internal DTD
812 * subset which contains no parameter entity references, or a document
813 * with "standalone='yes'", ... ... The declaration of a parameter
814 * entity must precede any reference to it...
815 *
816 * [ VC: Entity Declared ]
817 * In a document with an external subset or external parameter entities
818 * with "standalone='no'", ... ... The declaration of a parameter entity
819 * must precede any reference to it...
820 *
821 * [ WFC: In DTD ]
822 * Parameter-entity references may only appear in the DTD.
823 * NOTE: misleading but this is handled.
824 *
825 * A PEReference may have been detected in the current input stream
826 * the handling is done accordingly to
827 * http://www.w3.org/TR/REC-xml#entproc
828 * i.e.
829 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000830 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000831 */
832void
833xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
834 xmlChar *name;
835 xmlEntityPtr entity = NULL;
836 xmlParserInputPtr input;
837
Owen Taylor3473f882001-02-23 17:55:21 +0000838 if (RAW != '%') return;
839 switch(ctxt->instate) {
840 case XML_PARSER_CDATA_SECTION:
841 return;
842 case XML_PARSER_COMMENT:
843 return;
844 case XML_PARSER_START_TAG:
845 return;
846 case XML_PARSER_END_TAG:
847 return;
848 case XML_PARSER_EOF:
849 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
850 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
851 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
852 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000853 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000854 return;
855 case XML_PARSER_PROLOG:
856 case XML_PARSER_START:
857 case XML_PARSER_MISC:
858 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
859 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
860 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
861 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000862 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000863 return;
864 case XML_PARSER_ENTITY_DECL:
865 case XML_PARSER_CONTENT:
866 case XML_PARSER_ATTRIBUTE_VALUE:
867 case XML_PARSER_PI:
868 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +0000869 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +0000870 /* we just ignore it there */
871 return;
872 case XML_PARSER_EPILOG:
873 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
874 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
875 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
876 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000877 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000878 return;
879 case XML_PARSER_ENTITY_VALUE:
880 /*
881 * NOTE: in the case of entity values, we don't do the
882 * substitution here since we need the literal
883 * entity value to be able to save the internal
884 * subset of the document.
885 * This will be handled by xmlStringDecodeEntities
886 */
887 return;
888 case XML_PARSER_DTD:
889 /*
890 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
891 * In the internal DTD subset, parameter-entity references
892 * can occur only where markup declarations can occur, not
893 * within markup declarations.
894 * In that case this is handled in xmlParseMarkupDecl
895 */
896 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
897 return;
Daniel Veillardf5582f12002-06-11 10:08:16 +0000898 if (IS_BLANK(NXT(1)) || NXT(1) == 0)
899 return;
Owen Taylor3473f882001-02-23 17:55:21 +0000900 break;
901 case XML_PARSER_IGNORE:
902 return;
903 }
904
905 NEXT;
906 name = xmlParseName(ctxt);
907 if (xmlParserDebugEntities)
908 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000909 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000910 if (name == NULL) {
911 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
912 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000913 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000914 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000915 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000916 } else {
917 if (RAW == ';') {
918 NEXT;
919 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
920 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
921 if (entity == NULL) {
922
923 /*
924 * [ WFC: Entity Declared ]
925 * In a document without any DTD, a document with only an
926 * internal DTD subset which contains no parameter entity
927 * references, or a document with "standalone='yes'", ...
928 * ... The declaration of a parameter entity must precede
929 * any reference to it...
930 */
931 if ((ctxt->standalone == 1) ||
932 ((ctxt->hasExternalSubset == 0) &&
933 (ctxt->hasPErefs == 0))) {
934 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
935 ctxt->sax->error(ctxt->userData,
936 "PEReference: %%%s; not found\n", name);
937 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000938 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000939 } else {
940 /*
941 * [ VC: Entity Declared ]
942 * In a document with an external subset or external
943 * parameter entities with "standalone='no'", ...
944 * ... The declaration of a parameter entity must precede
945 * any reference to it...
946 */
947 if ((!ctxt->disableSAX) &&
948 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
949 ctxt->vctxt.error(ctxt->vctxt.userData,
950 "PEReference: %%%s; not found\n", name);
951 } else if ((!ctxt->disableSAX) &&
952 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
953 ctxt->sax->warning(ctxt->userData,
954 "PEReference: %%%s; not found\n", name);
955 ctxt->valid = 0;
956 }
Daniel Veillardf5582f12002-06-11 10:08:16 +0000957 } else if (ctxt->input->free != deallocblankswrapper) {
958 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
959 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +0000960 } else {
961 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
962 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000963 xmlChar start[4];
964 xmlCharEncoding enc;
965
Owen Taylor3473f882001-02-23 17:55:21 +0000966 /*
967 * handle the extra spaces added before and after
968 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000969 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000970 */
971 input = xmlNewEntityInputStream(ctxt, entity);
972 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000973
974 /*
975 * Get the 4 first bytes and decode the charset
976 * if enc != XML_CHAR_ENCODING_NONE
977 * plug some encoding conversion routines.
978 */
979 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +0000980 if (entity->length >= 4) {
981 start[0] = RAW;
982 start[1] = NXT(1);
983 start[2] = NXT(2);
984 start[3] = NXT(3);
985 enc = xmlDetectCharEncoding(start, 4);
986 if (enc != XML_CHAR_ENCODING_NONE) {
987 xmlSwitchEncoding(ctxt, enc);
988 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000989 }
990
Owen Taylor3473f882001-02-23 17:55:21 +0000991 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
992 (RAW == '<') && (NXT(1) == '?') &&
993 (NXT(2) == 'x') && (NXT(3) == 'm') &&
994 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
995 xmlParseTextDecl(ctxt);
996 }
Owen Taylor3473f882001-02-23 17:55:21 +0000997 } else {
998 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
999 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001000 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +00001001 name);
1002 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001003 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001004 }
1005 }
1006 } else {
1007 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1008 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1009 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001010 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001011 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001012 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001013 }
1014 xmlFree(name);
1015 }
1016}
1017
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates @buffer accordingly. It must be
 * used inside a function returning a pointer: when the reallocation fails
 * an error is reported, the old block is released (the macro returns right
 * away, so nothing else could free it) and the enclosing function returns
 * NULL. The result of xmlRealloc() goes through a temporary so that the
 * old block is still reachable at that point.
 */
#define growBuffer(buffer) {						\
    xmlChar *growBuffer_tmp;						\
    buffer##_size *= 2;							\
    growBuffer_tmp = (xmlChar *)					\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (growBuffer_tmp == NULL) {					\
	xmlGenericError(xmlGenericErrorContext, "realloc failed");	\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = growBuffer_tmp;						\
}
1030
1031/**
1032 * xmlStringDecodeEntities:
1033 * @ctxt: the parser context
1034 * @str: the input string
1035 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1036 * @end: an end marker xmlChar, 0 if none
1037 * @end2: an end marker xmlChar, 0 if none
1038 * @end3: an end marker xmlChar, 0 if none
1039 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001040 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001041 *
1042 * [67] Reference ::= EntityRef | CharRef
1043 *
1044 * [69] PEReference ::= '%' Name ';'
1045 *
1046 * Returns A newly allocated string with the substitution done. The caller
1047 * must deallocate it !
1048 */
1049xmlChar *
1050xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1051 xmlChar end, xmlChar end2, xmlChar end3) {
1052 xmlChar *buffer = NULL;
1053 int buffer_size = 0;
1054
1055 xmlChar *current = NULL;
1056 xmlEntityPtr ent;
1057 int c,l;
1058 int nbchars = 0;
1059
1060 if (str == NULL)
1061 return(NULL);
1062
1063 if (ctxt->depth > 40) {
1064 ctxt->errNo = XML_ERR_ENTITY_LOOP;
1065 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1066 ctxt->sax->error(ctxt->userData,
1067 "Detected entity reference loop\n");
1068 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001069 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001070 return(NULL);
1071 }
1072
1073 /*
1074 * allocate a translation buffer.
1075 */
1076 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001077 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001078 if (buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00001079 xmlGenericError(xmlGenericErrorContext,
1080 "xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00001081 return(NULL);
1082 }
1083
1084 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001085 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001086 * we are operating on already parsed values.
1087 */
1088 c = CUR_SCHAR(str, l);
1089 while ((c != 0) && (c != end) && /* non input consuming loop */
1090 (c != end2) && (c != end3)) {
1091
1092 if (c == 0) break;
1093 if ((c == '&') && (str[1] == '#')) {
1094 int val = xmlParseStringCharRef(ctxt, &str);
1095 if (val != 0) {
1096 COPY_BUF(0,buffer,nbchars,val);
1097 }
1098 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1099 if (xmlParserDebugEntities)
1100 xmlGenericError(xmlGenericErrorContext,
1101 "String decoding Entity Reference: %.30s\n",
1102 str);
1103 ent = xmlParseStringEntityRef(ctxt, &str);
1104 if ((ent != NULL) &&
1105 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1106 if (ent->content != NULL) {
1107 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1108 } else {
1109 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1110 ctxt->sax->error(ctxt->userData,
1111 "internal error entity has no content\n");
1112 }
1113 } else if ((ent != NULL) && (ent->content != NULL)) {
1114 xmlChar *rep;
1115
1116 ctxt->depth++;
1117 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1118 0, 0, 0);
1119 ctxt->depth--;
1120 if (rep != NULL) {
1121 current = rep;
1122 while (*current != 0) { /* non input consuming loop */
1123 buffer[nbchars++] = *current++;
1124 if (nbchars >
1125 buffer_size - XML_PARSER_BUFFER_SIZE) {
1126 growBuffer(buffer);
1127 }
1128 }
1129 xmlFree(rep);
1130 }
1131 } else if (ent != NULL) {
1132 int i = xmlStrlen(ent->name);
1133 const xmlChar *cur = ent->name;
1134
1135 buffer[nbchars++] = '&';
1136 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1137 growBuffer(buffer);
1138 }
1139 for (;i > 0;i--)
1140 buffer[nbchars++] = *cur++;
1141 buffer[nbchars++] = ';';
1142 }
1143 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1144 if (xmlParserDebugEntities)
1145 xmlGenericError(xmlGenericErrorContext,
1146 "String decoding PE Reference: %.30s\n", str);
1147 ent = xmlParseStringPEReference(ctxt, &str);
1148 if (ent != NULL) {
1149 xmlChar *rep;
1150
1151 ctxt->depth++;
1152 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1153 0, 0, 0);
1154 ctxt->depth--;
1155 if (rep != NULL) {
1156 current = rep;
1157 while (*current != 0) { /* non input consuming loop */
1158 buffer[nbchars++] = *current++;
1159 if (nbchars >
1160 buffer_size - XML_PARSER_BUFFER_SIZE) {
1161 growBuffer(buffer);
1162 }
1163 }
1164 xmlFree(rep);
1165 }
1166 }
1167 } else {
1168 COPY_BUF(l,buffer,nbchars,c);
1169 str += l;
1170 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1171 growBuffer(buffer);
1172 }
1173 }
1174 c = CUR_SCHAR(str, l);
1175 }
1176 buffer[nbchars++] = 0;
1177 return(buffer);
1178}
1179
1180
1181/************************************************************************
1182 * *
1183 * Commodity functions to handle xmlChars *
1184 * *
1185 ************************************************************************/
1186
1187/**
1188 * xmlStrndup:
1189 * @cur: the input xmlChar *
1190 * @len: the len of @cur
1191 *
1192 * a strndup for array of xmlChar's
1193 *
1194 * Returns a new xmlChar * or NULL
1195 */
1196xmlChar *
1197xmlStrndup(const xmlChar *cur, int len) {
1198 xmlChar *ret;
1199
1200 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001201 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001202 if (ret == NULL) {
1203 xmlGenericError(xmlGenericErrorContext,
1204 "malloc of %ld byte failed\n",
1205 (len + 1) * (long)sizeof(xmlChar));
1206 return(NULL);
1207 }
1208 memcpy(ret, cur, len * sizeof(xmlChar));
1209 ret[len] = 0;
1210 return(ret);
1211}
1212
1213/**
1214 * xmlStrdup:
1215 * @cur: the input xmlChar *
1216 *
1217 * a strdup for array of xmlChar's. Since they are supposed to be
1218 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1219 * a termination mark of '0'.
1220 *
1221 * Returns a new xmlChar * or NULL
1222 */
1223xmlChar *
1224xmlStrdup(const xmlChar *cur) {
1225 const xmlChar *p = cur;
1226
1227 if (cur == NULL) return(NULL);
1228 while (*p != 0) p++; /* non input consuming */
1229 return(xmlStrndup(cur, p - cur));
1230}
1231
1232/**
1233 * xmlCharStrndup:
1234 * @cur: the input char *
1235 * @len: the len of @cur
1236 *
1237 * a strndup for char's to xmlChar's
1238 *
1239 * Returns a new xmlChar * or NULL
1240 */
1241
1242xmlChar *
1243xmlCharStrndup(const char *cur, int len) {
1244 int i;
1245 xmlChar *ret;
1246
1247 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001248 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001249 if (ret == NULL) {
1250 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1251 (len + 1) * (long)sizeof(xmlChar));
1252 return(NULL);
1253 }
1254 for (i = 0;i < len;i++)
1255 ret[i] = (xmlChar) cur[i];
1256 ret[len] = 0;
1257 return(ret);
1258}
1259
1260/**
1261 * xmlCharStrdup:
1262 * @cur: the input char *
Owen Taylor3473f882001-02-23 17:55:21 +00001263 *
1264 * a strdup for char's to xmlChar's
1265 *
1266 * Returns a new xmlChar * or NULL
1267 */
1268
1269xmlChar *
1270xmlCharStrdup(const char *cur) {
1271 const char *p = cur;
1272
1273 if (cur == NULL) return(NULL);
1274 while (*p != '\0') p++; /* non input consuming */
1275 return(xmlCharStrndup(cur, p - cur));
1276}
1277
1278/**
1279 * xmlStrcmp:
1280 * @str1: the first xmlChar *
1281 * @str2: the second xmlChar *
1282 *
1283 * a strcmp for xmlChar's
1284 *
1285 * Returns the integer result of the comparison
1286 */
1287
1288int
1289xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1290 register int tmp;
1291
1292 if (str1 == str2) return(0);
1293 if (str1 == NULL) return(-1);
1294 if (str2 == NULL) return(1);
1295 do {
1296 tmp = *str1++ - *str2;
1297 if (tmp != 0) return(tmp);
1298 } while (*str2++ != 0);
1299 return 0;
1300}
1301
1302/**
1303 * xmlStrEqual:
1304 * @str1: the first xmlChar *
1305 * @str2: the second xmlChar *
1306 *
1307 * Check if both string are equal of have same content
1308 * Should be a bit more readable and faster than xmlStrEqual()
1309 *
1310 * Returns 1 if they are equal, 0 if they are different
1311 */
1312
1313int
1314xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1315 if (str1 == str2) return(1);
1316 if (str1 == NULL) return(0);
1317 if (str2 == NULL) return(0);
1318 do {
1319 if (*str1++ != *str2) return(0);
1320 } while (*str2++);
1321 return(1);
1322}
1323
1324/**
1325 * xmlStrncmp:
1326 * @str1: the first xmlChar *
1327 * @str2: the second xmlChar *
1328 * @len: the max comparison length
1329 *
1330 * a strncmp for xmlChar's
1331 *
1332 * Returns the integer result of the comparison
1333 */
1334
1335int
1336xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1337 register int tmp;
1338
1339 if (len <= 0) return(0);
1340 if (str1 == str2) return(0);
1341 if (str1 == NULL) return(-1);
1342 if (str2 == NULL) return(1);
1343 do {
1344 tmp = *str1++ - *str2;
1345 if (tmp != 0 || --len == 0) return(tmp);
1346 } while (*str2++ != 0);
1347 return 0;
1348}
1349
Daniel Veillardb44025c2001-10-11 22:55:55 +00001350static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001351 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1352 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1353 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1354 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1355 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1356 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1357 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1358 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1359 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1360 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1361 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1362 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1363 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1364 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1365 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1366 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1367 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1368 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1369 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1370 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1371 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1372 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1373 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1374 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1375 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1376 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1377 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1378 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1379 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1380 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1381 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1382 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1383};
1384
1385/**
1386 * xmlStrcasecmp:
1387 * @str1: the first xmlChar *
1388 * @str2: the second xmlChar *
1389 *
1390 * a strcasecmp for xmlChar's
1391 *
1392 * Returns the integer result of the comparison
1393 */
1394
1395int
1396xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1397 register int tmp;
1398
1399 if (str1 == str2) return(0);
1400 if (str1 == NULL) return(-1);
1401 if (str2 == NULL) return(1);
1402 do {
1403 tmp = casemap[*str1++] - casemap[*str2];
1404 if (tmp != 0) return(tmp);
1405 } while (*str2++ != 0);
1406 return 0;
1407}
1408
1409/**
1410 * xmlStrncasecmp:
1411 * @str1: the first xmlChar *
1412 * @str2: the second xmlChar *
1413 * @len: the max comparison length
1414 *
1415 * a strncasecmp for xmlChar's
1416 *
1417 * Returns the integer result of the comparison
1418 */
1419
1420int
1421xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1422 register int tmp;
1423
1424 if (len <= 0) return(0);
1425 if (str1 == str2) return(0);
1426 if (str1 == NULL) return(-1);
1427 if (str2 == NULL) return(1);
1428 do {
1429 tmp = casemap[*str1++] - casemap[*str2];
1430 if (tmp != 0 || --len == 0) return(tmp);
1431 } while (*str2++ != 0);
1432 return 0;
1433}
1434
1435/**
1436 * xmlStrchr:
1437 * @str: the xmlChar * array
1438 * @val: the xmlChar to search
1439 *
1440 * a strchr for xmlChar's
1441 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001442 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001443 */
1444
1445const xmlChar *
1446xmlStrchr(const xmlChar *str, xmlChar val) {
1447 if (str == NULL) return(NULL);
1448 while (*str != 0) { /* non input consuming */
1449 if (*str == val) return((xmlChar *) str);
1450 str++;
1451 }
1452 return(NULL);
1453}
1454
1455/**
1456 * xmlStrstr:
1457 * @str: the xmlChar * array (haystack)
1458 * @val: the xmlChar to search (needle)
1459 *
1460 * a strstr for xmlChar's
1461 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001462 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001463 */
1464
1465const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001466xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001467 int n;
1468
1469 if (str == NULL) return(NULL);
1470 if (val == NULL) return(NULL);
1471 n = xmlStrlen(val);
1472
1473 if (n == 0) return(str);
1474 while (*str != 0) { /* non input consuming */
1475 if (*str == *val) {
1476 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1477 }
1478 str++;
1479 }
1480 return(NULL);
1481}
1482
1483/**
1484 * xmlStrcasestr:
1485 * @str: the xmlChar * array (haystack)
1486 * @val: the xmlChar to search (needle)
1487 *
1488 * a case-ignoring strstr for xmlChar's
1489 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001490 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001491 */
1492
1493const xmlChar *
1494xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1495 int n;
1496
1497 if (str == NULL) return(NULL);
1498 if (val == NULL) return(NULL);
1499 n = xmlStrlen(val);
1500
1501 if (n == 0) return(str);
1502 while (*str != 0) { /* non input consuming */
1503 if (casemap[*str] == casemap[*val])
1504 if (!xmlStrncasecmp(str, val, n)) return(str);
1505 str++;
1506 }
1507 return(NULL);
1508}
1509
1510/**
1511 * xmlStrsub:
1512 * @str: the xmlChar * array (haystack)
1513 * @start: the index of the first char (zero based)
1514 * @len: the length of the substring
1515 *
1516 * Extract a substring of a given string
1517 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001518 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001519 */
1520
1521xmlChar *
1522xmlStrsub(const xmlChar *str, int start, int len) {
1523 int i;
1524
1525 if (str == NULL) return(NULL);
1526 if (start < 0) return(NULL);
1527 if (len < 0) return(NULL);
1528
1529 for (i = 0;i < start;i++) {
1530 if (*str == 0) return(NULL);
1531 str++;
1532 }
1533 if (*str == 0) return(NULL);
1534 return(xmlStrndup(str, len));
1535}
1536
1537/**
1538 * xmlStrlen:
1539 * @str: the xmlChar * array
1540 *
1541 * length of a xmlChar's string
1542 *
1543 * Returns the number of xmlChar contained in the ARRAY.
1544 */
1545
1546int
1547xmlStrlen(const xmlChar *str) {
1548 int len = 0;
1549
1550 if (str == NULL) return(0);
1551 while (*str != 0) { /* non input consuming */
1552 str++;
1553 len++;
1554 }
1555 return(len);
1556}
1557
1558/**
1559 * xmlStrncat:
1560 * @cur: the original xmlChar * array
1561 * @add: the xmlChar * array added
1562 * @len: the length of @add
1563 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001564 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001565 * first bytes of @add.
1566 *
1567 * Returns a new xmlChar *, the original @cur is reallocated if needed
1568 * and should not be freed
1569 */
1570
1571xmlChar *
1572xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1573 int size;
1574 xmlChar *ret;
1575
1576 if ((add == NULL) || (len == 0))
1577 return(cur);
1578 if (cur == NULL)
1579 return(xmlStrndup(add, len));
1580
1581 size = xmlStrlen(cur);
1582 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1583 if (ret == NULL) {
1584 xmlGenericError(xmlGenericErrorContext,
1585 "xmlStrncat: realloc of %ld byte failed\n",
1586 (size + len + 1) * (long)sizeof(xmlChar));
1587 return(cur);
1588 }
1589 memcpy(&ret[size], add, len * sizeof(xmlChar));
1590 ret[size + len] = 0;
1591 return(ret);
1592}
1593
1594/**
1595 * xmlStrcat:
1596 * @cur: the original xmlChar * array
1597 * @add: the xmlChar * array added
1598 *
1599 * a strcat for array of xmlChar's. Since they are supposed to be
1600 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1601 * a termination mark of '0'.
1602 *
1603 * Returns a new xmlChar * containing the concatenated string.
1604 */
1605xmlChar *
1606xmlStrcat(xmlChar *cur, const xmlChar *add) {
1607 const xmlChar *p = add;
1608
1609 if (add == NULL) return(cur);
1610 if (cur == NULL)
1611 return(xmlStrdup(add));
1612
1613 while (*p != 0) p++; /* non input consuming */
1614 return(xmlStrncat(cur, add, p - add));
1615}
1616
1617/************************************************************************
1618 * *
1619 * Commodity functions, cleanup needed ? *
1620 * *
1621 ************************************************************************/
1622
1623/**
1624 * areBlanks:
1625 * @ctxt: an XML parser context
1626 * @str: a xmlChar *
1627 * @len: the size of @str
1628 *
1629 * Is this a sequence of blank chars that one can ignore ?
1630 *
1631 * Returns 1 if ignorable 0 otherwise.
1632 */
1633
1634static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1635 int i, ret;
1636 xmlNodePtr lastChild;
1637
Daniel Veillard05c13a22001-09-09 08:38:09 +00001638 /*
1639 * Don't spend time trying to differentiate them, the same callback is
1640 * used !
1641 */
1642 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001643 return(0);
1644
Owen Taylor3473f882001-02-23 17:55:21 +00001645 /*
1646 * Check for xml:space value.
1647 */
1648 if (*(ctxt->space) == 1)
1649 return(0);
1650
1651 /*
1652 * Check that the string is made of blanks
1653 */
1654 for (i = 0;i < len;i++)
1655 if (!(IS_BLANK(str[i]))) return(0);
1656
1657 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001658 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001659 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001660 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001661 if (ctxt->myDoc != NULL) {
1662 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1663 if (ret == 0) return(1);
1664 if (ret == 1) return(0);
1665 }
1666
1667 /*
1668 * Otherwise, heuristic :-\
1669 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001670 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001671 if ((ctxt->node->children == NULL) &&
1672 (RAW == '<') && (NXT(1) == '/')) return(0);
1673
1674 lastChild = xmlGetLastChild(ctxt->node);
1675 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001676 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1677 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001678 } else if (xmlNodeIsText(lastChild))
1679 return(0);
1680 else if ((ctxt->node->children != NULL) &&
1681 (xmlNodeIsText(ctxt->node->children)))
1682 return(0);
1683 return(1);
1684}
1685
Owen Taylor3473f882001-02-23 17:55:21 +00001686/************************************************************************
1687 * *
1688 * Extra stuff for namespace support *
1689 * Relates to http://www.w3.org/TR/WD-xml-names *
1690 * *
1691 ************************************************************************/
1692
1693/**
1694 * xmlSplitQName:
1695 * @ctxt: an XML parser context
1696 * @name: an XML parser context
1697 * @prefix: a xmlChar **
1698 *
1699 * parse an UTF8 encoded XML qualified name string
1700 *
1701 * [NS 5] QName ::= (Prefix ':')? LocalPart
1702 *
1703 * [NS 6] Prefix ::= NCName
1704 *
1705 * [NS 7] LocalPart ::= NCName
1706 *
1707 * Returns the local part, and prefix is updated
1708 * to get the Prefix if any.
1709 */
1710
1711xmlChar *
1712xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1713 xmlChar buf[XML_MAX_NAMELEN + 5];
1714 xmlChar *buffer = NULL;
1715 int len = 0;
1716 int max = XML_MAX_NAMELEN;
1717 xmlChar *ret = NULL;
1718 const xmlChar *cur = name;
1719 int c;
1720
1721 *prefix = NULL;
1722
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00001723 if (cur == NULL) return(NULL);
1724
Owen Taylor3473f882001-02-23 17:55:21 +00001725#ifndef XML_XML_NAMESPACE
1726 /* xml: prefix is not really a namespace */
1727 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1728 (cur[2] == 'l') && (cur[3] == ':'))
1729 return(xmlStrdup(name));
1730#endif
1731
Daniel Veillard597bc482003-07-24 16:08:28 +00001732 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00001733 if (cur[0] == ':')
1734 return(xmlStrdup(name));
1735
1736 c = *cur++;
1737 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1738 buf[len++] = c;
1739 c = *cur++;
1740 }
1741 if (len >= max) {
1742 /*
1743 * Okay someone managed to make a huge name, so he's ready to pay
1744 * for the processing speed.
1745 */
1746 max = len * 2;
1747
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001748 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001749 if (buffer == NULL) {
1750 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1751 ctxt->sax->error(ctxt->userData,
1752 "xmlSplitQName: out of memory\n");
1753 return(NULL);
1754 }
1755 memcpy(buffer, buf, len);
1756 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1757 if (len + 10 > max) {
1758 max *= 2;
1759 buffer = (xmlChar *) xmlRealloc(buffer,
1760 max * sizeof(xmlChar));
1761 if (buffer == NULL) {
1762 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1763 ctxt->sax->error(ctxt->userData,
1764 "xmlSplitQName: out of memory\n");
1765 return(NULL);
1766 }
1767 }
1768 buffer[len++] = c;
1769 c = *cur++;
1770 }
1771 buffer[len] = 0;
1772 }
1773
Daniel Veillard597bc482003-07-24 16:08:28 +00001774 /* nasty but well=formed
1775 if ((c == ':') && (*cur == 0)) {
1776 return(xmlStrdup(name));
1777 } */
1778
Owen Taylor3473f882001-02-23 17:55:21 +00001779 if (buffer == NULL)
1780 ret = xmlStrndup(buf, len);
1781 else {
1782 ret = buffer;
1783 buffer = NULL;
1784 max = XML_MAX_NAMELEN;
1785 }
1786
1787
1788 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00001789 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001790 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00001791 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00001792 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00001793 }
Owen Taylor3473f882001-02-23 17:55:21 +00001794 len = 0;
1795
Daniel Veillardbb284f42002-10-16 18:02:47 +00001796 /*
1797 * Check that the first character is proper to start
1798 * a new name
1799 */
1800 if (!(((c >= 0x61) && (c <= 0x7A)) ||
1801 ((c >= 0x41) && (c <= 0x5A)) ||
1802 (c == '_') || (c == ':'))) {
1803 int l;
1804 int first = CUR_SCHAR(cur, l);
1805
1806 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001807 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1808 (ctxt->sax->error != NULL))
Daniel Veillardbb284f42002-10-16 18:02:47 +00001809 ctxt->sax->error(ctxt->userData,
1810 "Name %s is not XML Namespace compliant\n",
1811 name);
1812 }
1813 }
1814 cur++;
1815
Owen Taylor3473f882001-02-23 17:55:21 +00001816 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1817 buf[len++] = c;
1818 c = *cur++;
1819 }
1820 if (len >= max) {
1821 /*
1822 * Okay someone managed to make a huge name, so he's ready to pay
1823 * for the processing speed.
1824 */
1825 max = len * 2;
1826
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001827 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001828 if (buffer == NULL) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001829 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1830 (ctxt->sax->error != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00001831 ctxt->sax->error(ctxt->userData,
1832 "xmlSplitQName: out of memory\n");
1833 return(NULL);
1834 }
1835 memcpy(buffer, buf, len);
1836 while (c != 0) { /* tested bigname2.xml */
1837 if (len + 10 > max) {
1838 max *= 2;
1839 buffer = (xmlChar *) xmlRealloc(buffer,
1840 max * sizeof(xmlChar));
1841 if (buffer == NULL) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001842 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1843 (ctxt->sax->error != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00001844 ctxt->sax->error(ctxt->userData,
1845 "xmlSplitQName: out of memory\n");
1846 return(NULL);
1847 }
1848 }
1849 buffer[len++] = c;
1850 c = *cur++;
1851 }
1852 buffer[len] = 0;
1853 }
1854
1855 if (buffer == NULL)
1856 ret = xmlStrndup(buf, len);
1857 else {
1858 ret = buffer;
1859 }
1860 }
1861
1862 return(ret);
1863}
1864
/************************************************************************
 *									*
 *		The parser itself					*
 *	Relates to http://www.w3.org/TR/REC-xml				*
 *									*
 ************************************************************************/
1871
Daniel Veillard76d66f42001-05-16 21:05:17 +00001872static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001873/**
1874 * xmlParseName:
1875 * @ctxt: an XML parser context
1876 *
1877 * parse an XML name.
1878 *
1879 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1880 * CombiningChar | Extender
1881 *
1882 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1883 *
1884 * [6] Names ::= Name (S Name)*
1885 *
1886 * Returns the Name parsed or NULL
1887 */
1888
1889xmlChar *
1890xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001891 const xmlChar *in;
1892 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001893 int count = 0;
1894
1895 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001896
1897 /*
1898 * Accelerator for simple ASCII names
1899 */
1900 in = ctxt->input->cur;
1901 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1902 ((*in >= 0x41) && (*in <= 0x5A)) ||
1903 (*in == '_') || (*in == ':')) {
1904 in++;
1905 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1906 ((*in >= 0x41) && (*in <= 0x5A)) ||
1907 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001908 (*in == '_') || (*in == '-') ||
1909 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001910 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001911 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001912 count = in - ctxt->input->cur;
1913 ret = xmlStrndup(ctxt->input->cur, count);
1914 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00001915 ctxt->nbChars += count;
1916 ctxt->input->col += count;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00001917 if (ret == NULL) {
1918 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1919 ctxt->sax->error(ctxt->userData,
1920 "XML parser: out of memory\n");
1921 ctxt->errNo = XML_ERR_NO_MEMORY;
1922 ctxt->instate = XML_PARSER_EOF;
1923 ctxt->disableSAX = 1;
1924 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001925 return(ret);
1926 }
1927 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001928 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001929}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001930
Daniel Veillard46de64e2002-05-29 08:21:33 +00001931/**
1932 * xmlParseNameAndCompare:
1933 * @ctxt: an XML parser context
1934 *
1935 * parse an XML name and compares for match
1936 * (specialized for endtag parsing)
1937 *
1938 *
1939 * Returns NULL for an illegal name, (xmlChar*) 1 for success
1940 * and the name for mismatch
1941 */
1942
Daniel Veillardf4862f02002-09-10 11:13:43 +00001943static xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00001944xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
1945 const xmlChar *cmp = other;
1946 const xmlChar *in;
1947 xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00001948
1949 GROW;
1950
1951 in = ctxt->input->cur;
1952 while (*in != 0 && *in == *cmp) {
1953 ++in;
1954 ++cmp;
1955 }
1956 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
1957 /* success */
1958 ctxt->input->cur = in;
1959 return (xmlChar*) 1;
1960 }
1961 /* failure (or end of input buffer), check with full function */
1962 ret = xmlParseName (ctxt);
1963 if (ret != 0 && xmlStrEqual (ret, other)) {
1964 xmlFree (ret);
1965 return (xmlChar*) 1;
1966 }
1967 return ret;
1968}
1969
Daniel Veillard76d66f42001-05-16 21:05:17 +00001970static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001971xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1972 xmlChar buf[XML_MAX_NAMELEN + 5];
1973 int len = 0, l;
1974 int c;
1975 int count = 0;
1976
1977 /*
1978 * Handler for more complex cases
1979 */
1980 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001981 c = CUR_CHAR(l);
1982 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1983 (!IS_LETTER(c) && (c != '_') &&
1984 (c != ':'))) {
1985 return(NULL);
1986 }
1987
1988 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1989 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1990 (c == '.') || (c == '-') ||
1991 (c == '_') || (c == ':') ||
1992 (IS_COMBINING(c)) ||
1993 (IS_EXTENDER(c)))) {
1994 if (count++ > 100) {
1995 count = 0;
1996 GROW;
1997 }
1998 COPY_BUF(l,buf,len,c);
1999 NEXTL(l);
2000 c = CUR_CHAR(l);
2001 if (len >= XML_MAX_NAMELEN) {
2002 /*
2003 * Okay someone managed to make a huge name, so he's ready to pay
2004 * for the processing speed.
2005 */
2006 xmlChar *buffer;
2007 int max = len * 2;
2008
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002009 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002010 if (buffer == NULL) {
2011 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2012 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00002013 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002014 return(NULL);
2015 }
2016 memcpy(buffer, buf, len);
2017 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
2018 (c == '.') || (c == '-') ||
2019 (c == '_') || (c == ':') ||
2020 (IS_COMBINING(c)) ||
2021 (IS_EXTENDER(c))) {
2022 if (count++ > 100) {
2023 count = 0;
2024 GROW;
2025 }
2026 if (len + 10 > max) {
2027 max *= 2;
2028 buffer = (xmlChar *) xmlRealloc(buffer,
2029 max * sizeof(xmlChar));
2030 if (buffer == NULL) {
2031 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2032 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00002033 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002034 return(NULL);
2035 }
2036 }
2037 COPY_BUF(l,buffer,len,c);
2038 NEXTL(l);
2039 c = CUR_CHAR(l);
2040 }
2041 buffer[len] = 0;
2042 return(buffer);
2043 }
2044 }
2045 return(xmlStrndup(buf, len));
2046}
2047
2048/**
2049 * xmlParseStringName:
2050 * @ctxt: an XML parser context
2051 * @str: a pointer to the string pointer (IN/OUT)
2052 *
2053 * parse an XML name.
2054 *
2055 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2056 * CombiningChar | Extender
2057 *
2058 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2059 *
2060 * [6] Names ::= Name (S Name)*
2061 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002062 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002063 * is updated to the current location in the string.
2064 */
2065
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002066static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002067xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2068 xmlChar buf[XML_MAX_NAMELEN + 5];
2069 const xmlChar *cur = *str;
2070 int len = 0, l;
2071 int c;
2072
2073 c = CUR_SCHAR(cur, l);
2074 if (!IS_LETTER(c) && (c != '_') &&
2075 (c != ':')) {
2076 return(NULL);
2077 }
2078
2079 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2080 (c == '.') || (c == '-') ||
2081 (c == '_') || (c == ':') ||
2082 (IS_COMBINING(c)) ||
2083 (IS_EXTENDER(c))) {
2084 COPY_BUF(l,buf,len,c);
2085 cur += l;
2086 c = CUR_SCHAR(cur, l);
2087 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2088 /*
2089 * Okay someone managed to make a huge name, so he's ready to pay
2090 * for the processing speed.
2091 */
2092 xmlChar *buffer;
2093 int max = len * 2;
2094
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002095 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002096 if (buffer == NULL) {
2097 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2098 ctxt->sax->error(ctxt->userData,
2099 "xmlParseStringName: out of memory\n");
2100 return(NULL);
2101 }
2102 memcpy(buffer, buf, len);
2103 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2104 (c == '.') || (c == '-') ||
2105 (c == '_') || (c == ':') ||
2106 (IS_COMBINING(c)) ||
2107 (IS_EXTENDER(c))) {
2108 if (len + 10 > max) {
2109 max *= 2;
2110 buffer = (xmlChar *) xmlRealloc(buffer,
2111 max * sizeof(xmlChar));
2112 if (buffer == NULL) {
2113 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2114 ctxt->sax->error(ctxt->userData,
2115 "xmlParseStringName: out of memory\n");
2116 return(NULL);
2117 }
2118 }
2119 COPY_BUF(l,buffer,len,c);
2120 cur += l;
2121 c = CUR_SCHAR(cur, l);
2122 }
2123 buffer[len] = 0;
2124 *str = cur;
2125 return(buffer);
2126 }
2127 }
2128 *str = cur;
2129 return(xmlStrndup(buf, len));
2130}
2131
2132/**
2133 * xmlParseNmtoken:
2134 * @ctxt: an XML parser context
2135 *
2136 * parse an XML Nmtoken.
2137 *
2138 * [7] Nmtoken ::= (NameChar)+
2139 *
2140 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2141 *
2142 * Returns the Nmtoken parsed or NULL
2143 */
2144
2145xmlChar *
2146xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2147 xmlChar buf[XML_MAX_NAMELEN + 5];
2148 int len = 0, l;
2149 int c;
2150 int count = 0;
2151
2152 GROW;
2153 c = CUR_CHAR(l);
2154
2155 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2156 (c == '.') || (c == '-') ||
2157 (c == '_') || (c == ':') ||
2158 (IS_COMBINING(c)) ||
2159 (IS_EXTENDER(c))) {
2160 if (count++ > 100) {
2161 count = 0;
2162 GROW;
2163 }
2164 COPY_BUF(l,buf,len,c);
2165 NEXTL(l);
2166 c = CUR_CHAR(l);
2167 if (len >= XML_MAX_NAMELEN) {
2168 /*
2169 * Okay someone managed to make a huge token, so he's ready to pay
2170 * for the processing speed.
2171 */
2172 xmlChar *buffer;
2173 int max = len * 2;
2174
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002175 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002176 if (buffer == NULL) {
2177 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2178 ctxt->sax->error(ctxt->userData,
2179 "xmlParseNmtoken: out of memory\n");
2180 return(NULL);
2181 }
2182 memcpy(buffer, buf, len);
2183 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2184 (c == '.') || (c == '-') ||
2185 (c == '_') || (c == ':') ||
2186 (IS_COMBINING(c)) ||
2187 (IS_EXTENDER(c))) {
2188 if (count++ > 100) {
2189 count = 0;
2190 GROW;
2191 }
2192 if (len + 10 > max) {
2193 max *= 2;
2194 buffer = (xmlChar *) xmlRealloc(buffer,
2195 max * sizeof(xmlChar));
2196 if (buffer == NULL) {
2197 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2198 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002199 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002200 return(NULL);
2201 }
2202 }
2203 COPY_BUF(l,buffer,len,c);
2204 NEXTL(l);
2205 c = CUR_CHAR(l);
2206 }
2207 buffer[len] = 0;
2208 return(buffer);
2209 }
2210 }
2211 if (len == 0)
2212 return(NULL);
2213 return(xmlStrndup(buf, len));
2214}
2215
2216/**
2217 * xmlParseEntityValue:
2218 * @ctxt: an XML parser context
2219 * @orig: if non-NULL store a copy of the original entity value
2220 *
2221 * parse a value for ENTITY declarations
2222 *
2223 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2224 * "'" ([^%&'] | PEReference | Reference)* "'"
2225 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002226 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002227 */
2228
2229xmlChar *
2230xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2231 xmlChar *buf = NULL;
2232 int len = 0;
2233 int size = XML_PARSER_BUFFER_SIZE;
2234 int c, l;
2235 xmlChar stop;
2236 xmlChar *ret = NULL;
2237 const xmlChar *cur = NULL;
2238 xmlParserInputPtr input;
2239
2240 if (RAW == '"') stop = '"';
2241 else if (RAW == '\'') stop = '\'';
2242 else {
2243 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2244 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2245 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2246 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002247 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002248 return(NULL);
2249 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002250 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002251 if (buf == NULL) {
2252 xmlGenericError(xmlGenericErrorContext,
2253 "malloc of %d byte failed\n", size);
2254 return(NULL);
2255 }
2256
2257 /*
2258 * The content of the entity definition is copied in a buffer.
2259 */
2260
2261 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2262 input = ctxt->input;
2263 GROW;
2264 NEXT;
2265 c = CUR_CHAR(l);
2266 /*
2267 * NOTE: 4.4.5 Included in Literal
2268 * When a parameter entity reference appears in a literal entity
2269 * value, ... a single or double quote character in the replacement
2270 * text is always treated as a normal data character and will not
2271 * terminate the literal.
2272 * In practice it means we stop the loop only when back at parsing
2273 * the initial entity and the quote is found
2274 */
2275 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2276 (ctxt->input != input))) {
2277 if (len + 5 >= size) {
2278 size *= 2;
2279 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2280 if (buf == NULL) {
2281 xmlGenericError(xmlGenericErrorContext,
2282 "realloc of %d byte failed\n", size);
2283 return(NULL);
2284 }
2285 }
2286 COPY_BUF(l,buf,len,c);
2287 NEXTL(l);
2288 /*
2289 * Pop-up of finished entities.
2290 */
2291 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2292 xmlPopInput(ctxt);
2293
2294 GROW;
2295 c = CUR_CHAR(l);
2296 if (c == 0) {
2297 GROW;
2298 c = CUR_CHAR(l);
2299 }
2300 }
2301 buf[len] = 0;
2302
2303 /*
2304 * Raise problem w.r.t. '&' and '%' being used in non-entities
2305 * reference constructs. Note Charref will be handled in
2306 * xmlStringDecodeEntities()
2307 */
2308 cur = buf;
2309 while (*cur != 0) { /* non input consuming */
2310 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2311 xmlChar *name;
2312 xmlChar tmp = *cur;
2313
2314 cur++;
2315 name = xmlParseStringName(ctxt, &cur);
2316 if ((name == NULL) || (*cur != ';')) {
2317 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2318 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2319 ctxt->sax->error(ctxt->userData,
2320 "EntityValue: '%c' forbidden except for entities references\n",
2321 tmp);
2322 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002323 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002324 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002325 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2326 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002327 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2328 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2329 ctxt->sax->error(ctxt->userData,
2330 "EntityValue: PEReferences forbidden in internal subset\n",
2331 tmp);
2332 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002333 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002334 }
2335 if (name != NULL)
2336 xmlFree(name);
2337 }
2338 cur++;
2339 }
2340
2341 /*
2342 * Then PEReference entities are substituted.
2343 */
2344 if (c != stop) {
2345 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2346 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2347 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2348 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002349 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002350 xmlFree(buf);
2351 } else {
2352 NEXT;
2353 /*
2354 * NOTE: 4.4.7 Bypassed
2355 * When a general entity reference appears in the EntityValue in
2356 * an entity declaration, it is bypassed and left as is.
2357 * so XML_SUBSTITUTE_REF is not set here.
2358 */
2359 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2360 0, 0, 0);
2361 if (orig != NULL)
2362 *orig = buf;
2363 else
2364 xmlFree(buf);
2365 }
2366
2367 return(ret);
2368}
2369
2370/**
2371 * xmlParseAttValue:
2372 * @ctxt: an XML parser context
2373 *
2374 * parse a value for an attribute
2375 * Note: the parser won't do substitution of entities here, this
2376 * will be handled later in xmlStringGetNodeList
2377 *
2378 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2379 * "'" ([^<&'] | Reference)* "'"
2380 *
2381 * 3.3.3 Attribute-Value Normalization:
2382 * Before the value of an attribute is passed to the application or
2383 * checked for validity, the XML processor must normalize it as follows:
2384 * - a character reference is processed by appending the referenced
2385 * character to the attribute value
2386 * - an entity reference is processed by recursively processing the
2387 * replacement text of the entity
2388 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2389 * appending #x20 to the normalized value, except that only a single
2390 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2391 * parsed entity or the literal entity value of an internal parsed entity
2392 * - other characters are processed by appending them to the normalized value
2393 * If the declared value is not CDATA, then the XML processor must further
2394 * process the normalized attribute value by discarding any leading and
2395 * trailing space (#x20) characters, and by replacing sequences of space
2396 * (#x20) characters by a single space (#x20) character.
2397 * All attributes for which no declaration has been read should be treated
2398 * by a non-validating parser as if declared CDATA.
2399 *
2400 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2401 */
2402
2403xmlChar *
Daniel Veillarde72c7562002-05-31 09:47:30 +00002404xmlParseAttValueComplex(xmlParserCtxtPtr ctxt);
2405
2406xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002407xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2408 xmlChar limit = 0;
Daniel Veillardf4862f02002-09-10 11:13:43 +00002409 const xmlChar *in = NULL;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002410 xmlChar *ret = NULL;
2411 SHRINK;
2412 GROW;
Daniel Veillarde645e8c2002-10-22 17:35:37 +00002413 in = (xmlChar *) CUR_PTR;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002414 if (*in != '"' && *in != '\'') {
2415 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2416 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2417 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2418 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002419 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002420 return(NULL);
2421 }
2422 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2423 limit = *in;
2424 ++in;
2425
2426 while (*in != limit && *in >= 0x20 && *in <= 0x7f &&
2427 *in != '&' && *in != '<'
2428 ) {
2429 ++in;
2430 }
2431 if (*in != limit) {
2432 return xmlParseAttValueComplex(ctxt);
2433 }
2434 ++in;
2435 ret = xmlStrndup (CUR_PTR + 1, in - CUR_PTR - 2);
2436 CUR_PTR = in;
2437 return ret;
2438}
2439
Daniel Veillard01c13b52002-12-10 15:19:08 +00002440/**
2441 * xmlParseAttValueComplex:
2442 * @ctxt: an XML parser context
2443 *
2444 * parse a value for an attribute, this is the fallback function
2445 * of xmlParseAttValue() when the attribute parsing requires handling
2446 * of non-ASCII characters.
2447 *
2448 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2449 */
Daniel Veillarde72c7562002-05-31 09:47:30 +00002450xmlChar *
2451xmlParseAttValueComplex(xmlParserCtxtPtr ctxt) {
2452 xmlChar limit = 0;
2453 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002454 int len = 0;
2455 int buf_size = 0;
2456 int c, l;
2457 xmlChar *current = NULL;
2458 xmlEntityPtr ent;
2459
2460
2461 SHRINK;
2462 if (NXT(0) == '"') {
2463 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2464 limit = '"';
2465 NEXT;
2466 } else if (NXT(0) == '\'') {
2467 limit = '\'';
2468 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2469 NEXT;
2470 } else {
2471 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2472 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2473 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2474 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002475 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002476 return(NULL);
2477 }
2478
2479 /*
2480 * allocate a translation buffer.
2481 */
2482 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002483 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002484 if (buf == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002485 xmlGenericError(xmlGenericErrorContext,
2486 "xmlParseAttValue: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002487 return(NULL);
2488 }
2489
2490 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002491 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002492 */
2493 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002494 while ((NXT(0) != limit) && /* checked */
2495 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002496 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002497 if (c == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00002498 if (NXT(1) == '#') {
2499 int val = xmlParseCharRef(ctxt);
2500 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002501 if (ctxt->replaceEntities) {
2502 if (len > buf_size - 10) {
2503 growBuffer(buf);
2504 }
2505 buf[len++] = '&';
2506 } else {
2507 /*
2508 * The reparsing will be done in xmlStringGetNodeList()
2509 * called by the attribute() function in SAX.c
2510 */
2511 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002512
Daniel Veillard319a7422001-09-11 09:27:09 +00002513 if (len > buf_size - 10) {
2514 growBuffer(buf);
2515 }
2516 current = &buffer[0];
2517 while (*current != 0) { /* non input consuming */
2518 buf[len++] = *current++;
2519 }
Owen Taylor3473f882001-02-23 17:55:21 +00002520 }
2521 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002522 if (len > buf_size - 10) {
2523 growBuffer(buf);
2524 }
Owen Taylor3473f882001-02-23 17:55:21 +00002525 len += xmlCopyChar(0, &buf[len], val);
2526 }
2527 } else {
2528 ent = xmlParseEntityRef(ctxt);
2529 if ((ent != NULL) &&
2530 (ctxt->replaceEntities != 0)) {
2531 xmlChar *rep;
2532
2533 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2534 rep = xmlStringDecodeEntities(ctxt, ent->content,
2535 XML_SUBSTITUTE_REF, 0, 0, 0);
2536 if (rep != NULL) {
2537 current = rep;
2538 while (*current != 0) { /* non input consuming */
2539 buf[len++] = *current++;
2540 if (len > buf_size - 10) {
2541 growBuffer(buf);
2542 }
2543 }
2544 xmlFree(rep);
2545 }
2546 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002547 if (len > buf_size - 10) {
2548 growBuffer(buf);
2549 }
Owen Taylor3473f882001-02-23 17:55:21 +00002550 if (ent->content != NULL)
2551 buf[len++] = ent->content[0];
2552 }
2553 } else if (ent != NULL) {
2554 int i = xmlStrlen(ent->name);
2555 const xmlChar *cur = ent->name;
2556
2557 /*
2558 * This may look absurd but is needed to detect
2559 * entities problems
2560 */
2561 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2562 (ent->content != NULL)) {
2563 xmlChar *rep;
2564 rep = xmlStringDecodeEntities(ctxt, ent->content,
2565 XML_SUBSTITUTE_REF, 0, 0, 0);
2566 if (rep != NULL)
2567 xmlFree(rep);
2568 }
2569
2570 /*
2571 * Just output the reference
2572 */
2573 buf[len++] = '&';
2574 if (len > buf_size - i - 10) {
2575 growBuffer(buf);
2576 }
2577 for (;i > 0;i--)
2578 buf[len++] = *cur++;
2579 buf[len++] = ';';
2580 }
2581 }
2582 } else {
2583 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2584 COPY_BUF(l,buf,len,0x20);
2585 if (len > buf_size - 10) {
2586 growBuffer(buf);
2587 }
2588 } else {
2589 COPY_BUF(l,buf,len,c);
2590 if (len > buf_size - 10) {
2591 growBuffer(buf);
2592 }
2593 }
2594 NEXTL(l);
2595 }
2596 GROW;
2597 c = CUR_CHAR(l);
2598 }
2599 buf[len++] = 0;
2600 if (RAW == '<') {
2601 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2603 ctxt->sax->error(ctxt->userData,
2604 "Unescaped '<' not allowed in attributes values\n");
2605 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002606 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002607 } else if (RAW != limit) {
2608 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2609 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2610 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2611 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002612 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002613 } else
2614 NEXT;
2615 return(buf);
2616}
2617
2618/**
2619 * xmlParseSystemLiteral:
2620 * @ctxt: an XML parser context
2621 *
2622 * parse an XML Literal
2623 *
2624 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2625 *
2626 * Returns the SystemLiteral parsed or NULL
2627 */
2628
2629xmlChar *
2630xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2631 xmlChar *buf = NULL;
2632 int len = 0;
2633 int size = XML_PARSER_BUFFER_SIZE;
2634 int cur, l;
2635 xmlChar stop;
2636 int state = ctxt->instate;
2637 int count = 0;
2638
2639 SHRINK;
2640 if (RAW == '"') {
2641 NEXT;
2642 stop = '"';
2643 } else if (RAW == '\'') {
2644 NEXT;
2645 stop = '\'';
2646 } else {
2647 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2648 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2649 ctxt->sax->error(ctxt->userData,
2650 "SystemLiteral \" or ' expected\n");
2651 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002652 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002653 return(NULL);
2654 }
2655
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002656 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002657 if (buf == NULL) {
2658 xmlGenericError(xmlGenericErrorContext,
2659 "malloc of %d byte failed\n", size);
2660 return(NULL);
2661 }
2662 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2663 cur = CUR_CHAR(l);
2664 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2665 if (len + 5 >= size) {
2666 size *= 2;
2667 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2668 if (buf == NULL) {
2669 xmlGenericError(xmlGenericErrorContext,
2670 "realloc of %d byte failed\n", size);
2671 ctxt->instate = (xmlParserInputState) state;
2672 return(NULL);
2673 }
2674 }
2675 count++;
2676 if (count > 50) {
2677 GROW;
2678 count = 0;
2679 }
2680 COPY_BUF(l,buf,len,cur);
2681 NEXTL(l);
2682 cur = CUR_CHAR(l);
2683 if (cur == 0) {
2684 GROW;
2685 SHRINK;
2686 cur = CUR_CHAR(l);
2687 }
2688 }
2689 buf[len] = 0;
2690 ctxt->instate = (xmlParserInputState) state;
2691 if (!IS_CHAR(cur)) {
2692 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2693 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2694 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2695 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002696 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002697 } else {
2698 NEXT;
2699 }
2700 return(buf);
2701}
2702
2703/**
2704 * xmlParsePubidLiteral:
2705 * @ctxt: an XML parser context
2706 *
2707 * parse an XML public literal
2708 *
2709 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2710 *
2711 * Returns the PubidLiteral parsed or NULL.
2712 */
2713
2714xmlChar *
2715xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2716 xmlChar *buf = NULL;
2717 int len = 0;
2718 int size = XML_PARSER_BUFFER_SIZE;
2719 xmlChar cur;
2720 xmlChar stop;
2721 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002722 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00002723
2724 SHRINK;
2725 if (RAW == '"') {
2726 NEXT;
2727 stop = '"';
2728 } else if (RAW == '\'') {
2729 NEXT;
2730 stop = '\'';
2731 } else {
2732 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2733 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2734 ctxt->sax->error(ctxt->userData,
2735 "SystemLiteral \" or ' expected\n");
2736 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002737 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002738 return(NULL);
2739 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002740 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002741 if (buf == NULL) {
2742 xmlGenericError(xmlGenericErrorContext,
2743 "malloc of %d byte failed\n", size);
2744 return(NULL);
2745 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002746 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00002747 cur = CUR;
2748 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2749 if (len + 1 >= size) {
2750 size *= 2;
2751 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2752 if (buf == NULL) {
2753 xmlGenericError(xmlGenericErrorContext,
2754 "realloc of %d byte failed\n", size);
2755 return(NULL);
2756 }
2757 }
2758 buf[len++] = cur;
2759 count++;
2760 if (count > 50) {
2761 GROW;
2762 count = 0;
2763 }
2764 NEXT;
2765 cur = CUR;
2766 if (cur == 0) {
2767 GROW;
2768 SHRINK;
2769 cur = CUR;
2770 }
2771 }
2772 buf[len] = 0;
2773 if (cur != stop) {
2774 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2775 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2776 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2777 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002778 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002779 } else {
2780 NEXT;
2781 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002782 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00002783 return(buf);
2784}
2785
Daniel Veillard48b2f892001-02-25 16:11:03 +00002786void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002787/**
2788 * xmlParseCharData:
2789 * @ctxt: an XML parser context
2790 * @cdata: int indicating whether we are within a CDATA section
2791 *
2792 * parse a CharData section.
2793 * if we are within a CDATA section ']]>' marks an end of section.
2794 *
2795 * The right angle bracket (>) may be represented using the string "&gt;",
2796 * and must, for compatibility, be escaped using "&gt;" or a character
2797 * reference when it appears in the string "]]>" in content, when that
2798 * string is not marking the end of a CDATA section.
2799 *
2800 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2801 */
2802
2803void
2804xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00002805 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002806 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002807 int line = ctxt->input->line;
2808 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002809
2810 SHRINK;
2811 GROW;
2812 /*
2813 * Accelerated common case where input don't need to be
2814 * modified before passing it to the handler.
2815 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00002816 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002817 in = ctxt->input->cur;
2818 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002819get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00002820 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
2821 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002822 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002823 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002824 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002825 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002826 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002827 ctxt->input->line++;
2828 in++;
2829 }
2830 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002831 }
2832 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002833 if ((in[1] == ']') && (in[2] == '>')) {
2834 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2835 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2836 ctxt->sax->error(ctxt->userData,
2837 "Sequence ']]>' not allowed in content\n");
2838 ctxt->input->cur = in;
2839 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002840 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002841 return;
2842 }
2843 in++;
2844 goto get_more;
2845 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002846 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002847 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002848 if (IS_BLANK(*ctxt->input->cur)) {
2849 const xmlChar *tmp = ctxt->input->cur;
2850 ctxt->input->cur = in;
2851 if (areBlanks(ctxt, tmp, nbchar)) {
2852 if (ctxt->sax->ignorableWhitespace != NULL)
2853 ctxt->sax->ignorableWhitespace(ctxt->userData,
2854 tmp, nbchar);
2855 } else {
2856 if (ctxt->sax->characters != NULL)
2857 ctxt->sax->characters(ctxt->userData,
2858 tmp, nbchar);
2859 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002860 line = ctxt->input->line;
2861 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002862 } else {
2863 if (ctxt->sax->characters != NULL)
2864 ctxt->sax->characters(ctxt->userData,
2865 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002866 line = ctxt->input->line;
2867 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002868 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002869 }
2870 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002871 if (*in == 0xD) {
2872 in++;
2873 if (*in == 0xA) {
2874 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002875 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002876 ctxt->input->line++;
2877 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002878 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002879 in--;
2880 }
2881 if (*in == '<') {
2882 return;
2883 }
2884 if (*in == '&') {
2885 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002886 }
2887 SHRINK;
2888 GROW;
2889 in = ctxt->input->cur;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002890 } while ((*in >= 0x20) && (*in <= 0x7F));
Daniel Veillard48b2f892001-02-25 16:11:03 +00002891 nbchar = 0;
2892 }
Daniel Veillard50582112001-03-26 22:52:16 +00002893 ctxt->input->line = line;
2894 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002895 xmlParseCharDataComplex(ctxt, cdata);
2896}
2897
Daniel Veillard01c13b52002-12-10 15:19:08 +00002898/**
2899 * xmlParseCharDataComplex:
2900 * @ctxt: an XML parser context
2901 * @cdata: int indicating whether we are within a CDATA section
2902 *
2903 * parse a CharData section.this is the fallback function
2904 * of xmlParseCharData() when the parsing requires handling
2905 * of non-ASCII characters.
2906 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002907void
2908xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002909 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2910 int nbchar = 0;
2911 int cur, l;
2912 int count = 0;
2913
2914 SHRINK;
2915 GROW;
2916 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002917 while ((cur != '<') && /* checked */
2918 (cur != '&') &&
2919 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00002920 if ((cur == ']') && (NXT(1) == ']') &&
2921 (NXT(2) == '>')) {
2922 if (cdata) break;
2923 else {
2924 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2925 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2926 ctxt->sax->error(ctxt->userData,
2927 "Sequence ']]>' not allowed in content\n");
2928 /* Should this be relaxed ??? I see a "must here */
2929 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002930 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002931 }
2932 }
2933 COPY_BUF(l,buf,nbchar,cur);
2934 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2935 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002936 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002937 */
2938 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2939 if (areBlanks(ctxt, buf, nbchar)) {
2940 if (ctxt->sax->ignorableWhitespace != NULL)
2941 ctxt->sax->ignorableWhitespace(ctxt->userData,
2942 buf, nbchar);
2943 } else {
2944 if (ctxt->sax->characters != NULL)
2945 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2946 }
2947 }
2948 nbchar = 0;
2949 }
2950 count++;
2951 if (count > 50) {
2952 GROW;
2953 count = 0;
2954 }
2955 NEXTL(l);
2956 cur = CUR_CHAR(l);
2957 }
2958 if (nbchar != 0) {
2959 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002960 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002961 */
2962 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2963 if (areBlanks(ctxt, buf, nbchar)) {
2964 if (ctxt->sax->ignorableWhitespace != NULL)
2965 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2966 } else {
2967 if (ctxt->sax->characters != NULL)
2968 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2969 }
2970 }
2971 }
2972}
2973
2974/**
2975 * xmlParseExternalID:
2976 * @ctxt: an XML parser context
2977 * @publicID: a xmlChar** receiving PubidLiteral
2978 * @strict: indicate whether we should restrict parsing to only
2979 * production [75], see NOTE below
2980 *
2981 * Parse an External ID or a Public ID
2982 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002983 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002984 * 'PUBLIC' S PubidLiteral S SystemLiteral
2985 *
2986 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2987 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2988 *
2989 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2990 *
2991 * Returns the function returns SystemLiteral and in the second
2992 * case publicID receives PubidLiteral, is strict is off
2993 * it is possible to return NULL and have publicID set.
2994 */
2995
2996xmlChar *
2997xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2998 xmlChar *URI = NULL;
2999
3000 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003001
3002 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003003 if ((RAW == 'S') && (NXT(1) == 'Y') &&
3004 (NXT(2) == 'S') && (NXT(3) == 'T') &&
3005 (NXT(4) == 'E') && (NXT(5) == 'M')) {
3006 SKIP(6);
3007 if (!IS_BLANK(CUR)) {
3008 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3009 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3010 ctxt->sax->error(ctxt->userData,
3011 "Space required after 'SYSTEM'\n");
3012 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003013 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003014 }
3015 SKIP_BLANKS;
3016 URI = xmlParseSystemLiteral(ctxt);
3017 if (URI == NULL) {
3018 ctxt->errNo = XML_ERR_URI_REQUIRED;
3019 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3020 ctxt->sax->error(ctxt->userData,
3021 "xmlParseExternalID: SYSTEM, no URI\n");
3022 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003023 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003024 }
3025 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
3026 (NXT(2) == 'B') && (NXT(3) == 'L') &&
3027 (NXT(4) == 'I') && (NXT(5) == 'C')) {
3028 SKIP(6);
3029 if (!IS_BLANK(CUR)) {
3030 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3031 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3032 ctxt->sax->error(ctxt->userData,
3033 "Space required after 'PUBLIC'\n");
3034 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003035 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003036 }
3037 SKIP_BLANKS;
3038 *publicID = xmlParsePubidLiteral(ctxt);
3039 if (*publicID == NULL) {
3040 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
3041 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3042 ctxt->sax->error(ctxt->userData,
3043 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
3044 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003045 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003046 }
3047 if (strict) {
3048 /*
3049 * We don't handle [83] so "S SystemLiteral" is required.
3050 */
3051 if (!IS_BLANK(CUR)) {
3052 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3053 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3054 ctxt->sax->error(ctxt->userData,
3055 "Space required after the Public Identifier\n");
3056 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003057 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003058 }
3059 } else {
3060 /*
3061 * We handle [83] so we return immediately, if
3062 * "S SystemLiteral" is not detected. From a purely parsing
3063 * point of view that's a nice mess.
3064 */
3065 const xmlChar *ptr;
3066 GROW;
3067
3068 ptr = CUR_PTR;
3069 if (!IS_BLANK(*ptr)) return(NULL);
3070
3071 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3072 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3073 }
3074 SKIP_BLANKS;
3075 URI = xmlParseSystemLiteral(ctxt);
3076 if (URI == NULL) {
3077 ctxt->errNo = XML_ERR_URI_REQUIRED;
3078 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3079 ctxt->sax->error(ctxt->userData,
3080 "xmlParseExternalID: PUBLIC, no URI\n");
3081 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003082 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003083 }
3084 }
3085 return(URI);
3086}
3087
3088/**
3089 * xmlParseComment:
3090 * @ctxt: an XML parser context
3091 *
3092 * Skip an XML (SGML) comment <!-- .... -->
3093 * The spec says that "For compatibility, the string "--" (double-hyphen)
3094 * must not occur within comments. "
3095 *
3096 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3097 */
3098void
3099xmlParseComment(xmlParserCtxtPtr ctxt) {
3100 xmlChar *buf = NULL;
3101 int len;
3102 int size = XML_PARSER_BUFFER_SIZE;
3103 int q, ql;
3104 int r, rl;
3105 int cur, l;
3106 xmlParserInputState state;
3107 xmlParserInputPtr input = ctxt->input;
3108 int count = 0;
3109
3110 /*
3111 * Check that there is a comment right here.
3112 */
3113 if ((RAW != '<') || (NXT(1) != '!') ||
3114 (NXT(2) != '-') || (NXT(3) != '-')) return;
3115
3116 state = ctxt->instate;
3117 ctxt->instate = XML_PARSER_COMMENT;
3118 SHRINK;
3119 SKIP(4);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003120 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003121 if (buf == NULL) {
3122 xmlGenericError(xmlGenericErrorContext,
3123 "malloc of %d byte failed\n", size);
3124 ctxt->instate = state;
3125 return;
3126 }
3127 q = CUR_CHAR(ql);
3128 NEXTL(ql);
3129 r = CUR_CHAR(rl);
3130 NEXTL(rl);
3131 cur = CUR_CHAR(l);
3132 len = 0;
3133 while (IS_CHAR(cur) && /* checked */
3134 ((cur != '>') ||
3135 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003136 if ((r == '-') && (q == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003137 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
3138 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3139 ctxt->sax->error(ctxt->userData,
3140 "Comment must not contain '--' (double-hyphen)`\n");
3141 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003142 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003143 }
3144 if (len + 5 >= size) {
3145 size *= 2;
3146 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3147 if (buf == NULL) {
3148 xmlGenericError(xmlGenericErrorContext,
3149 "realloc of %d byte failed\n", size);
3150 ctxt->instate = state;
3151 return;
3152 }
3153 }
3154 COPY_BUF(ql,buf,len,q);
3155 q = r;
3156 ql = rl;
3157 r = cur;
3158 rl = l;
3159
3160 count++;
3161 if (count > 50) {
3162 GROW;
3163 count = 0;
3164 }
3165 NEXTL(l);
3166 cur = CUR_CHAR(l);
3167 if (cur == 0) {
3168 SHRINK;
3169 GROW;
3170 cur = CUR_CHAR(l);
3171 }
3172 }
3173 buf[len] = 0;
3174 if (!IS_CHAR(cur)) {
3175 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
3176 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3177 ctxt->sax->error(ctxt->userData,
3178 "Comment not terminated \n<!--%.50s\n", buf);
3179 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003180 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003181 xmlFree(buf);
3182 } else {
3183 if (input != ctxt->input) {
3184 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3185 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3186 ctxt->sax->error(ctxt->userData,
3187"Comment doesn't start and stop in the same entity\n");
3188 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003189 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003190 }
3191 NEXT;
3192 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3193 (!ctxt->disableSAX))
3194 ctxt->sax->comment(ctxt->userData, buf);
3195 xmlFree(buf);
3196 }
3197 ctxt->instate = state;
3198}
3199
3200/**
3201 * xmlParsePITarget:
3202 * @ctxt: an XML parser context
3203 *
3204 * parse the name of a PI
3205 *
3206 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3207 *
3208 * Returns the PITarget name or NULL
3209 */
3210
3211xmlChar *
3212xmlParsePITarget(xmlParserCtxtPtr ctxt) {
3213 xmlChar *name;
3214
3215 name = xmlParseName(ctxt);
3216 if ((name != NULL) &&
3217 ((name[0] == 'x') || (name[0] == 'X')) &&
3218 ((name[1] == 'm') || (name[1] == 'M')) &&
3219 ((name[2] == 'l') || (name[2] == 'L'))) {
3220 int i;
3221 if ((name[0] == 'x') && (name[1] == 'm') &&
3222 (name[2] == 'l') && (name[3] == 0)) {
3223 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3224 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3225 ctxt->sax->error(ctxt->userData,
3226 "XML declaration allowed only at the start of the document\n");
3227 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003228 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003229 return(name);
3230 } else if (name[3] == 0) {
3231 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3232 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3233 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
3234 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003235 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003236 return(name);
3237 }
3238 for (i = 0;;i++) {
3239 if (xmlW3CPIs[i] == NULL) break;
3240 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3241 return(name);
3242 }
3243 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3244 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3245 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003246 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003247 }
3248 }
3249 return(name);
3250}
3251
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The value must have the form: catalog = "URL" (single or double
 * quotes, optional blanks around each token, nothing after the closing
 * quote). A malformed value raises a SAX warning, except for a missing
 * '=' which makes the function return silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* pseudo-attribute name */
    for (; IS_BLANK(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* '=' sign; its absence is not reported */
    for (; IS_BLANK(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;

    /* quoted value */
    for (; IS_BLANK(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    start = ++p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3314
Owen Taylor3473f882001-02-23 17:55:21 +00003315/**
3316 * xmlParsePI:
3317 * @ctxt: an XML parser context
3318 *
3319 * parse an XML Processing Instruction.
3320 *
3321 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3322 *
3323 * The processing is transfered to SAX once parsed.
3324 */
3325
3326void
3327xmlParsePI(xmlParserCtxtPtr ctxt) {
3328 xmlChar *buf = NULL;
3329 int len = 0;
3330 int size = XML_PARSER_BUFFER_SIZE;
3331 int cur, l;
3332 xmlChar *target;
3333 xmlParserInputState state;
3334 int count = 0;
3335
3336 if ((RAW == '<') && (NXT(1) == '?')) {
3337 xmlParserInputPtr input = ctxt->input;
3338 state = ctxt->instate;
3339 ctxt->instate = XML_PARSER_PI;
3340 /*
3341 * this is a Processing Instruction.
3342 */
3343 SKIP(2);
3344 SHRINK;
3345
3346 /*
3347 * Parse the target name and check for special support like
3348 * namespace.
3349 */
3350 target = xmlParsePITarget(ctxt);
3351 if (target != NULL) {
3352 if ((RAW == '?') && (NXT(1) == '>')) {
3353 if (input != ctxt->input) {
3354 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3355 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3356 ctxt->sax->error(ctxt->userData,
3357 "PI declaration doesn't start and stop in the same entity\n");
3358 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003359 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003360 }
3361 SKIP(2);
3362
3363 /*
3364 * SAX: PI detected.
3365 */
3366 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3367 (ctxt->sax->processingInstruction != NULL))
3368 ctxt->sax->processingInstruction(ctxt->userData,
3369 target, NULL);
3370 ctxt->instate = state;
3371 xmlFree(target);
3372 return;
3373 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003374 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003375 if (buf == NULL) {
3376 xmlGenericError(xmlGenericErrorContext,
3377 "malloc of %d byte failed\n", size);
3378 ctxt->instate = state;
3379 return;
3380 }
3381 cur = CUR;
3382 if (!IS_BLANK(cur)) {
3383 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3384 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3385 ctxt->sax->error(ctxt->userData,
3386 "xmlParsePI: PI %s space expected\n", target);
3387 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003388 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003389 }
3390 SKIP_BLANKS;
3391 cur = CUR_CHAR(l);
3392 while (IS_CHAR(cur) && /* checked */
3393 ((cur != '?') || (NXT(1) != '>'))) {
3394 if (len + 5 >= size) {
3395 size *= 2;
3396 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3397 if (buf == NULL) {
3398 xmlGenericError(xmlGenericErrorContext,
3399 "realloc of %d byte failed\n", size);
3400 ctxt->instate = state;
3401 return;
3402 }
3403 }
3404 count++;
3405 if (count > 50) {
3406 GROW;
3407 count = 0;
3408 }
3409 COPY_BUF(l,buf,len,cur);
3410 NEXTL(l);
3411 cur = CUR_CHAR(l);
3412 if (cur == 0) {
3413 SHRINK;
3414 GROW;
3415 cur = CUR_CHAR(l);
3416 }
3417 }
3418 buf[len] = 0;
3419 if (cur != '?') {
3420 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3421 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3422 ctxt->sax->error(ctxt->userData,
3423 "xmlParsePI: PI %s never end ...\n", target);
3424 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003425 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003426 } else {
3427 if (input != ctxt->input) {
3428 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3429 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3430 ctxt->sax->error(ctxt->userData,
3431 "PI declaration doesn't start and stop in the same entity\n");
3432 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003433 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003434 }
3435 SKIP(2);
3436
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003437#ifdef LIBXML_CATALOG_ENABLED
3438 if (((state == XML_PARSER_MISC) ||
3439 (state == XML_PARSER_START)) &&
3440 (xmlStrEqual(target, XML_CATALOG_PI))) {
3441 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3442 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3443 (allow == XML_CATA_ALLOW_ALL))
3444 xmlParseCatalogPI(ctxt, buf);
3445 }
3446#endif
3447
3448
Owen Taylor3473f882001-02-23 17:55:21 +00003449 /*
3450 * SAX: PI detected.
3451 */
3452 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3453 (ctxt->sax->processingInstruction != NULL))
3454 ctxt->sax->processingInstruction(ctxt->userData,
3455 target, buf);
3456 }
3457 xmlFree(buf);
3458 xmlFree(target);
3459 } else {
3460 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3461 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3462 ctxt->sax->error(ctxt->userData,
3463 "xmlParsePI : no target name\n");
3464 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003465 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003466 }
3467 ctxt->instate = state;
3468 }
3469}
3470
3471/**
3472 * xmlParseNotationDecl:
3473 * @ctxt: an XML parser context
3474 *
3475 * parse a notation declaration
3476 *
3477 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3478 *
3479 * Hence there is actually 3 choices:
3480 * 'PUBLIC' S PubidLiteral
3481 * 'PUBLIC' S PubidLiteral S SystemLiteral
3482 * and 'SYSTEM' S SystemLiteral
3483 *
3484 * See the NOTE on xmlParseExternalID().
3485 */
3486
3487void
3488xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3489 xmlChar *name;
3490 xmlChar *Pubid;
3491 xmlChar *Systemid;
3492
3493 if ((RAW == '<') && (NXT(1) == '!') &&
3494 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3495 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3496 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3497 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3498 xmlParserInputPtr input = ctxt->input;
3499 SHRINK;
3500 SKIP(10);
3501 if (!IS_BLANK(CUR)) {
3502 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3503 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3504 ctxt->sax->error(ctxt->userData,
3505 "Space required after '<!NOTATION'\n");
3506 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003507 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003508 return;
3509 }
3510 SKIP_BLANKS;
3511
Daniel Veillard76d66f42001-05-16 21:05:17 +00003512 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003513 if (name == NULL) {
3514 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3515 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3516 ctxt->sax->error(ctxt->userData,
3517 "NOTATION: Name expected here\n");
3518 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003519 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003520 return;
3521 }
3522 if (!IS_BLANK(CUR)) {
3523 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3524 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3525 ctxt->sax->error(ctxt->userData,
3526 "Space required after the NOTATION name'\n");
3527 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003528 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003529 return;
3530 }
3531 SKIP_BLANKS;
3532
3533 /*
3534 * Parse the IDs.
3535 */
3536 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3537 SKIP_BLANKS;
3538
3539 if (RAW == '>') {
3540 if (input != ctxt->input) {
3541 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3542 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3543 ctxt->sax->error(ctxt->userData,
3544"Notation declaration doesn't start and stop in the same entity\n");
3545 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003546 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003547 }
3548 NEXT;
3549 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3550 (ctxt->sax->notationDecl != NULL))
3551 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3552 } else {
3553 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3554 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3555 ctxt->sax->error(ctxt->userData,
3556 "'>' required to close NOTATION declaration\n");
3557 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003558 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003559 }
3560 xmlFree(name);
3561 if (Systemid != NULL) xmlFree(Systemid);
3562 if (Pubid != NULL) xmlFree(Pubid);
3563 }
3564}
3565
3566/**
3567 * xmlParseEntityDecl:
3568 * @ctxt: an XML parser context
3569 *
3570 * parse <!ENTITY declarations
3571 *
3572 * [70] EntityDecl ::= GEDecl | PEDecl
3573 *
3574 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3575 *
3576 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3577 *
3578 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3579 *
3580 * [74] PEDef ::= EntityValue | ExternalID
3581 *
3582 * [76] NDataDecl ::= S 'NDATA' S Name
3583 *
3584 * [ VC: Notation Declared ]
3585 * The Name must match the declared name of a notation.
3586 */
3587
3588void
3589xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3590 xmlChar *name = NULL;
3591 xmlChar *value = NULL;
3592 xmlChar *URI = NULL, *literal = NULL;
3593 xmlChar *ndata = NULL;
3594 int isParameter = 0;
3595 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003596 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003597
3598 GROW;
3599 if ((RAW == '<') && (NXT(1) == '!') &&
3600 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3601 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3602 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3603 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003604 SHRINK;
3605 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003606 skipped = SKIP_BLANKS;
3607 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003608 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3609 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3610 ctxt->sax->error(ctxt->userData,
3611 "Space required after '<!ENTITY'\n");
3612 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003613 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003614 }
Owen Taylor3473f882001-02-23 17:55:21 +00003615
3616 if (RAW == '%') {
3617 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003618 skipped = SKIP_BLANKS;
3619 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003620 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3621 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3622 ctxt->sax->error(ctxt->userData,
3623 "Space required after '%'\n");
3624 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003625 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003626 }
Owen Taylor3473f882001-02-23 17:55:21 +00003627 isParameter = 1;
3628 }
3629
Daniel Veillard76d66f42001-05-16 21:05:17 +00003630 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003631 if (name == NULL) {
3632 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3633 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3634 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3635 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003636 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003637 return;
3638 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003639 skipped = SKIP_BLANKS;
3640 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003641 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3642 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3643 ctxt->sax->error(ctxt->userData,
3644 "Space required after the entity name\n");
3645 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003646 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003647 }
Owen Taylor3473f882001-02-23 17:55:21 +00003648
Daniel Veillardf5582f12002-06-11 10:08:16 +00003649 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003650 /*
3651 * handle the various case of definitions...
3652 */
3653 if (isParameter) {
3654 if ((RAW == '"') || (RAW == '\'')) {
3655 value = xmlParseEntityValue(ctxt, &orig);
3656 if (value) {
3657 if ((ctxt->sax != NULL) &&
3658 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3659 ctxt->sax->entityDecl(ctxt->userData, name,
3660 XML_INTERNAL_PARAMETER_ENTITY,
3661 NULL, NULL, value);
3662 }
3663 } else {
3664 URI = xmlParseExternalID(ctxt, &literal, 1);
3665 if ((URI == NULL) && (literal == NULL)) {
3666 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3667 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3668 ctxt->sax->error(ctxt->userData,
3669 "Entity value required\n");
3670 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003671 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003672 }
3673 if (URI) {
3674 xmlURIPtr uri;
3675
3676 uri = xmlParseURI((const char *) URI);
3677 if (uri == NULL) {
3678 ctxt->errNo = XML_ERR_INVALID_URI;
3679 if ((ctxt->sax != NULL) &&
3680 (!ctxt->disableSAX) &&
3681 (ctxt->sax->error != NULL))
3682 ctxt->sax->error(ctxt->userData,
3683 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003684 /*
3685 * This really ought to be a well formedness error
3686 * but the XML Core WG decided otherwise c.f. issue
3687 * E26 of the XML erratas.
3688 */
Owen Taylor3473f882001-02-23 17:55:21 +00003689 } else {
3690 if (uri->fragment != NULL) {
3691 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3692 if ((ctxt->sax != NULL) &&
3693 (!ctxt->disableSAX) &&
3694 (ctxt->sax->error != NULL))
3695 ctxt->sax->error(ctxt->userData,
3696 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003697 /*
3698 * Okay this is foolish to block those but not
3699 * invalid URIs.
3700 */
Owen Taylor3473f882001-02-23 17:55:21 +00003701 ctxt->wellFormed = 0;
3702 } else {
3703 if ((ctxt->sax != NULL) &&
3704 (!ctxt->disableSAX) &&
3705 (ctxt->sax->entityDecl != NULL))
3706 ctxt->sax->entityDecl(ctxt->userData, name,
3707 XML_EXTERNAL_PARAMETER_ENTITY,
3708 literal, URI, NULL);
3709 }
3710 xmlFreeURI(uri);
3711 }
3712 }
3713 }
3714 } else {
3715 if ((RAW == '"') || (RAW == '\'')) {
3716 value = xmlParseEntityValue(ctxt, &orig);
3717 if ((ctxt->sax != NULL) &&
3718 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3719 ctxt->sax->entityDecl(ctxt->userData, name,
3720 XML_INTERNAL_GENERAL_ENTITY,
3721 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003722 /*
3723 * For expat compatibility in SAX mode.
3724 */
3725 if ((ctxt->myDoc == NULL) ||
3726 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3727 if (ctxt->myDoc == NULL) {
3728 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3729 }
3730 if (ctxt->myDoc->intSubset == NULL)
3731 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3732 BAD_CAST "fake", NULL, NULL);
3733
3734 entityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3735 NULL, NULL, value);
3736 }
Owen Taylor3473f882001-02-23 17:55:21 +00003737 } else {
3738 URI = xmlParseExternalID(ctxt, &literal, 1);
3739 if ((URI == NULL) && (literal == NULL)) {
3740 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3741 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3742 ctxt->sax->error(ctxt->userData,
3743 "Entity value required\n");
3744 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003745 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003746 }
3747 if (URI) {
3748 xmlURIPtr uri;
3749
3750 uri = xmlParseURI((const char *)URI);
3751 if (uri == NULL) {
3752 ctxt->errNo = XML_ERR_INVALID_URI;
3753 if ((ctxt->sax != NULL) &&
3754 (!ctxt->disableSAX) &&
3755 (ctxt->sax->error != NULL))
3756 ctxt->sax->error(ctxt->userData,
3757 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003758 /*
3759 * This really ought to be a well formedness error
3760 * but the XML Core WG decided otherwise c.f. issue
3761 * E26 of the XML erratas.
3762 */
Owen Taylor3473f882001-02-23 17:55:21 +00003763 } else {
3764 if (uri->fragment != NULL) {
3765 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3766 if ((ctxt->sax != NULL) &&
3767 (!ctxt->disableSAX) &&
3768 (ctxt->sax->error != NULL))
3769 ctxt->sax->error(ctxt->userData,
3770 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003771 /*
3772 * Okay this is foolish to block those but not
3773 * invalid URIs.
3774 */
Owen Taylor3473f882001-02-23 17:55:21 +00003775 ctxt->wellFormed = 0;
3776 }
3777 xmlFreeURI(uri);
3778 }
3779 }
3780 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3781 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3782 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3783 ctxt->sax->error(ctxt->userData,
3784 "Space required before 'NDATA'\n");
3785 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003786 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003787 }
3788 SKIP_BLANKS;
3789 if ((RAW == 'N') && (NXT(1) == 'D') &&
3790 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3791 (NXT(4) == 'A')) {
3792 SKIP(5);
3793 if (!IS_BLANK(CUR)) {
3794 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3795 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3796 ctxt->sax->error(ctxt->userData,
3797 "Space required after 'NDATA'\n");
3798 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003799 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003800 }
3801 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003802 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003803 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3804 (ctxt->sax->unparsedEntityDecl != NULL))
3805 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3806 literal, URI, ndata);
3807 } else {
3808 if ((ctxt->sax != NULL) &&
3809 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3810 ctxt->sax->entityDecl(ctxt->userData, name,
3811 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3812 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003813 /*
3814 * For expat compatibility in SAX mode.
3815 * assuming the entity replacement was asked for
3816 */
3817 if ((ctxt->replaceEntities != 0) &&
3818 ((ctxt->myDoc == NULL) ||
3819 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
3820 if (ctxt->myDoc == NULL) {
3821 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3822 }
3823
3824 if (ctxt->myDoc->intSubset == NULL)
3825 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3826 BAD_CAST "fake", NULL, NULL);
3827 entityDecl(ctxt, name,
3828 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3829 literal, URI, NULL);
3830 }
Owen Taylor3473f882001-02-23 17:55:21 +00003831 }
3832 }
3833 }
3834 SKIP_BLANKS;
3835 if (RAW != '>') {
3836 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3837 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3838 ctxt->sax->error(ctxt->userData,
3839 "xmlParseEntityDecl: entity %s not terminated\n", name);
3840 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003841 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003842 } else {
3843 if (input != ctxt->input) {
3844 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3845 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3846 ctxt->sax->error(ctxt->userData,
3847"Entity declaration doesn't start and stop in the same entity\n");
3848 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003849 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003850 }
3851 NEXT;
3852 }
3853 if (orig != NULL) {
3854 /*
3855 * Ugly mechanism to save the raw entity value.
3856 */
3857 xmlEntityPtr cur = NULL;
3858
3859 if (isParameter) {
3860 if ((ctxt->sax != NULL) &&
3861 (ctxt->sax->getParameterEntity != NULL))
3862 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3863 } else {
3864 if ((ctxt->sax != NULL) &&
3865 (ctxt->sax->getEntity != NULL))
3866 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003867 if ((cur == NULL) && (ctxt->userData==ctxt)) {
3868 cur = getEntity(ctxt, name);
3869 }
Owen Taylor3473f882001-02-23 17:55:21 +00003870 }
3871 if (cur != NULL) {
3872 if (cur->orig != NULL)
3873 xmlFree(orig);
3874 else
3875 cur->orig = orig;
3876 } else
3877 xmlFree(orig);
3878 }
3879 if (name != NULL) xmlFree(name);
3880 if (value != NULL) xmlFree(value);
3881 if (URI != NULL) xmlFree(URI);
3882 if (literal != NULL) xmlFree(literal);
3883 if (ndata != NULL) xmlFree(ndata);
3884 }
3885}
3886
3887/**
3888 * xmlParseDefaultDecl:
3889 * @ctxt: an XML parser context
3890 * @value: Receive a possible fixed default value for the attribute
3891 *
3892 * Parse an attribute default declaration
3893 *
3894 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3895 *
3896 * [ VC: Required Attribute ]
3897 * if the default declaration is the keyword #REQUIRED, then the
3898 * attribute must be specified for all elements of the type in the
3899 * attribute-list declaration.
3900 *
3901 * [ VC: Attribute Default Legal ]
3902 * The declared default value must meet the lexical constraints of
3903 * the declared attribute type c.f. xmlValidateAttributeDecl()
3904 *
3905 * [ VC: Fixed Attribute Default ]
3906 * if an attribute has a default value declared with the #FIXED
3907 * keyword, instances of that attribute must match the default value.
3908 *
3909 * [ WFC: No < in Attribute Values ]
3910 * handled in xmlParseAttValue()
3911 *
3912 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3913 * or XML_ATTRIBUTE_FIXED.
3914 */
3915
3916int
3917xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3918 int val;
3919 xmlChar *ret;
3920
3921 *value = NULL;
3922 if ((RAW == '#') && (NXT(1) == 'R') &&
3923 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3924 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3925 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3926 (NXT(8) == 'D')) {
3927 SKIP(9);
3928 return(XML_ATTRIBUTE_REQUIRED);
3929 }
3930 if ((RAW == '#') && (NXT(1) == 'I') &&
3931 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3932 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3933 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3934 SKIP(8);
3935 return(XML_ATTRIBUTE_IMPLIED);
3936 }
3937 val = XML_ATTRIBUTE_NONE;
3938 if ((RAW == '#') && (NXT(1) == 'F') &&
3939 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3940 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3941 SKIP(6);
3942 val = XML_ATTRIBUTE_FIXED;
3943 if (!IS_BLANK(CUR)) {
3944 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3945 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3946 ctxt->sax->error(ctxt->userData,
3947 "Space required after '#FIXED'\n");
3948 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003949 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003950 }
3951 SKIP_BLANKS;
3952 }
3953 ret = xmlParseAttValue(ctxt);
3954 ctxt->instate = XML_PARSER_DTD;
3955 if (ret == NULL) {
3956 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3957 ctxt->sax->error(ctxt->userData,
3958 "Attribute default value declaration error\n");
3959 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003960 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003961 } else
3962 *value = ret;
3963 return(val);
3964}
3965
3966/**
3967 * xmlParseNotationType:
3968 * @ctxt: an XML parser context
3969 *
3970 * parse an Notation attribute type.
3971 *
3972 * Note: the leading 'NOTATION' S part has already being parsed...
3973 *
3974 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3975 *
3976 * [ VC: Notation Attributes ]
3977 * Values of this type must match one of the notation names included
3978 * in the declaration; all notation names in the declaration must be declared.
3979 *
3980 * Returns: the notation attribute tree built while parsing
3981 */
3982
3983xmlEnumerationPtr
3984xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3985 xmlChar *name;
3986 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3987
3988 if (RAW != '(') {
3989 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3990 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3991 ctxt->sax->error(ctxt->userData,
3992 "'(' required to start 'NOTATION'\n");
3993 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003994 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003995 return(NULL);
3996 }
3997 SHRINK;
3998 do {
3999 NEXT;
4000 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004001 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004002 if (name == NULL) {
4003 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4004 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4005 ctxt->sax->error(ctxt->userData,
4006 "Name expected in NOTATION declaration\n");
4007 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004008 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004009 return(ret);
4010 }
4011 cur = xmlCreateEnumeration(name);
4012 xmlFree(name);
4013 if (cur == NULL) return(ret);
4014 if (last == NULL) ret = last = cur;
4015 else {
4016 last->next = cur;
4017 last = cur;
4018 }
4019 SKIP_BLANKS;
4020 } while (RAW == '|');
4021 if (RAW != ')') {
4022 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
4023 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4024 ctxt->sax->error(ctxt->userData,
4025 "')' required to finish NOTATION declaration\n");
4026 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004027 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004028 if ((last != NULL) && (last != ret))
4029 xmlFreeEnumeration(last);
4030 return(ret);
4031 }
4032 NEXT;
4033 return(ret);
4034}
4035
4036/**
4037 * xmlParseEnumerationType:
4038 * @ctxt: an XML parser context
4039 *
4040 * parse an Enumeration attribute type.
4041 *
4042 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4043 *
4044 * [ VC: Enumeration ]
4045 * Values of this type must match one of the Nmtoken tokens in
4046 * the declaration
4047 *
4048 * Returns: the enumeration attribute tree built while parsing
4049 */
4050
4051xmlEnumerationPtr
4052xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4053 xmlChar *name;
4054 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4055
4056 if (RAW != '(') {
4057 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
4058 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4059 ctxt->sax->error(ctxt->userData,
4060 "'(' required to start ATTLIST enumeration\n");
4061 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004062 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004063 return(NULL);
4064 }
4065 SHRINK;
4066 do {
4067 NEXT;
4068 SKIP_BLANKS;
4069 name = xmlParseNmtoken(ctxt);
4070 if (name == NULL) {
4071 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
4072 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4073 ctxt->sax->error(ctxt->userData,
4074 "NmToken expected in ATTLIST enumeration\n");
4075 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004076 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004077 return(ret);
4078 }
4079 cur = xmlCreateEnumeration(name);
4080 xmlFree(name);
4081 if (cur == NULL) return(ret);
4082 if (last == NULL) ret = last = cur;
4083 else {
4084 last->next = cur;
4085 last = cur;
4086 }
4087 SKIP_BLANKS;
4088 } while (RAW == '|');
4089 if (RAW != ')') {
4090 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
4091 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4092 ctxt->sax->error(ctxt->userData,
4093 "')' required to finish ATTLIST enumeration\n");
4094 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004095 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004096 return(ret);
4097 }
4098 NEXT;
4099 return(ret);
4100}
4101
4102/**
4103 * xmlParseEnumeratedType:
4104 * @ctxt: an XML parser context
4105 * @tree: the enumeration tree built while parsing
4106 *
4107 * parse an Enumerated attribute type.
4108 *
4109 * [57] EnumeratedType ::= NotationType | Enumeration
4110 *
4111 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4112 *
4113 *
4114 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4115 */
4116
4117int
4118xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4119 if ((RAW == 'N') && (NXT(1) == 'O') &&
4120 (NXT(2) == 'T') && (NXT(3) == 'A') &&
4121 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4122 (NXT(6) == 'O') && (NXT(7) == 'N')) {
4123 SKIP(8);
4124 if (!IS_BLANK(CUR)) {
4125 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4126 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4127 ctxt->sax->error(ctxt->userData,
4128 "Space required after 'NOTATION'\n");
4129 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004130 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004131 return(0);
4132 }
4133 SKIP_BLANKS;
4134 *tree = xmlParseNotationType(ctxt);
4135 if (*tree == NULL) return(0);
4136 return(XML_ATTRIBUTE_NOTATION);
4137 }
4138 *tree = xmlParseEnumerationType(ctxt);
4139 if (*tree == NULL) return(0);
4140 return(XML_ATTRIBUTE_ENUMERATION);
4141}
4142
4143/**
4144 * xmlParseAttributeType:
4145 * @ctxt: an XML parser context
4146 * @tree: the enumeration tree built while parsing
4147 *
4148 * parse the Attribute list def for an element
4149 *
4150 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4151 *
4152 * [55] StringType ::= 'CDATA'
4153 *
4154 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4155 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4156 *
4157 * Validity constraints for attribute values syntax are checked in
4158 * xmlValidateAttributeValue()
4159 *
4160 * [ VC: ID ]
4161 * Values of type ID must match the Name production. A name must not
4162 * appear more than once in an XML document as a value of this type;
4163 * i.e., ID values must uniquely identify the elements which bear them.
4164 *
4165 * [ VC: One ID per Element Type ]
4166 * No element type may have more than one ID attribute specified.
4167 *
4168 * [ VC: ID Attribute Default ]
4169 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4170 *
4171 * [ VC: IDREF ]
4172 * Values of type IDREF must match the Name production, and values
4173 * of type IDREFS must match Names; each IDREF Name must match the value
4174 * of an ID attribute on some element in the XML document; i.e. IDREF
4175 * values must match the value of some ID attribute.
4176 *
4177 * [ VC: Entity Name ]
4178 * Values of type ENTITY must match the Name production, values
4179 * of type ENTITIES must match Names; each Entity Name must match the
4180 * name of an unparsed entity declared in the DTD.
4181 *
4182 * [ VC: Name Token ]
4183 * Values of type NMTOKEN must match the Nmtoken production; values
4184 * of type NMTOKENS must match Nmtokens.
4185 *
4186 * Returns the attribute type
4187 */
4188int
4189xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4190 SHRINK;
4191 if ((RAW == 'C') && (NXT(1) == 'D') &&
4192 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4193 (NXT(4) == 'A')) {
4194 SKIP(5);
4195 return(XML_ATTRIBUTE_CDATA);
4196 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4197 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4198 (NXT(4) == 'F') && (NXT(5) == 'S')) {
4199 SKIP(6);
4200 return(XML_ATTRIBUTE_IDREFS);
4201 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4202 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4203 (NXT(4) == 'F')) {
4204 SKIP(5);
4205 return(XML_ATTRIBUTE_IDREF);
4206 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4207 SKIP(2);
4208 return(XML_ATTRIBUTE_ID);
4209 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4210 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4211 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4212 SKIP(6);
4213 return(XML_ATTRIBUTE_ENTITY);
4214 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4215 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4216 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4217 (NXT(6) == 'E') && (NXT(7) == 'S')) {
4218 SKIP(8);
4219 return(XML_ATTRIBUTE_ENTITIES);
4220 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4221 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4222 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4223 (NXT(6) == 'N') && (NXT(7) == 'S')) {
4224 SKIP(8);
4225 return(XML_ATTRIBUTE_NMTOKENS);
4226 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4227 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4228 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4229 (NXT(6) == 'N')) {
4230 SKIP(7);
4231 return(XML_ATTRIBUTE_NMTOKEN);
4232 }
4233 return(xmlParseEnumeratedType(ctxt, tree));
4234}
4235
4236/**
4237 * xmlParseAttributeListDecl:
4238 * @ctxt: an XML parser context
4239 *
4240 * : parse the Attribute list def for an element
4241 *
4242 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4243 *
4244 * [53] AttDef ::= S Name S AttType S DefaultDecl
4245 *
4246 */
4247void
4248xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
4249 xmlChar *elemName;
4250 xmlChar *attrName;
4251 xmlEnumerationPtr tree;
4252
4253 if ((RAW == '<') && (NXT(1) == '!') &&
4254 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4255 (NXT(4) == 'T') && (NXT(5) == 'L') &&
4256 (NXT(6) == 'I') && (NXT(7) == 'S') &&
4257 (NXT(8) == 'T')) {
4258 xmlParserInputPtr input = ctxt->input;
4259
4260 SKIP(9);
4261 if (!IS_BLANK(CUR)) {
4262 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4263 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4264 ctxt->sax->error(ctxt->userData,
4265 "Space required after '<!ATTLIST'\n");
4266 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004267 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004268 }
4269 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004270 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004271 if (elemName == NULL) {
4272 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4273 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4274 ctxt->sax->error(ctxt->userData,
4275 "ATTLIST: no name for Element\n");
4276 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004277 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004278 return;
4279 }
4280 SKIP_BLANKS;
4281 GROW;
4282 while (RAW != '>') {
4283 const xmlChar *check = CUR_PTR;
4284 int type;
4285 int def;
4286 xmlChar *defaultValue = NULL;
4287
4288 GROW;
4289 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004290 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004291 if (attrName == NULL) {
4292 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4293 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4294 ctxt->sax->error(ctxt->userData,
4295 "ATTLIST: no name for Attribute\n");
4296 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004297 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004298 break;
4299 }
4300 GROW;
4301 if (!IS_BLANK(CUR)) {
4302 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4303 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4304 ctxt->sax->error(ctxt->userData,
4305 "Space required after the attribute name\n");
4306 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004307 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004308 if (attrName != NULL)
4309 xmlFree(attrName);
4310 if (defaultValue != NULL)
4311 xmlFree(defaultValue);
4312 break;
4313 }
4314 SKIP_BLANKS;
4315
4316 type = xmlParseAttributeType(ctxt, &tree);
4317 if (type <= 0) {
4318 if (attrName != NULL)
4319 xmlFree(attrName);
4320 if (defaultValue != NULL)
4321 xmlFree(defaultValue);
4322 break;
4323 }
4324
4325 GROW;
4326 if (!IS_BLANK(CUR)) {
4327 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4328 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4329 ctxt->sax->error(ctxt->userData,
4330 "Space required after the attribute type\n");
4331 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004332 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004333 if (attrName != NULL)
4334 xmlFree(attrName);
4335 if (defaultValue != NULL)
4336 xmlFree(defaultValue);
4337 if (tree != NULL)
4338 xmlFreeEnumeration(tree);
4339 break;
4340 }
4341 SKIP_BLANKS;
4342
4343 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4344 if (def <= 0) {
4345 if (attrName != NULL)
4346 xmlFree(attrName);
4347 if (defaultValue != NULL)
4348 xmlFree(defaultValue);
4349 if (tree != NULL)
4350 xmlFreeEnumeration(tree);
4351 break;
4352 }
4353
4354 GROW;
4355 if (RAW != '>') {
4356 if (!IS_BLANK(CUR)) {
4357 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4358 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4359 ctxt->sax->error(ctxt->userData,
4360 "Space required after the attribute default value\n");
4361 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004362 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004363 if (attrName != NULL)
4364 xmlFree(attrName);
4365 if (defaultValue != NULL)
4366 xmlFree(defaultValue);
4367 if (tree != NULL)
4368 xmlFreeEnumeration(tree);
4369 break;
4370 }
4371 SKIP_BLANKS;
4372 }
4373 if (check == CUR_PTR) {
4374 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4375 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4376 ctxt->sax->error(ctxt->userData,
4377 "xmlParseAttributeListDecl: detected internal error\n");
4378 if (attrName != NULL)
4379 xmlFree(attrName);
4380 if (defaultValue != NULL)
4381 xmlFree(defaultValue);
4382 if (tree != NULL)
4383 xmlFreeEnumeration(tree);
4384 break;
4385 }
4386 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4387 (ctxt->sax->attributeDecl != NULL))
4388 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4389 type, def, defaultValue, tree);
4390 if (attrName != NULL)
4391 xmlFree(attrName);
4392 if (defaultValue != NULL)
4393 xmlFree(defaultValue);
4394 GROW;
4395 }
4396 if (RAW == '>') {
4397 if (input != ctxt->input) {
4398 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4399 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4400 ctxt->sax->error(ctxt->userData,
4401"Attribute list declaration doesn't start and stop in the same entity\n");
4402 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004403 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004404 }
4405 NEXT;
4406 }
4407
4408 xmlFree(elemName);
4409 }
4410}
4411
4412/**
4413 * xmlParseElementMixedContentDecl:
4414 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004415 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004416 *
4417 * parse the declaration for a Mixed Element content
4418 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4419 *
4420 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4421 * '(' S? '#PCDATA' S? ')'
4422 *
4423 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4424 *
4425 * [ VC: No Duplicate Types ]
4426 * The same name must not appear more than once in a single
4427 * mixed-content declaration.
4428 *
4429 * returns: the list of the xmlElementContentPtr describing the element choices
4430 */
4431xmlElementContentPtr
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004432xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004433 xmlElementContentPtr ret = NULL, cur = NULL, n;
4434 xmlChar *elem = NULL;
4435
4436 GROW;
4437 if ((RAW == '#') && (NXT(1) == 'P') &&
4438 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4439 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4440 (NXT(6) == 'A')) {
4441 SKIP(7);
4442 SKIP_BLANKS;
4443 SHRINK;
4444 if (RAW == ')') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004445 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4446 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4447 if (ctxt->vctxt.error != NULL)
4448 ctxt->vctxt.error(ctxt->vctxt.userData,
4449"Element content declaration doesn't start and stop in the same entity\n");
4450 ctxt->valid = 0;
4451 }
Owen Taylor3473f882001-02-23 17:55:21 +00004452 NEXT;
4453 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4454 if (RAW == '*') {
4455 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4456 NEXT;
4457 }
4458 return(ret);
4459 }
4460 if ((RAW == '(') || (RAW == '|')) {
4461 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4462 if (ret == NULL) return(NULL);
4463 }
4464 while (RAW == '|') {
4465 NEXT;
4466 if (elem == NULL) {
4467 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4468 if (ret == NULL) return(NULL);
4469 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004470 if (cur != NULL)
4471 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004472 cur = ret;
4473 } else {
4474 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4475 if (n == NULL) return(NULL);
4476 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004477 if (n->c1 != NULL)
4478 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004479 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004480 if (n != NULL)
4481 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004482 cur = n;
4483 xmlFree(elem);
4484 }
4485 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004486 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004487 if (elem == NULL) {
4488 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4489 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4490 ctxt->sax->error(ctxt->userData,
4491 "xmlParseElementMixedContentDecl : Name expected\n");
4492 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004493 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004494 xmlFreeElementContent(cur);
4495 return(NULL);
4496 }
4497 SKIP_BLANKS;
4498 GROW;
4499 }
4500 if ((RAW == ')') && (NXT(1) == '*')) {
4501 if (elem != NULL) {
4502 cur->c2 = xmlNewElementContent(elem,
4503 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004504 if (cur->c2 != NULL)
4505 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004506 xmlFree(elem);
4507 }
4508 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004509 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4510 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4511 if (ctxt->vctxt.error != NULL)
4512 ctxt->vctxt.error(ctxt->vctxt.userData,
4513"Element content declaration doesn't start and stop in the same entity\n");
4514 ctxt->valid = 0;
4515 }
Owen Taylor3473f882001-02-23 17:55:21 +00004516 SKIP(2);
4517 } else {
4518 if (elem != NULL) xmlFree(elem);
4519 xmlFreeElementContent(ret);
4520 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4521 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4522 ctxt->sax->error(ctxt->userData,
4523 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4524 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004525 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004526 return(NULL);
4527 }
4528
4529 } else {
4530 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4531 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4532 ctxt->sax->error(ctxt->userData,
4533 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4534 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004535 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004536 }
4537 return(ret);
4538}
4539
4540/**
4541 * xmlParseElementChildrenContentDecl:
4542 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004543 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004544 *
4545 * parse the declaration for an element-only (children) content model
4546 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4547 *
4548 *
4549 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4550 *
4551 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4552 *
4553 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4554 *
4555 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4556 *
4557 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4558 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004559 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004560 * opening or closing parentheses in a choice, seq, or Mixed
4561 * construct is contained in the replacement text for a parameter
4562 * entity, both must be contained in the same replacement text. For
4563 * interoperability, if a parameter-entity reference appears in a
4564 * choice, seq, or Mixed construct, its replacement text should not
4565 * be empty, and neither the first nor last non-blank character of
4566 * the replacement text should be a connector (| or ,).
4567 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004568 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004569 * hierarchy.
4570 */
4571xmlElementContentPtr
Owen Taylor3473f882001-02-23 17:55:21 +00004572xmlParseElementChildrenContentDecl
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004573(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004574 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4575 xmlChar *elem;
4576 xmlChar type = 0;
4577
4578 SKIP_BLANKS;
4579 GROW;
4580 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004581 xmlParserInputPtr input = ctxt->input;
4582
Owen Taylor3473f882001-02-23 17:55:21 +00004583 /* Recurse on first child */
4584 NEXT;
4585 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004586 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004587 SKIP_BLANKS;
4588 GROW;
4589 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004590 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004591 if (elem == NULL) {
4592 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4593 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4594 ctxt->sax->error(ctxt->userData,
4595 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4596 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004597 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004598 return(NULL);
4599 }
4600 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004601 if (cur == NULL) {
4602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4603 ctxt->sax->error(ctxt->userData,
4604 "xmlParseElementChildrenContentDecl : out of memory\n");
4605 ctxt->errNo = XML_ERR_NO_MEMORY;
4606 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4607 xmlFree(elem);
4608 return(NULL);
4609 }
Owen Taylor3473f882001-02-23 17:55:21 +00004610 GROW;
4611 if (RAW == '?') {
4612 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4613 NEXT;
4614 } else if (RAW == '*') {
4615 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4616 NEXT;
4617 } else if (RAW == '+') {
4618 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4619 NEXT;
4620 } else {
4621 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4622 }
4623 xmlFree(elem);
4624 GROW;
4625 }
4626 SKIP_BLANKS;
4627 SHRINK;
4628 while (RAW != ')') {
4629 /*
4630 * Each loop we parse one separator and one element.
4631 */
4632 if (RAW == ',') {
4633 if (type == 0) type = CUR;
4634
4635 /*
4636 * Detect "Name | Name , Name" error
4637 */
4638 else if (type != CUR) {
4639 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4640 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4641 ctxt->sax->error(ctxt->userData,
4642 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4643 type);
4644 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004645 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004646 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004647 xmlFreeElementContent(last);
4648 if (ret != NULL)
4649 xmlFreeElementContent(ret);
4650 return(NULL);
4651 }
4652 NEXT;
4653
4654 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4655 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004656 if ((last != NULL) && (last != ret))
4657 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004658 xmlFreeElementContent(ret);
4659 return(NULL);
4660 }
4661 if (last == NULL) {
4662 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004663 if (ret != NULL)
4664 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004665 ret = cur = op;
4666 } else {
4667 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004668 if (op != NULL)
4669 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004670 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004671 if (last != NULL)
4672 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004673 cur =op;
4674 last = NULL;
4675 }
4676 } else if (RAW == '|') {
4677 if (type == 0) type = CUR;
4678
4679 /*
4680 * Detect "Name , Name | Name" error
4681 */
4682 else if (type != CUR) {
4683 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4684 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4685 ctxt->sax->error(ctxt->userData,
4686 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4687 type);
4688 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004689 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004690 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004691 xmlFreeElementContent(last);
4692 if (ret != NULL)
4693 xmlFreeElementContent(ret);
4694 return(NULL);
4695 }
4696 NEXT;
4697
4698 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4699 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004700 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004701 xmlFreeElementContent(last);
4702 if (ret != NULL)
4703 xmlFreeElementContent(ret);
4704 return(NULL);
4705 }
4706 if (last == NULL) {
4707 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004708 if (ret != NULL)
4709 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004710 ret = cur = op;
4711 } else {
4712 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004713 if (op != NULL)
4714 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004715 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004716 if (last != NULL)
4717 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004718 cur =op;
4719 last = NULL;
4720 }
4721 } else {
4722 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4723 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4724 ctxt->sax->error(ctxt->userData,
4725 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4726 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004727 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004728 if (ret != NULL)
4729 xmlFreeElementContent(ret);
4730 return(NULL);
4731 }
4732 GROW;
4733 SKIP_BLANKS;
4734 GROW;
4735 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004736 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004737 /* Recurse on second child */
4738 NEXT;
4739 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004740 last = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004741 SKIP_BLANKS;
4742 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004743 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004744 if (elem == NULL) {
4745 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4746 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4747 ctxt->sax->error(ctxt->userData,
4748 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4749 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004750 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004751 if (ret != NULL)
4752 xmlFreeElementContent(ret);
4753 return(NULL);
4754 }
4755 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4756 xmlFree(elem);
4757 if (RAW == '?') {
4758 last->ocur = XML_ELEMENT_CONTENT_OPT;
4759 NEXT;
4760 } else if (RAW == '*') {
4761 last->ocur = XML_ELEMENT_CONTENT_MULT;
4762 NEXT;
4763 } else if (RAW == '+') {
4764 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4765 NEXT;
4766 } else {
4767 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4768 }
4769 }
4770 SKIP_BLANKS;
4771 GROW;
4772 }
4773 if ((cur != NULL) && (last != NULL)) {
4774 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004775 if (last != NULL)
4776 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004777 }
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004778 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4779 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4780 if (ctxt->vctxt.error != NULL)
4781 ctxt->vctxt.error(ctxt->vctxt.userData,
4782"Element content declaration doesn't start and stop in the same entity\n");
4783 ctxt->valid = 0;
4784 }
Owen Taylor3473f882001-02-23 17:55:21 +00004785 NEXT;
4786 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004787 if (ret != NULL)
4788 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004789 NEXT;
4790 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004791 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004792 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004793 cur = ret;
4794 /*
4795 * Some normalization:
4796 * (a | b* | c?)* == (a | b | c)*
4797 */
4798 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4799 if ((cur->c1 != NULL) &&
4800 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4801 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4802 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4803 if ((cur->c2 != NULL) &&
4804 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4805 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4806 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4807 cur = cur->c2;
4808 }
4809 }
Owen Taylor3473f882001-02-23 17:55:21 +00004810 NEXT;
4811 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004812 if (ret != NULL) {
4813 int found = 0;
4814
Daniel Veillarde470df72001-04-18 21:41:07 +00004815 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004816 /*
4817 * Some normalization:
4818 * (a | b*)+ == (a | b)*
4819 * (a | b?)+ == (a | b)*
4820 */
4821 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4822 if ((cur->c1 != NULL) &&
4823 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4824 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4825 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4826 found = 1;
4827 }
4828 if ((cur->c2 != NULL) &&
4829 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4830 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4831 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4832 found = 1;
4833 }
4834 cur = cur->c2;
4835 }
4836 if (found)
4837 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4838 }
Owen Taylor3473f882001-02-23 17:55:21 +00004839 NEXT;
4840 }
4841 return(ret);
4842}
4843
4844/**
4845 * xmlParseElementContentDecl:
4846 * @ctxt: an XML parser context
4847 * @name: the name of the element being defined.
4848 * @result: the Element Content pointer will be stored here if any
4849 *
4850 * parse the declaration for an Element content either Mixed or Children,
4851 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4852 *
4853 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4854 *
4855 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4856 */
4857
4858int
4859xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4860 xmlElementContentPtr *result) {
4861
4862 xmlElementContentPtr tree = NULL;
4863 xmlParserInputPtr input = ctxt->input;
4864 int res;
4865
4866 *result = NULL;
4867
4868 if (RAW != '(') {
4869 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4870 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4871 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004872 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004873 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004874 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004875 return(-1);
4876 }
4877 NEXT;
4878 GROW;
4879 SKIP_BLANKS;
4880 if ((RAW == '#') && (NXT(1) == 'P') &&
4881 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4882 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4883 (NXT(6) == 'A')) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004884 tree = xmlParseElementMixedContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004885 res = XML_ELEMENT_TYPE_MIXED;
4886 } else {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004887 tree = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004888 res = XML_ELEMENT_TYPE_ELEMENT;
4889 }
Owen Taylor3473f882001-02-23 17:55:21 +00004890 SKIP_BLANKS;
4891 *result = tree;
4892 return(res);
4893}
4894
4895/**
4896 * xmlParseElementDecl:
4897 * @ctxt: an XML parser context
4898 *
4899 * parse an Element declaration.
4900 *
4901 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4902 *
4903 * [ VC: Unique Element Type Declaration ]
4904 * No element type may be declared more than once
4905 *
4906 * Returns the type of the element, or -1 in case of error
4907 */
4908int
4909xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4910 xmlChar *name;
4911 int ret = -1;
4912 xmlElementContentPtr content = NULL;
4913
4914 GROW;
4915 if ((RAW == '<') && (NXT(1) == '!') &&
4916 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4917 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4918 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4919 (NXT(8) == 'T')) {
4920 xmlParserInputPtr input = ctxt->input;
4921
4922 SKIP(9);
4923 if (!IS_BLANK(CUR)) {
4924 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4925 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4926 ctxt->sax->error(ctxt->userData,
4927 "Space required after 'ELEMENT'\n");
4928 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004929 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004930 }
4931 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004932 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004933 if (name == NULL) {
4934 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4935 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4936 ctxt->sax->error(ctxt->userData,
4937 "xmlParseElementDecl: no name for Element\n");
4938 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004939 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004940 return(-1);
4941 }
4942 while ((RAW == 0) && (ctxt->inputNr > 1))
4943 xmlPopInput(ctxt);
4944 if (!IS_BLANK(CUR)) {
4945 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4946 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4947 ctxt->sax->error(ctxt->userData,
4948 "Space required after the element name\n");
4949 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004950 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004951 }
4952 SKIP_BLANKS;
4953 if ((RAW == 'E') && (NXT(1) == 'M') &&
4954 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4955 (NXT(4) == 'Y')) {
4956 SKIP(5);
4957 /*
4958 * Element must always be empty.
4959 */
4960 ret = XML_ELEMENT_TYPE_EMPTY;
4961 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4962 (NXT(2) == 'Y')) {
4963 SKIP(3);
4964 /*
4965 * Element is a generic container.
4966 */
4967 ret = XML_ELEMENT_TYPE_ANY;
4968 } else if (RAW == '(') {
4969 ret = xmlParseElementContentDecl(ctxt, name, &content);
4970 } else {
4971 /*
4972 * [ WFC: PEs in Internal Subset ] error handling.
4973 */
4974 if ((RAW == '%') && (ctxt->external == 0) &&
4975 (ctxt->inputNr == 1)) {
4976 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4977 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4978 ctxt->sax->error(ctxt->userData,
4979 "PEReference: forbidden within markup decl in internal subset\n");
4980 } else {
4981 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4982 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4983 ctxt->sax->error(ctxt->userData,
4984 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4985 }
4986 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004987 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004988 if (name != NULL) xmlFree(name);
4989 return(-1);
4990 }
4991
4992 SKIP_BLANKS;
4993 /*
4994 * Pop-up of finished entities.
4995 */
4996 while ((RAW == 0) && (ctxt->inputNr > 1))
4997 xmlPopInput(ctxt);
4998 SKIP_BLANKS;
4999
5000 if (RAW != '>') {
5001 ctxt->errNo = XML_ERR_GT_REQUIRED;
5002 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5003 ctxt->sax->error(ctxt->userData,
5004 "xmlParseElementDecl: expected '>' at the end\n");
5005 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005006 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005007 } else {
5008 if (input != ctxt->input) {
5009 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5010 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5011 ctxt->sax->error(ctxt->userData,
5012"Element declaration doesn't start and stop in the same entity\n");
5013 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005014 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005015 }
5016
5017 NEXT;
5018 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5019 (ctxt->sax->elementDecl != NULL))
5020 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5021 content);
5022 }
5023 if (content != NULL) {
5024 xmlFreeElementContent(content);
5025 }
5026 if (name != NULL) {
5027 xmlFree(name);
5028 }
5029 }
5030 return(ret);
5031}
5032
5033/**
Owen Taylor3473f882001-02-23 17:55:21 +00005034 * xmlParseConditionalSections
5035 * @ctxt: an XML parser context
5036 *
5037 * [61] conditionalSect ::= includeSect | ignoreSect
5038 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5039 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5040 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5041 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5042 */
5043
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005044static void
Owen Taylor3473f882001-02-23 17:55:21 +00005045xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5046 SKIP(3);
5047 SKIP_BLANKS;
5048 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
5049 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
5050 (NXT(6) == 'E')) {
5051 SKIP(7);
5052 SKIP_BLANKS;
5053 if (RAW != '[') {
5054 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5055 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5056 ctxt->sax->error(ctxt->userData,
5057 "XML conditional section '[' expected\n");
5058 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005059 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005060 } else {
5061 NEXT;
5062 }
5063 if (xmlParserDebugEntities) {
5064 if ((ctxt->input != NULL) && (ctxt->input->filename))
5065 xmlGenericError(xmlGenericErrorContext,
5066 "%s(%d): ", ctxt->input->filename,
5067 ctxt->input->line);
5068 xmlGenericError(xmlGenericErrorContext,
5069 "Entering INCLUDE Conditional Section\n");
5070 }
5071
5072 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5073 (NXT(2) != '>'))) {
5074 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005075 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005076
5077 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5078 xmlParseConditionalSections(ctxt);
5079 } else if (IS_BLANK(CUR)) {
5080 NEXT;
5081 } else if (RAW == '%') {
5082 xmlParsePEReference(ctxt);
5083 } else
5084 xmlParseMarkupDecl(ctxt);
5085
5086 /*
5087 * Pop-up of finished entities.
5088 */
5089 while ((RAW == 0) && (ctxt->inputNr > 1))
5090 xmlPopInput(ctxt);
5091
Daniel Veillardfdc91562002-07-01 21:52:03 +00005092 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005093 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5094 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5095 ctxt->sax->error(ctxt->userData,
5096 "Content error in the external subset\n");
5097 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005098 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005099 break;
5100 }
5101 }
5102 if (xmlParserDebugEntities) {
5103 if ((ctxt->input != NULL) && (ctxt->input->filename))
5104 xmlGenericError(xmlGenericErrorContext,
5105 "%s(%d): ", ctxt->input->filename,
5106 ctxt->input->line);
5107 xmlGenericError(xmlGenericErrorContext,
5108 "Leaving INCLUDE Conditional Section\n");
5109 }
5110
5111 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
5112 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
5113 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005114 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005115 int depth = 0;
5116
5117 SKIP(6);
5118 SKIP_BLANKS;
5119 if (RAW != '[') {
5120 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5121 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5122 ctxt->sax->error(ctxt->userData,
5123 "XML conditional section '[' expected\n");
5124 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005125 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005126 } else {
5127 NEXT;
5128 }
5129 if (xmlParserDebugEntities) {
5130 if ((ctxt->input != NULL) && (ctxt->input->filename))
5131 xmlGenericError(xmlGenericErrorContext,
5132 "%s(%d): ", ctxt->input->filename,
5133 ctxt->input->line);
5134 xmlGenericError(xmlGenericErrorContext,
5135 "Entering IGNORE Conditional Section\n");
5136 }
5137
5138 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005139 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005140 * But disable SAX event generating DTD building in the meantime
5141 */
5142 state = ctxt->disableSAX;
5143 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005144 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005145 ctxt->instate = XML_PARSER_IGNORE;
5146
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005147 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005148 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5149 depth++;
5150 SKIP(3);
5151 continue;
5152 }
5153 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5154 if (--depth >= 0) SKIP(3);
5155 continue;
5156 }
5157 NEXT;
5158 continue;
5159 }
5160
5161 ctxt->disableSAX = state;
5162 ctxt->instate = instate;
5163
5164 if (xmlParserDebugEntities) {
5165 if ((ctxt->input != NULL) && (ctxt->input->filename))
5166 xmlGenericError(xmlGenericErrorContext,
5167 "%s(%d): ", ctxt->input->filename,
5168 ctxt->input->line);
5169 xmlGenericError(xmlGenericErrorContext,
5170 "Leaving IGNORE Conditional Section\n");
5171 }
5172
5173 } else {
5174 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5175 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5176 ctxt->sax->error(ctxt->userData,
5177 "XML conditional section INCLUDE or IGNORE keyword expected\n");
5178 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005179 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005180 }
5181
5182 if (RAW == 0)
5183 SHRINK;
5184
5185 if (RAW == 0) {
5186 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
5187 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5188 ctxt->sax->error(ctxt->userData,
5189 "XML conditional section not closed\n");
5190 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005191 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005192 } else {
5193 SKIP(3);
5194 }
5195}
5196
5197/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005198 * xmlParseMarkupDecl:
5199 * @ctxt: an XML parser context
5200 *
5201 * parse Markup declarations
5202 *
5203 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5204 * NotationDecl | PI | Comment
5205 *
5206 * [ VC: Proper Declaration/PE Nesting ]
5207 * Parameter-entity replacement text must be properly nested with
5208 * markup declarations. That is to say, if either the first character
5209 * or the last character of a markup declaration (markupdecl above) is
5210 * contained in the replacement text for a parameter-entity reference,
5211 * both must be contained in the same replacement text.
5212 *
5213 * [ WFC: PEs in Internal Subset ]
5214 * In the internal DTD subset, parameter-entity references can occur
5215 * only where markup declarations can occur, not within markup declarations.
5216 * (This does not apply to references that occur in external parameter
5217 * entities or to the external subset.)
5218 */
5219void
5220xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5221 GROW;
5222 xmlParseElementDecl(ctxt);
5223 xmlParseAttributeListDecl(ctxt);
5224 xmlParseEntityDecl(ctxt);
5225 xmlParseNotationDecl(ctxt);
5226 xmlParsePI(ctxt);
5227 xmlParseComment(ctxt);
5228 /*
5229 * This is only for internal subset. On external entities,
5230 * the replacement is done before parsing stage
5231 */
5232 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5233 xmlParsePEReference(ctxt);
5234
5235 /*
5236 * Conditional sections are allowed from entities included
5237 * by PE References in the internal subset.
5238 */
5239 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5240 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5241 xmlParseConditionalSections(ctxt);
5242 }
5243 }
5244
5245 ctxt->instate = XML_PARSER_DTD;
5246}
5247
5248/**
5249 * xmlParseTextDecl:
5250 * @ctxt: an XML parser context
5251 *
5252 * parse an XML declaration header for external entities
5253 *
5254 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5255 *
5256 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5257 */
5258
5259void
5260xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5261 xmlChar *version;
5262
5263 /*
5264 * We know that '<?xml' is here.
5265 */
5266 if ((RAW == '<') && (NXT(1) == '?') &&
5267 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5268 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5269 SKIP(5);
5270 } else {
5271 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
5272 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5273 ctxt->sax->error(ctxt->userData,
5274 "Text declaration '<?xml' required\n");
5275 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005276 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005277
5278 return;
5279 }
5280
5281 if (!IS_BLANK(CUR)) {
5282 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5283 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5284 ctxt->sax->error(ctxt->userData,
5285 "Space needed after '<?xml'\n");
5286 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005287 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005288 }
5289 SKIP_BLANKS;
5290
5291 /*
5292 * We may have the VersionInfo here.
5293 */
5294 version = xmlParseVersionInfo(ctxt);
5295 if (version == NULL)
5296 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005297 else {
5298 if (!IS_BLANK(CUR)) {
5299 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5300 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5301 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5302 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005303 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard401c2112002-01-07 16:54:10 +00005304 }
5305 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005306 ctxt->input->version = version;
5307
5308 /*
5309 * We must have the encoding declaration
5310 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005311 xmlParseEncodingDecl(ctxt);
5312 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5313 /*
5314 * The XML REC instructs us to stop parsing right here
5315 */
5316 return;
5317 }
5318
5319 SKIP_BLANKS;
5320 if ((RAW == '?') && (NXT(1) == '>')) {
5321 SKIP(2);
5322 } else if (RAW == '>') {
5323 /* Deprecated old WD ... */
5324 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5325 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5326 ctxt->sax->error(ctxt->userData,
5327 "XML declaration must end-up with '?>'\n");
5328 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005329 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005330 NEXT;
5331 } else {
5332 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5333 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5334 ctxt->sax->error(ctxt->userData,
5335 "parsing XML declaration: '?>' expected\n");
5336 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005337 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005338 MOVETO_ENDTAG(CUR_PTR);
5339 NEXT;
5340 }
5341}
5342
5343/**
Owen Taylor3473f882001-02-23 17:55:21 +00005344 * xmlParseExternalSubset:
5345 * @ctxt: an XML parser context
5346 * @ExternalID: the external identifier
5347 * @SystemID: the system identifier (or URL)
5348 *
5349 * parse Markup declarations from an external subset
5350 *
5351 * [30] extSubset ::= textDecl? extSubsetDecl
5352 *
5353 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5354 */
5355void
5356xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5357 const xmlChar *SystemID) {
5358 GROW;
5359 if ((RAW == '<') && (NXT(1) == '?') &&
5360 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5361 (NXT(4) == 'l')) {
5362 xmlParseTextDecl(ctxt);
5363 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5364 /*
5365 * The XML REC instructs us to stop parsing right here
5366 */
5367 ctxt->instate = XML_PARSER_EOF;
5368 return;
5369 }
5370 }
5371 if (ctxt->myDoc == NULL) {
5372 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5373 }
5374 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5375 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5376
5377 ctxt->instate = XML_PARSER_DTD;
5378 ctxt->external = 1;
5379 while (((RAW == '<') && (NXT(1) == '?')) ||
5380 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005381 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005382 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005383 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005384
5385 GROW;
5386 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5387 xmlParseConditionalSections(ctxt);
5388 } else if (IS_BLANK(CUR)) {
5389 NEXT;
5390 } else if (RAW == '%') {
5391 xmlParsePEReference(ctxt);
5392 } else
5393 xmlParseMarkupDecl(ctxt);
5394
5395 /*
5396 * Pop-up of finished entities.
5397 */
5398 while ((RAW == 0) && (ctxt->inputNr > 1))
5399 xmlPopInput(ctxt);
5400
Daniel Veillardfdc91562002-07-01 21:52:03 +00005401 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005402 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5403 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5404 ctxt->sax->error(ctxt->userData,
5405 "Content error in the external subset\n");
5406 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005407 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005408 break;
5409 }
5410 }
5411
5412 if (RAW != 0) {
5413 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5414 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5415 ctxt->sax->error(ctxt->userData,
5416 "Extra content at the end of the document\n");
5417 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005418 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005419 }
5420
5421}
5422
5423/**
5424 * xmlParseReference:
5425 * @ctxt: an XML parser context
5426 *
 * parse and handle entity references in content. Depending on the SAX
 * interface, this may end up in a call to characters() if this is a
 * CharRef or a predefined entity, if there is no reference() callback,
 * or if the parser was asked to switch to that mode.
5431 *
5432 * [67] Reference ::= EntityRef | CharRef
5433 */
5434void
5435xmlParseReference(xmlParserCtxtPtr ctxt) {
5436 xmlEntityPtr ent;
5437 xmlChar *val;
5438 if (RAW != '&') return;
5439
5440 if (NXT(1) == '#') {
5441 int i = 0;
5442 xmlChar out[10];
5443 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005444 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005445
5446 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5447 /*
5448 * So we are using non-UTF-8 buffers
5449 * Check that the char fit on 8bits, if not
5450 * generate a CharRef.
5451 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005452 if (value <= 0xFF) {
5453 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005454 out[1] = 0;
5455 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5456 (!ctxt->disableSAX))
5457 ctxt->sax->characters(ctxt->userData, out, 1);
5458 } else {
5459 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005460 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005461 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005462 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005463 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5464 (!ctxt->disableSAX))
5465 ctxt->sax->reference(ctxt->userData, out);
5466 }
5467 } else {
5468 /*
5469 * Just encode the value in UTF-8
5470 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005471 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005472 out[i] = 0;
5473 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5474 (!ctxt->disableSAX))
5475 ctxt->sax->characters(ctxt->userData, out, i);
5476 }
5477 } else {
5478 ent = xmlParseEntityRef(ctxt);
5479 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005480 if (!ctxt->wellFormed)
5481 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005482 if ((ent->name != NULL) &&
5483 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5484 xmlNodePtr list = NULL;
5485 int ret;
5486
5487
5488 /*
5489 * The first reference to the entity trigger a parsing phase
5490 * where the ent->children is filled with the result from
5491 * the parsing.
5492 */
5493 if (ent->children == NULL) {
5494 xmlChar *value;
5495 value = ent->content;
5496
5497 /*
5498 * Check that this entity is well formed
5499 */
5500 if ((value != NULL) &&
5501 (value[1] == 0) && (value[0] == '<') &&
5502 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5503 /*
5504 * DONE: get definite answer on this !!!
5505 * Lots of entity decls are used to declare a single
5506 * char
5507 * <!ENTITY lt "<">
5508 * Which seems to be valid since
5509 * 2.4: The ampersand character (&) and the left angle
5510 * bracket (<) may appear in their literal form only
5511 * when used ... They are also legal within the literal
5512 * entity value of an internal entity declaration;i
5513 * see "4.3.2 Well-Formed Parsed Entities".
5514 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5515 * Looking at the OASIS test suite and James Clark
5516 * tests, this is broken. However the XML REC uses
5517 * it. Is the XML REC not well-formed ????
5518 * This is a hack to avoid this problem
5519 *
5520 * ANSWER: since lt gt amp .. are already defined,
5521 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005522 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005523 * is lousy but acceptable.
5524 */
5525 list = xmlNewDocText(ctxt->myDoc, value);
5526 if (list != NULL) {
5527 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5528 (ent->children == NULL)) {
5529 ent->children = list;
5530 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005531 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005532 list->parent = (xmlNodePtr) ent;
5533 } else {
5534 xmlFreeNodeList(list);
5535 }
5536 } else if (list != NULL) {
5537 xmlFreeNodeList(list);
5538 }
5539 } else {
5540 /*
5541 * 4.3.2: An internal general parsed entity is well-formed
5542 * if its replacement text matches the production labeled
5543 * content.
5544 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005545
5546 void *user_data;
5547 /*
5548 * This is a bit hackish but this seems the best
5549 * way to make sure both SAX and DOM entity support
5550 * behaves okay.
5551 */
5552 if (ctxt->userData == ctxt)
5553 user_data = NULL;
5554 else
5555 user_data = ctxt->userData;
5556
Owen Taylor3473f882001-02-23 17:55:21 +00005557 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5558 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005559 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5560 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005561 ctxt->depth--;
5562 } else if (ent->etype ==
5563 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5564 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005565 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005566 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005567 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005568 ctxt->depth--;
5569 } else {
5570 ret = -1;
5571 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5572 ctxt->sax->error(ctxt->userData,
5573 "Internal: invalid entity type\n");
5574 }
5575 if (ret == XML_ERR_ENTITY_LOOP) {
5576 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5577 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5578 ctxt->sax->error(ctxt->userData,
5579 "Detected entity reference loop\n");
5580 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005581 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005582 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005583 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005584 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5585 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005586 (ent->children == NULL)) {
5587 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005588 if (ctxt->replaceEntities) {
5589 /*
5590 * Prune it directly in the generated document
5591 * except for single text nodes.
5592 */
5593 if ((list->type == XML_TEXT_NODE) &&
5594 (list->next == NULL)) {
5595 list->parent = (xmlNodePtr) ent;
5596 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005597 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005598 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005599 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005600 while (list != NULL) {
5601 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005602 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005603 if (list->next == NULL)
5604 ent->last = list;
5605 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005606 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005607 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005608 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5609 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005610 }
5611 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005612 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005613 while (list != NULL) {
5614 list->parent = (xmlNodePtr) ent;
5615 if (list->next == NULL)
5616 ent->last = list;
5617 list = list->next;
5618 }
Owen Taylor3473f882001-02-23 17:55:21 +00005619 }
5620 } else {
5621 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005622 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005623 }
5624 } else if (ret > 0) {
5625 ctxt->errNo = ret;
5626 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5627 ctxt->sax->error(ctxt->userData,
5628 "Entity value required\n");
5629 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005630 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005631 } else if (list != NULL) {
5632 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005633 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005634 }
5635 }
5636 }
5637 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5638 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5639 /*
5640 * Create a node.
5641 */
5642 ctxt->sax->reference(ctxt->userData, ent->name);
5643 return;
5644 } else if (ctxt->replaceEntities) {
5645 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5646 /*
5647 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005648 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005649 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005650 */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005651 if ((list == NULL) && (ent->owner == 0)) {
5652 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005653 cur = ent->children;
5654 while (cur != NULL) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005655 nw = xmlCopyNode(cur, 1);
5656 if (nw != NULL) {
5657 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005658 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005659 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005660 }
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005661 xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005662 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005663 if (cur == ent->last)
5664 break;
5665 cur = cur->next;
5666 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005667 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005668 xmlAddEntityReference(ent, firstChild, nw);
5669 } else if (list == NULL) {
5670 xmlNodePtr nw = NULL, cur, next, last,
5671 firstChild = NULL;
5672 /*
5673 * Copy the entity child list and make it the new
5674 * entity child list. The goal is to make sure any
5675 * ID or REF referenced will be the one from the
5676 * document content and not the entity copy.
5677 */
5678 cur = ent->children;
5679 ent->children = NULL;
5680 last = ent->last;
5681 ent->last = NULL;
5682 while (cur != NULL) {
5683 next = cur->next;
5684 cur->next = NULL;
5685 cur->parent = NULL;
5686 nw = xmlCopyNode(cur, 1);
5687 if (nw != NULL) {
5688 nw->_private = cur->_private;
5689 if (firstChild == NULL){
5690 firstChild = cur;
5691 }
5692 xmlAddChild((xmlNodePtr) ent, nw);
5693 xmlAddChild(ctxt->node, cur);
5694 }
5695 if (cur == last)
5696 break;
5697 cur = next;
5698 }
5699 ent->owner = 1;
5700 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5701 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005702 } else {
5703 /*
5704 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005705 * node with a possible previous text one which
5706 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005707 */
5708 if (ent->children->type == XML_TEXT_NODE)
5709 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5710 if ((ent->last != ent->children) &&
5711 (ent->last->type == XML_TEXT_NODE))
5712 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5713 xmlAddChildList(ctxt->node, ent->children);
5714 }
5715
Owen Taylor3473f882001-02-23 17:55:21 +00005716 /*
5717 * This is to avoid a nasty side effect, see
5718 * characters() in SAX.c
5719 */
5720 ctxt->nodemem = 0;
5721 ctxt->nodelen = 0;
5722 return;
5723 } else {
5724 /*
5725 * Probably running in SAX mode
5726 */
5727 xmlParserInputPtr input;
5728
5729 input = xmlNewEntityInputStream(ctxt, ent);
5730 xmlPushInput(ctxt, input);
5731 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5732 (RAW == '<') && (NXT(1) == '?') &&
5733 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5734 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5735 xmlParseTextDecl(ctxt);
5736 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5737 /*
5738 * The XML REC instructs us to stop parsing right here
5739 */
5740 ctxt->instate = XML_PARSER_EOF;
5741 return;
5742 }
5743 if (input->standalone == 1) {
5744 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5745 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5746 ctxt->sax->error(ctxt->userData,
5747 "external parsed entities cannot be standalone\n");
5748 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005749 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005750 }
5751 }
5752 return;
5753 }
5754 }
5755 } else {
5756 val = ent->content;
5757 if (val == NULL) return;
5758 /*
5759 * inline the entity.
5760 */
5761 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5762 (!ctxt->disableSAX))
5763 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5764 }
5765 }
5766}
5767
5768/**
5769 * xmlParseEntityRef:
5770 * @ctxt: an XML parser context
5771 *
5772 * parse ENTITY references declarations
5773 *
5774 * [68] EntityRef ::= '&' Name ';'
5775 *
5776 * [ WFC: Entity Declared ]
5777 * In a document without any DTD, a document with only an internal DTD
5778 * subset which contains no parameter entity references, or a document
5779 * with "standalone='yes'", the Name given in the entity reference
5780 * must match that in an entity declaration, except that well-formed
5781 * documents need not declare any of the following entities: amp, lt,
5782 * gt, apos, quot. The declaration of a parameter entity must precede
5783 * any reference to it. Similarly, the declaration of a general entity
5784 * must precede any reference to it which appears in a default value in an
5785 * attribute-list declaration. Note that if entities are declared in the
5786 * external subset or in external parameter entities, a non-validating
5787 * processor is not obligated to read and process their declarations;
5788 * for such documents, the rule that an entity must be declared is a
5789 * well-formedness constraint only if standalone='yes'.
5790 *
5791 * [ WFC: Parsed Entity ]
5792 * An entity reference must not contain the name of an unparsed entity
5793 *
5794 * Returns the xmlEntityPtr if found, or NULL otherwise.
5795 */
5796xmlEntityPtr
5797xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5798 xmlChar *name;
5799 xmlEntityPtr ent = NULL;
5800
5801 GROW;
5802
5803 if (RAW == '&') {
5804 NEXT;
5805 name = xmlParseName(ctxt);
5806 if (name == NULL) {
5807 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5808 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5809 ctxt->sax->error(ctxt->userData,
5810 "xmlParseEntityRef: no name\n");
5811 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005812 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005813 } else {
5814 if (RAW == ';') {
5815 NEXT;
5816 /*
5817 * Ask first SAX for entity resolution, otherwise try the
5818 * predefined set.
5819 */
5820 if (ctxt->sax != NULL) {
5821 if (ctxt->sax->getEntity != NULL)
5822 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005823 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00005824 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005825 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
5826 (ctxt->userData==ctxt)) {
Daniel Veillard5997aca2002-03-18 18:36:20 +00005827 ent = getEntity(ctxt, name);
5828 }
Owen Taylor3473f882001-02-23 17:55:21 +00005829 }
5830 /*
5831 * [ WFC: Entity Declared ]
5832 * In a document without any DTD, a document with only an
5833 * internal DTD subset which contains no parameter entity
5834 * references, or a document with "standalone='yes'", the
5835 * Name given in the entity reference must match that in an
5836 * entity declaration, except that well-formed documents
5837 * need not declare any of the following entities: amp, lt,
5838 * gt, apos, quot.
5839 * The declaration of a parameter entity must precede any
5840 * reference to it.
5841 * Similarly, the declaration of a general entity must
5842 * precede any reference to it which appears in a default
5843 * value in an attribute-list declaration. Note that if
5844 * entities are declared in the external subset or in
5845 * external parameter entities, a non-validating processor
5846 * is not obligated to read and process their declarations;
5847 * for such documents, the rule that an entity must be
5848 * declared is a well-formedness constraint only if
5849 * standalone='yes'.
5850 */
5851 if (ent == NULL) {
5852 if ((ctxt->standalone == 1) ||
5853 ((ctxt->hasExternalSubset == 0) &&
5854 (ctxt->hasPErefs == 0))) {
5855 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5856 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5857 ctxt->sax->error(ctxt->userData,
5858 "Entity '%s' not defined\n", name);
5859 ctxt->wellFormed = 0;
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005860 ctxt->valid = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005861 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005862 } else {
5863 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005864 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005865 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005866 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005867 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005868 }
5869 }
5870
5871 /*
5872 * [ WFC: Parsed Entity ]
5873 * An entity reference must not contain the name of an
5874 * unparsed entity
5875 */
5876 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5877 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5878 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5879 ctxt->sax->error(ctxt->userData,
5880 "Entity reference to unparsed entity %s\n", name);
5881 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005882 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005883 }
5884
5885 /*
5886 * [ WFC: No External Entity References ]
5887 * Attribute values cannot contain direct or indirect
5888 * entity references to external entities.
5889 */
5890 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5891 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5892 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5893 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5894 ctxt->sax->error(ctxt->userData,
5895 "Attribute references external entity '%s'\n", name);
5896 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005897 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005898 }
5899 /*
5900 * [ WFC: No < in Attribute Values ]
5901 * The replacement text of any entity referred to directly or
5902 * indirectly in an attribute value (other than "&lt;") must
5903 * not contain a <.
5904 */
5905 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5906 (ent != NULL) &&
5907 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5908 (ent->content != NULL) &&
5909 (xmlStrchr(ent->content, '<'))) {
5910 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5911 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5912 ctxt->sax->error(ctxt->userData,
5913 "'<' in entity '%s' is not allowed in attributes values\n", name);
5914 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005915 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005916 }
5917
5918 /*
5919 * Internal check, no parameter entities here ...
5920 */
5921 else {
5922 switch (ent->etype) {
5923 case XML_INTERNAL_PARAMETER_ENTITY:
5924 case XML_EXTERNAL_PARAMETER_ENTITY:
5925 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5926 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5927 ctxt->sax->error(ctxt->userData,
5928 "Attempt to reference the parameter entity '%s'\n", name);
5929 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005930 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005931 break;
5932 default:
5933 break;
5934 }
5935 }
5936
5937 /*
5938 * [ WFC: No Recursion ]
5939 * A parsed entity must not contain a recursive reference
5940 * to itself, either directly or indirectly.
5941 * Done somewhere else
5942 */
5943
5944 } else {
5945 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5946 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5947 ctxt->sax->error(ctxt->userData,
5948 "xmlParseEntityRef: expecting ';'\n");
5949 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005950 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005951 }
5952 xmlFree(name);
5953 }
5954 }
5955 return(ent);
5956}
5957
5958/**
5959 * xmlParseStringEntityRef:
5960 * @ctxt: an XML parser context
5961 * @str: a pointer to an index in the string
5962 *
5963 * parse ENTITY references declarations, but this version parses it from
5964 * a string value.
5965 *
5966 * [68] EntityRef ::= '&' Name ';'
5967 *
5968 * [ WFC: Entity Declared ]
5969 * In a document without any DTD, a document with only an internal DTD
5970 * subset which contains no parameter entity references, or a document
5971 * with "standalone='yes'", the Name given in the entity reference
5972 * must match that in an entity declaration, except that well-formed
5973 * documents need not declare any of the following entities: amp, lt,
5974 * gt, apos, quot. The declaration of a parameter entity must precede
5975 * any reference to it. Similarly, the declaration of a general entity
5976 * must precede any reference to it which appears in a default value in an
5977 * attribute-list declaration. Note that if entities are declared in the
5978 * external subset or in external parameter entities, a non-validating
5979 * processor is not obligated to read and process their declarations;
5980 * for such documents, the rule that an entity must be declared is a
5981 * well-formedness constraint only if standalone='yes'.
5982 *
5983 * [ WFC: Parsed Entity ]
5984 * An entity reference must not contain the name of an unparsed entity
5985 *
5986 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5987 * is updated to the current location in the string.
5988 */
5989xmlEntityPtr
5990xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5991 xmlChar *name;
5992 const xmlChar *ptr;
5993 xmlChar cur;
5994 xmlEntityPtr ent = NULL;
5995
5996 if ((str == NULL) || (*str == NULL))
5997 return(NULL);
5998 ptr = *str;
5999 cur = *ptr;
6000 if (cur == '&') {
6001 ptr++;
6002 cur = *ptr;
6003 name = xmlParseStringName(ctxt, &ptr);
6004 if (name == NULL) {
6005 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6007 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00006008 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006009 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006010 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006011 } else {
6012 if (*ptr == ';') {
6013 ptr++;
6014 /*
6015 * Ask first SAX for entity resolution, otherwise try the
6016 * predefined set.
6017 */
6018 if (ctxt->sax != NULL) {
6019 if (ctxt->sax->getEntity != NULL)
6020 ent = ctxt->sax->getEntity(ctxt->userData, name);
6021 if (ent == NULL)
6022 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006023 if ((ent == NULL) && (ctxt->userData==ctxt)) {
6024 ent = getEntity(ctxt, name);
6025 }
Owen Taylor3473f882001-02-23 17:55:21 +00006026 }
6027 /*
6028 * [ WFC: Entity Declared ]
6029 * In a document without any DTD, a document with only an
6030 * internal DTD subset which contains no parameter entity
6031 * references, or a document with "standalone='yes'", the
6032 * Name given in the entity reference must match that in an
6033 * entity declaration, except that well-formed documents
6034 * need not declare any of the following entities: amp, lt,
6035 * gt, apos, quot.
6036 * The declaration of a parameter entity must precede any
6037 * reference to it.
6038 * Similarly, the declaration of a general entity must
6039 * precede any reference to it which appears in a default
6040 * value in an attribute-list declaration. Note that if
6041 * entities are declared in the external subset or in
6042 * external parameter entities, a non-validating processor
6043 * is not obligated to read and process their declarations;
6044 * for such documents, the rule that an entity must be
6045 * declared is a well-formedness constraint only if
6046 * standalone='yes'.
6047 */
6048 if (ent == NULL) {
6049 if ((ctxt->standalone == 1) ||
6050 ((ctxt->hasExternalSubset == 0) &&
6051 (ctxt->hasPErefs == 0))) {
6052 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6053 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6054 ctxt->sax->error(ctxt->userData,
6055 "Entity '%s' not defined\n", name);
6056 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006057 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006058 } else {
6059 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
6060 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6061 ctxt->sax->warning(ctxt->userData,
6062 "Entity '%s' not defined\n", name);
6063 }
6064 }
6065
6066 /*
6067 * [ WFC: Parsed Entity ]
6068 * An entity reference must not contain the name of an
6069 * unparsed entity
6070 */
6071 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
6072 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
6073 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6074 ctxt->sax->error(ctxt->userData,
6075 "Entity reference to unparsed entity %s\n", name);
6076 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006077 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006078 }
6079
6080 /*
6081 * [ WFC: No External Entity References ]
6082 * Attribute values cannot contain direct or indirect
6083 * entity references to external entities.
6084 */
6085 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6086 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
6087 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
6088 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6089 ctxt->sax->error(ctxt->userData,
6090 "Attribute references external entity '%s'\n", name);
6091 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006092 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006093 }
6094 /*
6095 * [ WFC: No < in Attribute Values ]
6096 * The replacement text of any entity referred to directly or
6097 * indirectly in an attribute value (other than "&lt;") must
6098 * not contain a <.
6099 */
6100 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6101 (ent != NULL) &&
6102 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6103 (ent->content != NULL) &&
6104 (xmlStrchr(ent->content, '<'))) {
6105 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6106 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6107 ctxt->sax->error(ctxt->userData,
6108 "'<' in entity '%s' is not allowed in attributes values\n", name);
6109 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006110 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006111 }
6112
6113 /*
6114 * Internal check, no parameter entities here ...
6115 */
6116 else {
6117 switch (ent->etype) {
6118 case XML_INTERNAL_PARAMETER_ENTITY:
6119 case XML_EXTERNAL_PARAMETER_ENTITY:
6120 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6121 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6122 ctxt->sax->error(ctxt->userData,
6123 "Attempt to reference the parameter entity '%s'\n", name);
6124 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006125 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006126 break;
6127 default:
6128 break;
6129 }
6130 }
6131
6132 /*
6133 * [ WFC: No Recursion ]
6134 * A parsed entity must not contain a recursive reference
6135 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006136 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006137 */
6138
6139 } else {
6140 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6141 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6142 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00006143 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006144 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006145 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006146 }
6147 xmlFree(name);
6148 }
6149 }
6150 *str = ptr;
6151 return(ent);
6152}
6153
6154/**
6155 * xmlParsePEReference:
6156 * @ctxt: an XML parser context
6157 *
6158 * parse PEReference declarations
6159 * The entity content is handled directly by pushing it's content as
6160 * a new input stream.
6161 *
6162 * [69] PEReference ::= '%' Name ';'
6163 *
6164 * [ WFC: No Recursion ]
6165 * A parsed entity must not contain a recursive
6166 * reference to itself, either directly or indirectly.
6167 *
6168 * [ WFC: Entity Declared ]
6169 * In a document without any DTD, a document with only an internal DTD
6170 * subset which contains no parameter entity references, or a document
6171 * with "standalone='yes'", ... ... The declaration of a parameter
6172 * entity must precede any reference to it...
6173 *
6174 * [ VC: Entity Declared ]
6175 * In a document with an external subset or external parameter entities
6176 * with "standalone='no'", ... ... The declaration of a parameter entity
6177 * must precede any reference to it...
6178 *
6179 * [ WFC: In DTD ]
6180 * Parameter-entity references may only appear in the DTD.
6181 * NOTE: misleading but this is handled.
6182 */
6183void
6184xmlParsePEReference(xmlParserCtxtPtr ctxt) {
6185 xmlChar *name;
6186 xmlEntityPtr entity = NULL;
6187 xmlParserInputPtr input;
6188
6189 if (RAW == '%') {
6190 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006191 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006192 if (name == NULL) {
6193 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6194 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6195 ctxt->sax->error(ctxt->userData,
6196 "xmlParsePEReference: no name\n");
6197 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006198 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006199 } else {
6200 if (RAW == ';') {
6201 NEXT;
6202 if ((ctxt->sax != NULL) &&
6203 (ctxt->sax->getParameterEntity != NULL))
6204 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6205 name);
6206 if (entity == NULL) {
6207 /*
6208 * [ WFC: Entity Declared ]
6209 * In a document without any DTD, a document with only an
6210 * internal DTD subset which contains no parameter entity
6211 * references, or a document with "standalone='yes'", ...
6212 * ... The declaration of a parameter entity must precede
6213 * any reference to it...
6214 */
6215 if ((ctxt->standalone == 1) ||
6216 ((ctxt->hasExternalSubset == 0) &&
6217 (ctxt->hasPErefs == 0))) {
6218 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6219 if ((!ctxt->disableSAX) &&
6220 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6221 ctxt->sax->error(ctxt->userData,
6222 "PEReference: %%%s; not found\n", name);
6223 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006224 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006225 } else {
6226 /*
6227 * [ VC: Entity Declared ]
6228 * In a document with an external subset or external
6229 * parameter entities with "standalone='no'", ...
6230 * ... The declaration of a parameter entity must precede
6231 * any reference to it...
6232 */
6233 if ((!ctxt->disableSAX) &&
6234 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6235 ctxt->sax->warning(ctxt->userData,
6236 "PEReference: %%%s; not found\n", name);
6237 ctxt->valid = 0;
6238 }
6239 } else {
6240 /*
6241 * Internal checking in case the entity quest barfed
6242 */
6243 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6244 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6245 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6246 ctxt->sax->warning(ctxt->userData,
6247 "Internal: %%%s; is not a parameter entity\n", name);
Daniel Veillardf5582f12002-06-11 10:08:16 +00006248 } else if (ctxt->input->free != deallocblankswrapper) {
6249 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
6250 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00006251 } else {
6252 /*
6253 * TODO !!!
6254 * handle the extra spaces added before and after
6255 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6256 */
6257 input = xmlNewEntityInputStream(ctxt, entity);
6258 xmlPushInput(ctxt, input);
6259 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6260 (RAW == '<') && (NXT(1) == '?') &&
6261 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6262 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6263 xmlParseTextDecl(ctxt);
6264 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6265 /*
6266 * The XML REC instructs us to stop parsing
6267 * right here
6268 */
6269 ctxt->instate = XML_PARSER_EOF;
6270 xmlFree(name);
6271 return;
6272 }
6273 }
Owen Taylor3473f882001-02-23 17:55:21 +00006274 }
6275 }
6276 ctxt->hasPErefs = 1;
6277 } else {
6278 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6279 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6280 ctxt->sax->error(ctxt->userData,
6281 "xmlParsePEReference: expecting ';'\n");
6282 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006283 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006284 }
6285 xmlFree(name);
6286 }
6287 }
6288}
6289
6290/**
6291 * xmlParseStringPEReference:
6292 * @ctxt: an XML parser context
6293 * @str: a pointer to an index in the string
6294 *
6295 * parse PEReference declarations
6296 *
6297 * [69] PEReference ::= '%' Name ';'
6298 *
6299 * [ WFC: No Recursion ]
6300 * A parsed entity must not contain a recursive
6301 * reference to itself, either directly or indirectly.
6302 *
6303 * [ WFC: Entity Declared ]
6304 * In a document without any DTD, a document with only an internal DTD
6305 * subset which contains no parameter entity references, or a document
6306 * with "standalone='yes'", ... ... The declaration of a parameter
6307 * entity must precede any reference to it...
6308 *
6309 * [ VC: Entity Declared ]
6310 * In a document with an external subset or external parameter entities
6311 * with "standalone='no'", ... ... The declaration of a parameter entity
6312 * must precede any reference to it...
6313 *
6314 * [ WFC: In DTD ]
6315 * Parameter-entity references may only appear in the DTD.
6316 * NOTE: misleading but this is handled.
6317 *
6318 * Returns the string of the entity content.
6319 * str is updated to the current value of the index
6320 */
6321xmlEntityPtr
6322xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6323 const xmlChar *ptr;
6324 xmlChar cur;
6325 xmlChar *name;
6326 xmlEntityPtr entity = NULL;
6327
6328 if ((str == NULL) || (*str == NULL)) return(NULL);
6329 ptr = *str;
6330 cur = *ptr;
6331 if (cur == '%') {
6332 ptr++;
6333 cur = *ptr;
6334 name = xmlParseStringName(ctxt, &ptr);
6335 if (name == NULL) {
6336 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6337 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6338 ctxt->sax->error(ctxt->userData,
6339 "xmlParseStringPEReference: no name\n");
6340 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006341 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006342 } else {
6343 cur = *ptr;
6344 if (cur == ';') {
6345 ptr++;
6346 cur = *ptr;
6347 if ((ctxt->sax != NULL) &&
6348 (ctxt->sax->getParameterEntity != NULL))
6349 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6350 name);
6351 if (entity == NULL) {
6352 /*
6353 * [ WFC: Entity Declared ]
6354 * In a document without any DTD, a document with only an
6355 * internal DTD subset which contains no parameter entity
6356 * references, or a document with "standalone='yes'", ...
6357 * ... The declaration of a parameter entity must precede
6358 * any reference to it...
6359 */
6360 if ((ctxt->standalone == 1) ||
6361 ((ctxt->hasExternalSubset == 0) &&
6362 (ctxt->hasPErefs == 0))) {
6363 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6364 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6365 ctxt->sax->error(ctxt->userData,
6366 "PEReference: %%%s; not found\n", name);
6367 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006368 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006369 } else {
6370 /*
6371 * [ VC: Entity Declared ]
6372 * In a document with an external subset or external
6373 * parameter entities with "standalone='no'", ...
6374 * ... The declaration of a parameter entity must
6375 * precede any reference to it...
6376 */
6377 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6378 ctxt->sax->warning(ctxt->userData,
6379 "PEReference: %%%s; not found\n", name);
6380 ctxt->valid = 0;
6381 }
6382 } else {
6383 /*
6384 * Internal checking in case the entity quest barfed
6385 */
6386 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6387 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6388 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6389 ctxt->sax->warning(ctxt->userData,
6390 "Internal: %%%s; is not a parameter entity\n", name);
6391 }
6392 }
6393 ctxt->hasPErefs = 1;
6394 } else {
6395 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6396 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6397 ctxt->sax->error(ctxt->userData,
6398 "xmlParseStringPEReference: expecting ';'\n");
6399 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006400 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006401 }
6402 xmlFree(name);
6403 }
6404 }
6405 *str = ptr;
6406 return(entity);
6407}
6408
6409/**
6410 * xmlParseDocTypeDecl:
6411 * @ctxt: an XML parser context
6412 *
6413 * parse a DOCTYPE declaration
6414 *
6415 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6416 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6417 *
6418 * [ VC: Root Element Type ]
6419 * The Name in the document type declaration must match the element
6420 * type of the root element.
6421 */
6422
6423void
6424xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6425 xmlChar *name = NULL;
6426 xmlChar *ExternalID = NULL;
6427 xmlChar *URI = NULL;
6428
6429 /*
6430 * We know that '<!DOCTYPE' has been detected.
6431 */
6432 SKIP(9);
6433
6434 SKIP_BLANKS;
6435
6436 /*
6437 * Parse the DOCTYPE name.
6438 */
6439 name = xmlParseName(ctxt);
6440 if (name == NULL) {
6441 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6442 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6443 ctxt->sax->error(ctxt->userData,
6444 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6445 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006446 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006447 }
6448 ctxt->intSubName = name;
6449
6450 SKIP_BLANKS;
6451
6452 /*
6453 * Check for SystemID and ExternalID
6454 */
6455 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6456
6457 if ((URI != NULL) || (ExternalID != NULL)) {
6458 ctxt->hasExternalSubset = 1;
6459 }
6460 ctxt->extSubURI = URI;
6461 ctxt->extSubSystem = ExternalID;
6462
6463 SKIP_BLANKS;
6464
6465 /*
6466 * Create and update the internal subset.
6467 */
6468 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6469 (!ctxt->disableSAX))
6470 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6471
6472 /*
6473 * Is there any internal subset declarations ?
6474 * they are handled separately in xmlParseInternalSubset()
6475 */
6476 if (RAW == '[')
6477 return;
6478
6479 /*
6480 * We should be at the end of the DOCTYPE declaration.
6481 */
6482 if (RAW != '>') {
6483 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6484 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006485 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006486 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006487 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006488 }
6489 NEXT;
6490}
6491
6492/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006493 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006494 * @ctxt: an XML parser context
6495 *
6496 * parse the internal subset declaration
6497 *
6498 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6499 */
6500
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006501static void
Owen Taylor3473f882001-02-23 17:55:21 +00006502xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6503 /*
6504 * Is there any DTD definition ?
6505 */
6506 if (RAW == '[') {
6507 ctxt->instate = XML_PARSER_DTD;
6508 NEXT;
6509 /*
6510 * Parse the succession of Markup declarations and
6511 * PEReferences.
6512 * Subsequence (markupdecl | PEReference | S)*
6513 */
6514 while (RAW != ']') {
6515 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006516 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006517
6518 SKIP_BLANKS;
6519 xmlParseMarkupDecl(ctxt);
6520 xmlParsePEReference(ctxt);
6521
6522 /*
6523 * Pop-up of finished entities.
6524 */
6525 while ((RAW == 0) && (ctxt->inputNr > 1))
6526 xmlPopInput(ctxt);
6527
6528 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6529 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6530 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6531 ctxt->sax->error(ctxt->userData,
6532 "xmlParseInternalSubset: error detected in Markup declaration\n");
6533 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006534 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006535 break;
6536 }
6537 }
6538 if (RAW == ']') {
6539 NEXT;
6540 SKIP_BLANKS;
6541 }
6542 }
6543
6544 /*
6545 * We should be at the end of the DOCTYPE declaration.
6546 */
6547 if (RAW != '>') {
6548 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6549 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006550 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006551 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006552 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006553 }
6554 NEXT;
6555}
6556
6557/**
6558 * xmlParseAttribute:
6559 * @ctxt: an XML parser context
6560 * @value: a xmlChar ** used to store the value of the attribute
6561 *
6562 * parse an attribute
6563 *
6564 * [41] Attribute ::= Name Eq AttValue
6565 *
6566 * [ WFC: No External Entity References ]
6567 * Attribute values cannot contain direct or indirect entity references
6568 * to external entities.
6569 *
6570 * [ WFC: No < in Attribute Values ]
6571 * The replacement text of any entity referred to directly or indirectly in
6572 * an attribute value (other than "&lt;") must not contain a <.
6573 *
6574 * [ VC: Attribute Value Type ]
6575 * The attribute must have been declared; the value must be of the type
6576 * declared for it.
6577 *
6578 * [25] Eq ::= S? '=' S?
6579 *
6580 * With namespace:
6581 *
6582 * [NS 11] Attribute ::= QName Eq AttValue
6583 *
6584 * Also the case QName == xmlns:??? is handled independently as a namespace
6585 * definition.
6586 *
6587 * Returns the attribute name, and the value in *value.
6588 */
6589
6590xmlChar *
6591xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6592 xmlChar *name, *val;
6593
6594 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006595 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006596 name = xmlParseName(ctxt);
6597 if (name == NULL) {
6598 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6599 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6600 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6601 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006602 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006603 return(NULL);
6604 }
6605
6606 /*
6607 * read the value
6608 */
6609 SKIP_BLANKS;
6610 if (RAW == '=') {
6611 NEXT;
6612 SKIP_BLANKS;
6613 val = xmlParseAttValue(ctxt);
6614 ctxt->instate = XML_PARSER_CONTENT;
6615 } else {
6616 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6617 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6618 ctxt->sax->error(ctxt->userData,
6619 "Specification mandate value for attribute %s\n", name);
6620 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006621 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006622 xmlFree(name);
6623 return(NULL);
6624 }
6625
6626 /*
6627 * Check that xml:lang conforms to the specification
6628 * No more registered as an error, just generate a warning now
6629 * since this was deprecated in XML second edition
6630 */
6631 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6632 if (!xmlCheckLanguageID(val)) {
6633 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6634 ctxt->sax->warning(ctxt->userData,
6635 "Malformed value for xml:lang : %s\n", val);
6636 }
6637 }
6638
6639 /*
6640 * Check that xml:space conforms to the specification
6641 */
6642 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6643 if (xmlStrEqual(val, BAD_CAST "default"))
6644 *(ctxt->space) = 0;
6645 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6646 *(ctxt->space) = 1;
6647 else {
6648 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6649 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6650 ctxt->sax->error(ctxt->userData,
Daniel Veillard642104e2003-03-26 16:32:05 +00006651"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Owen Taylor3473f882001-02-23 17:55:21 +00006652 val);
6653 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006654 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006655 }
6656 }
6657
6658 *value = val;
6659 return(name);
6660}
6661
6662/**
6663 * xmlParseStartTag:
6664 * @ctxt: an XML parser context
6665 *
6666 * parse a start of tag either for rule element or
6667 * EmptyElement. In both case we don't parse the tag closing chars.
6668 *
6669 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6670 *
6671 * [ WFC: Unique Att Spec ]
6672 * No attribute name may appear more than once in the same start-tag or
6673 * empty-element tag.
6674 *
6675 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6676 *
6677 * [ WFC: Unique Att Spec ]
6678 * No attribute name may appear more than once in the same start-tag or
6679 * empty-element tag.
6680 *
6681 * With namespace:
6682 *
6683 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6684 *
6685 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6686 *
6687 * Returns the element name parsed
6688 */
6689
6690xmlChar *
6691xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6692 xmlChar *name;
6693 xmlChar *attname;
6694 xmlChar *attvalue;
6695 const xmlChar **atts = NULL;
6696 int nbatts = 0;
6697 int maxatts = 0;
6698 int i;
6699
6700 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006701 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006702
6703 name = xmlParseName(ctxt);
6704 if (name == NULL) {
6705 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6706 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6707 ctxt->sax->error(ctxt->userData,
6708 "xmlParseStartTag: invalid element name\n");
6709 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006710 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006711 return(NULL);
6712 }
6713
6714 /*
6715 * Now parse the attributes, it ends up with the ending
6716 *
6717 * (S Attribute)* S?
6718 */
6719 SKIP_BLANKS;
6720 GROW;
6721
Daniel Veillard21a0f912001-02-25 19:54:14 +00006722 while ((RAW != '>') &&
6723 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard34ba3872003-07-15 13:34:05 +00006724 (IS_CHAR((unsigned int) RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006725 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006726 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006727
6728 attname = xmlParseAttribute(ctxt, &attvalue);
6729 if ((attname != NULL) && (attvalue != NULL)) {
6730 /*
6731 * [ WFC: Unique Att Spec ]
6732 * No attribute name may appear more than once in the same
6733 * start-tag or empty-element tag.
6734 */
6735 for (i = 0; i < nbatts;i += 2) {
6736 if (xmlStrEqual(atts[i], attname)) {
6737 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6738 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6739 ctxt->sax->error(ctxt->userData,
6740 "Attribute %s redefined\n",
6741 attname);
6742 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006743 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006744 xmlFree(attname);
6745 xmlFree(attvalue);
6746 goto failed;
6747 }
6748 }
6749
6750 /*
6751 * Add the pair to atts
6752 */
6753 if (atts == NULL) {
6754 maxatts = 10;
6755 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6756 if (atts == NULL) {
6757 xmlGenericError(xmlGenericErrorContext,
6758 "malloc of %ld byte failed\n",
6759 maxatts * (long)sizeof(xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006760 if (attname != NULL)
6761 xmlFree(attname);
6762 if (attvalue != NULL)
6763 xmlFree(attvalue);
6764 ctxt->errNo = XML_ERR_NO_MEMORY;
6765 ctxt->instate = XML_PARSER_EOF;
6766 ctxt->disableSAX = 1;
6767 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006768 }
6769 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006770 const xmlChar **n;
6771
Owen Taylor3473f882001-02-23 17:55:21 +00006772 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006773 n = (const xmlChar **) xmlRealloc((void *) atts,
Owen Taylor3473f882001-02-23 17:55:21 +00006774 maxatts * sizeof(xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006775 if (n == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00006776 xmlGenericError(xmlGenericErrorContext,
6777 "realloc of %ld byte failed\n",
6778 maxatts * (long)sizeof(xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006779 if (attname != NULL)
6780 xmlFree(attname);
6781 if (attvalue != NULL)
6782 xmlFree(attvalue);
6783 ctxt->errNo = XML_ERR_NO_MEMORY;
6784 ctxt->instate = XML_PARSER_EOF;
6785 ctxt->disableSAX = 1;
6786 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006787 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006788 atts = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006789 }
6790 atts[nbatts++] = attname;
6791 atts[nbatts++] = attvalue;
6792 atts[nbatts] = NULL;
6793 atts[nbatts + 1] = NULL;
6794 } else {
6795 if (attname != NULL)
6796 xmlFree(attname);
6797 if (attvalue != NULL)
6798 xmlFree(attvalue);
6799 }
6800
6801failed:
6802
Daniel Veillard3772de32002-12-17 10:31:45 +00006803 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006804 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6805 break;
6806 if (!IS_BLANK(RAW)) {
6807 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6808 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6809 ctxt->sax->error(ctxt->userData,
6810 "attributes construct error\n");
6811 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006812 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006813 }
6814 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006815 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6816 (attname == NULL) && (attvalue == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006817 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6818 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6819 ctxt->sax->error(ctxt->userData,
6820 "xmlParseStartTag: problem parsing attributes\n");
6821 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006822 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006823 break;
6824 }
6825 GROW;
6826 }
6827
6828 /*
6829 * SAX: Start of Element !
6830 */
6831 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6832 (!ctxt->disableSAX))
6833 ctxt->sax->startElement(ctxt->userData, name, atts);
6834
6835 if (atts != NULL) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006836 for (i = 0;i < nbatts;i++)
6837 if (atts[i] != NULL)
6838 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00006839 xmlFree((void *) atts);
6840 }
6841 return(name);
6842}
6843
6844/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006845 * xmlParseEndTagInternal:
Owen Taylor3473f882001-02-23 17:55:21 +00006846 * @ctxt: an XML parser context
6847 *
6848 * parse an end of tag
6849 *
6850 * [42] ETag ::= '</' Name S? '>'
6851 *
6852 * With namespace
6853 *
6854 * [NS 9] ETag ::= '</' QName S? '>'
6855 */
6856
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006857static void
6858xmlParseEndTagInternal(xmlParserCtxtPtr ctxt, int line) {
Owen Taylor3473f882001-02-23 17:55:21 +00006859 xmlChar *name;
6860 xmlChar *oldname;
6861
6862 GROW;
6863 if ((RAW != '<') || (NXT(1) != '/')) {
6864 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6865 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6866 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6867 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006868 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006869 return;
6870 }
6871 SKIP(2);
6872
Daniel Veillard46de64e2002-05-29 08:21:33 +00006873 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006874
6875 /*
6876 * We should definitely be at the ending "S? '>'" part
6877 */
6878 GROW;
6879 SKIP_BLANKS;
Daniel Veillard34ba3872003-07-15 13:34:05 +00006880 if ((!IS_CHAR((unsigned int) RAW)) || (RAW != '>')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006881 ctxt->errNo = XML_ERR_GT_REQUIRED;
6882 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6883 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6884 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006885 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006886 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006887 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006888
6889 /*
6890 * [ WFC: Element Type Match ]
6891 * The Name in an element's end-tag must match the element type in the
6892 * start-tag.
6893 *
6894 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006895 if (name != (xmlChar*)1) {
Owen Taylor3473f882001-02-23 17:55:21 +00006896 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6897 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00006898 if (name != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00006899 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006900 "Opening and ending tag mismatch: %s line %d and %s\n",
6901 ctxt->name, line, name);
Daniel Veillard46de64e2002-05-29 08:21:33 +00006902 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006903 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006904 "Ending tag error for: %s line %d\n", ctxt->name, line);
Owen Taylor3473f882001-02-23 17:55:21 +00006905 }
6906
6907 }
6908 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006909 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006910 if (name != NULL)
6911 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00006912 }
6913
6914 /*
6915 * SAX: End of Tag
6916 */
6917 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6918 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006919 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006920
Owen Taylor3473f882001-02-23 17:55:21 +00006921 oldname = namePop(ctxt);
6922 spacePop(ctxt);
6923 if (oldname != NULL) {
6924#ifdef DEBUG_STACK
6925 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6926#endif
6927 xmlFree(oldname);
6928 }
6929 return;
6930}
6931
6932/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006933 * xmlParseEndTag:
6934 * @ctxt: an XML parser context
6935 *
6936 * parse an end of tag
6937 *
6938 * [42] ETag ::= '</' Name S? '>'
6939 *
6940 * With namespace
6941 *
6942 * [NS 9] ETag ::= '</' QName S? '>'
6943 */
6944
6945void
6946xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6947 xmlParseEndTagInternal(ctxt, 0);
6948}
6949
6950/**
Owen Taylor3473f882001-02-23 17:55:21 +00006951 * xmlParseCDSect:
6952 * @ctxt: an XML parser context
6953 *
6954 * Parse escaped pure raw content.
6955 *
6956 * [18] CDSect ::= CDStart CData CDEnd
6957 *
6958 * [19] CDStart ::= '<![CDATA['
6959 *
6960 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6961 *
6962 * [21] CDEnd ::= ']]>'
6963 */
6964void
6965xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6966 xmlChar *buf = NULL;
6967 int len = 0;
6968 int size = XML_PARSER_BUFFER_SIZE;
6969 int r, rl;
6970 int s, sl;
6971 int cur, l;
6972 int count = 0;
6973
6974 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6975 (NXT(2) == '[') && (NXT(3) == 'C') &&
6976 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6977 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6978 (NXT(8) == '[')) {
6979 SKIP(9);
6980 } else
6981 return;
6982
6983 ctxt->instate = XML_PARSER_CDATA_SECTION;
6984 r = CUR_CHAR(rl);
6985 if (!IS_CHAR(r)) {
6986 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6987 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6988 ctxt->sax->error(ctxt->userData,
6989 "CData section not finished\n");
6990 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006991 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006992 ctxt->instate = XML_PARSER_CONTENT;
6993 return;
6994 }
6995 NEXTL(rl);
6996 s = CUR_CHAR(sl);
6997 if (!IS_CHAR(s)) {
6998 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6999 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7000 ctxt->sax->error(ctxt->userData,
7001 "CData section not finished\n");
7002 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007003 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007004 ctxt->instate = XML_PARSER_CONTENT;
7005 return;
7006 }
7007 NEXTL(sl);
7008 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007009 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007010 if (buf == NULL) {
7011 xmlGenericError(xmlGenericErrorContext,
7012 "malloc of %d byte failed\n", size);
7013 return;
7014 }
7015 while (IS_CHAR(cur) &&
7016 ((r != ']') || (s != ']') || (cur != '>'))) {
7017 if (len + 5 >= size) {
7018 size *= 2;
7019 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7020 if (buf == NULL) {
7021 xmlGenericError(xmlGenericErrorContext,
7022 "realloc of %d byte failed\n", size);
7023 return;
7024 }
7025 }
7026 COPY_BUF(rl,buf,len,r);
7027 r = s;
7028 rl = sl;
7029 s = cur;
7030 sl = l;
7031 count++;
7032 if (count > 50) {
7033 GROW;
7034 count = 0;
7035 }
7036 NEXTL(l);
7037 cur = CUR_CHAR(l);
7038 }
7039 buf[len] = 0;
7040 ctxt->instate = XML_PARSER_CONTENT;
7041 if (cur != '>') {
7042 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
7043 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7044 ctxt->sax->error(ctxt->userData,
7045 "CData section not finished\n%.50s\n", buf);
7046 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007047 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007048 xmlFree(buf);
7049 return;
7050 }
7051 NEXTL(l);
7052
7053 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007054 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00007055 */
7056 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
7057 if (ctxt->sax->cdataBlock != NULL)
7058 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00007059 else if (ctxt->sax->characters != NULL)
7060 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00007061 }
7062 xmlFree(buf);
7063}
7064
7065/**
7066 * xmlParseContent:
7067 * @ctxt: an XML parser context
7068 *
7069 * Parse a content:
7070 *
7071 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
7072 */
7073
7074void
7075xmlParseContent(xmlParserCtxtPtr ctxt) {
7076 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00007077 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00007078 ((RAW != '<') || (NXT(1) != '/'))) {
7079 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007080 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00007081 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00007082
7083 /*
Owen Taylor3473f882001-02-23 17:55:21 +00007084 * First case : a Processing Instruction.
7085 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00007086 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007087 xmlParsePI(ctxt);
7088 }
7089
7090 /*
7091 * Second case : a CDSection
7092 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007093 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00007094 (NXT(2) == '[') && (NXT(3) == 'C') &&
7095 (NXT(4) == 'D') && (NXT(5) == 'A') &&
7096 (NXT(6) == 'T') && (NXT(7) == 'A') &&
7097 (NXT(8) == '[')) {
7098 xmlParseCDSect(ctxt);
7099 }
7100
7101 /*
7102 * Third case : a comment
7103 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007104 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00007105 (NXT(2) == '-') && (NXT(3) == '-')) {
7106 xmlParseComment(ctxt);
7107 ctxt->instate = XML_PARSER_CONTENT;
7108 }
7109
7110 /*
7111 * Fourth case : a sub-element.
7112 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007113 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00007114 xmlParseElement(ctxt);
7115 }
7116
7117 /*
7118 * Fifth case : a reference. If if has not been resolved,
7119 * parsing returns it's Name, create the node
7120 */
7121
Daniel Veillard21a0f912001-02-25 19:54:14 +00007122 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00007123 xmlParseReference(ctxt);
7124 }
7125
7126 /*
7127 * Last case, text. Note that References are handled directly.
7128 */
7129 else {
7130 xmlParseCharData(ctxt, 0);
7131 }
7132
7133 GROW;
7134 /*
7135 * Pop-up of finished entities.
7136 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00007137 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00007138 xmlPopInput(ctxt);
7139 SHRINK;
7140
Daniel Veillardfdc91562002-07-01 21:52:03 +00007141 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007142 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
7143 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7144 ctxt->sax->error(ctxt->userData,
7145 "detected an error in element content\n");
7146 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007147 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007148 ctxt->instate = XML_PARSER_EOF;
7149 break;
7150 }
7151 }
7152}
7153
7154/**
7155 * xmlParseElement:
7156 * @ctxt: an XML parser context
7157 *
7158 * parse an XML element, this is highly recursive
7159 *
7160 * [39] element ::= EmptyElemTag | STag content ETag
7161 *
7162 * [ WFC: Element Type Match ]
7163 * The Name in an element's end-tag must match the element type in the
7164 * start-tag.
7165 *
7166 * [ VC: Element Valid ]
7167 * An element is valid if there is a declaration matching elementdecl
7168 * where the Name matches the element type and one of the following holds:
7169 * - The declaration matches EMPTY and the element has no content.
7170 * - The declaration matches children and the sequence of child elements
7171 * belongs to the language generated by the regular expression in the
7172 * content model, with optional white space (characters matching the
7173 * nonterminal S) between each pair of child elements.
7174 * - The declaration matches Mixed and the content consists of character
7175 * data and child elements whose types match names in the content model.
7176 * - The declaration matches ANY, and the types of any child elements have
7177 * been declared.
7178 */
7179
7180void
7181xmlParseElement(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00007182 xmlChar *name;
7183 xmlChar *oldname;
7184 xmlParserNodeInfo node_info;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007185 int line;
Owen Taylor3473f882001-02-23 17:55:21 +00007186 xmlNodePtr ret;
7187
7188 /* Capture start position */
7189 if (ctxt->record_info) {
7190 node_info.begin_pos = ctxt->input->consumed +
7191 (CUR_PTR - ctxt->input->base);
7192 node_info.begin_line = ctxt->input->line;
7193 }
7194
7195 if (ctxt->spaceNr == 0)
7196 spacePush(ctxt, -1);
7197 else
7198 spacePush(ctxt, *ctxt->space);
7199
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007200 line = ctxt->input->line;
Owen Taylor3473f882001-02-23 17:55:21 +00007201 name = xmlParseStartTag(ctxt);
7202 if (name == NULL) {
7203 spacePop(ctxt);
7204 return;
7205 }
7206 namePush(ctxt, name);
7207 ret = ctxt->node;
7208
7209 /*
7210 * [ VC: Root Element Type ]
7211 * The Name in the document type declaration must match the element
7212 * type of the root element.
7213 */
7214 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7215 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7216 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7217
7218 /*
7219 * Check for an Empty Element.
7220 */
7221 if ((RAW == '/') && (NXT(1) == '>')) {
7222 SKIP(2);
7223 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7224 (!ctxt->disableSAX))
7225 ctxt->sax->endElement(ctxt->userData, name);
7226 oldname = namePop(ctxt);
7227 spacePop(ctxt);
7228 if (oldname != NULL) {
7229#ifdef DEBUG_STACK
7230 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7231#endif
7232 xmlFree(oldname);
7233 }
7234 if ( ret != NULL && ctxt->record_info ) {
7235 node_info.end_pos = ctxt->input->consumed +
7236 (CUR_PTR - ctxt->input->base);
7237 node_info.end_line = ctxt->input->line;
7238 node_info.node = ret;
7239 xmlParserAddNodeInfo(ctxt, &node_info);
7240 }
7241 return;
7242 }
7243 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00007244 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007245 } else {
7246 ctxt->errNo = XML_ERR_GT_REQUIRED;
7247 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7248 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007249 "Couldn't find end of Start Tag %s line %d\n",
7250 name, line);
Owen Taylor3473f882001-02-23 17:55:21 +00007251 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007252 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007253
7254 /*
7255 * end of parsing of this node.
7256 */
7257 nodePop(ctxt);
7258 oldname = namePop(ctxt);
7259 spacePop(ctxt);
7260 if (oldname != NULL) {
7261#ifdef DEBUG_STACK
7262 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7263#endif
7264 xmlFree(oldname);
7265 }
7266
7267 /*
7268 * Capture end position and add node
7269 */
7270 if ( ret != NULL && ctxt->record_info ) {
7271 node_info.end_pos = ctxt->input->consumed +
7272 (CUR_PTR - ctxt->input->base);
7273 node_info.end_line = ctxt->input->line;
7274 node_info.node = ret;
7275 xmlParserAddNodeInfo(ctxt, &node_info);
7276 }
7277 return;
7278 }
7279
7280 /*
7281 * Parse the content of the element:
7282 */
7283 xmlParseContent(ctxt);
Daniel Veillard34ba3872003-07-15 13:34:05 +00007284 if (!IS_CHAR((unsigned int) RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00007285 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00007286 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7287 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007288 "Premature end of data in tag %s line %d\n", name, line);
Owen Taylor3473f882001-02-23 17:55:21 +00007289 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007290 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007291
7292 /*
7293 * end of parsing of this node.
7294 */
7295 nodePop(ctxt);
7296 oldname = namePop(ctxt);
7297 spacePop(ctxt);
7298 if (oldname != NULL) {
7299#ifdef DEBUG_STACK
7300 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7301#endif
7302 xmlFree(oldname);
7303 }
7304 return;
7305 }
7306
7307 /*
7308 * parse the end of tag: '</' should be here.
7309 */
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007310 xmlParseEndTagInternal(ctxt, line);
Owen Taylor3473f882001-02-23 17:55:21 +00007311
7312 /*
7313 * Capture end position and add node
7314 */
7315 if ( ret != NULL && ctxt->record_info ) {
7316 node_info.end_pos = ctxt->input->consumed +
7317 (CUR_PTR - ctxt->input->base);
7318 node_info.end_line = ctxt->input->line;
7319 node_info.node = ret;
7320 xmlParserAddNodeInfo(ctxt, &node_info);
7321 }
7322}
7323
7324/**
7325 * xmlParseVersionNum:
7326 * @ctxt: an XML parser context
7327 *
7328 * parse the XML version value.
7329 *
7330 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7331 *
7332 * Returns the string giving the XML version number, or NULL
7333 */
7334xmlChar *
7335xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7336 xmlChar *buf = NULL;
7337 int len = 0;
7338 int size = 10;
7339 xmlChar cur;
7340
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007341 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007342 if (buf == NULL) {
7343 xmlGenericError(xmlGenericErrorContext,
7344 "malloc of %d byte failed\n", size);
7345 return(NULL);
7346 }
7347 cur = CUR;
7348 while (((cur >= 'a') && (cur <= 'z')) ||
7349 ((cur >= 'A') && (cur <= 'Z')) ||
7350 ((cur >= '0') && (cur <= '9')) ||
7351 (cur == '_') || (cur == '.') ||
7352 (cur == ':') || (cur == '-')) {
7353 if (len + 1 >= size) {
7354 size *= 2;
7355 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7356 if (buf == NULL) {
7357 xmlGenericError(xmlGenericErrorContext,
7358 "realloc of %d byte failed\n", size);
7359 return(NULL);
7360 }
7361 }
7362 buf[len++] = cur;
7363 NEXT;
7364 cur=CUR;
7365 }
7366 buf[len] = 0;
7367 return(buf);
7368}
7369
7370/**
7371 * xmlParseVersionInfo:
7372 * @ctxt: an XML parser context
7373 *
7374 * parse the XML version.
7375 *
7376 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7377 *
7378 * [25] Eq ::= S? '=' S?
7379 *
7380 * Returns the version string, e.g. "1.0"
7381 */
7382
7383xmlChar *
7384xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7385 xmlChar *version = NULL;
7386 const xmlChar *q;
7387
7388 if ((RAW == 'v') && (NXT(1) == 'e') &&
7389 (NXT(2) == 'r') && (NXT(3) == 's') &&
7390 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7391 (NXT(6) == 'n')) {
7392 SKIP(7);
7393 SKIP_BLANKS;
7394 if (RAW != '=') {
7395 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7396 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7397 ctxt->sax->error(ctxt->userData,
7398 "xmlParseVersionInfo : expected '='\n");
7399 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007400 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007401 return(NULL);
7402 }
7403 NEXT;
7404 SKIP_BLANKS;
7405 if (RAW == '"') {
7406 NEXT;
7407 q = CUR_PTR;
7408 version = xmlParseVersionNum(ctxt);
7409 if (RAW != '"') {
7410 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7411 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7412 ctxt->sax->error(ctxt->userData,
7413 "String not closed\n%.50s\n", q);
7414 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007415 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007416 } else
7417 NEXT;
7418 } else if (RAW == '\''){
7419 NEXT;
7420 q = CUR_PTR;
7421 version = xmlParseVersionNum(ctxt);
7422 if (RAW != '\'') {
7423 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7424 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7425 ctxt->sax->error(ctxt->userData,
7426 "String not closed\n%.50s\n", q);
7427 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007428 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007429 } else
7430 NEXT;
7431 } else {
7432 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7433 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7434 ctxt->sax->error(ctxt->userData,
7435 "xmlParseVersionInfo : expected ' or \"\n");
7436 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007437 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007438 }
7439 }
7440 return(version);
7441}
7442
7443/**
7444 * xmlParseEncName:
7445 * @ctxt: an XML parser context
7446 *
7447 * parse the XML encoding name
7448 *
7449 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7450 *
7451 * Returns the encoding name value or NULL
7452 */
7453xmlChar *
7454xmlParseEncName(xmlParserCtxtPtr ctxt) {
7455 xmlChar *buf = NULL;
7456 int len = 0;
7457 int size = 10;
7458 xmlChar cur;
7459
7460 cur = CUR;
7461 if (((cur >= 'a') && (cur <= 'z')) ||
7462 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007463 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007464 if (buf == NULL) {
7465 xmlGenericError(xmlGenericErrorContext,
7466 "malloc of %d byte failed\n", size);
7467 return(NULL);
7468 }
7469
7470 buf[len++] = cur;
7471 NEXT;
7472 cur = CUR;
7473 while (((cur >= 'a') && (cur <= 'z')) ||
7474 ((cur >= 'A') && (cur <= 'Z')) ||
7475 ((cur >= '0') && (cur <= '9')) ||
7476 (cur == '.') || (cur == '_') ||
7477 (cur == '-')) {
7478 if (len + 1 >= size) {
7479 size *= 2;
7480 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7481 if (buf == NULL) {
7482 xmlGenericError(xmlGenericErrorContext,
7483 "realloc of %d byte failed\n", size);
7484 return(NULL);
7485 }
7486 }
7487 buf[len++] = cur;
7488 NEXT;
7489 cur = CUR;
7490 if (cur == 0) {
7491 SHRINK;
7492 GROW;
7493 cur = CUR;
7494 }
7495 }
7496 buf[len] = 0;
7497 } else {
7498 ctxt->errNo = XML_ERR_ENCODING_NAME;
7499 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7500 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7501 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007502 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007503 }
7504 return(buf);
7505}
7506
7507/**
7508 * xmlParseEncodingDecl:
7509 * @ctxt: an XML parser context
7510 *
7511 * parse the XML encoding declaration
7512 *
7513 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7514 *
7515 * this setups the conversion filters.
7516 *
7517 * Returns the encoding value or NULL
7518 */
7519
7520xmlChar *
7521xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7522 xmlChar *encoding = NULL;
7523 const xmlChar *q;
7524
7525 SKIP_BLANKS;
7526 if ((RAW == 'e') && (NXT(1) == 'n') &&
7527 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7528 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7529 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7530 SKIP(8);
7531 SKIP_BLANKS;
7532 if (RAW != '=') {
7533 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7534 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7535 ctxt->sax->error(ctxt->userData,
7536 "xmlParseEncodingDecl : expected '='\n");
7537 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007538 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007539 return(NULL);
7540 }
7541 NEXT;
7542 SKIP_BLANKS;
7543 if (RAW == '"') {
7544 NEXT;
7545 q = CUR_PTR;
7546 encoding = xmlParseEncName(ctxt);
7547 if (RAW != '"') {
7548 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7549 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7550 ctxt->sax->error(ctxt->userData,
7551 "String not closed\n%.50s\n", q);
7552 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007553 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007554 } else
7555 NEXT;
7556 } else if (RAW == '\''){
7557 NEXT;
7558 q = CUR_PTR;
7559 encoding = xmlParseEncName(ctxt);
7560 if (RAW != '\'') {
7561 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7562 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7563 ctxt->sax->error(ctxt->userData,
7564 "String not closed\n%.50s\n", q);
7565 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007566 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007567 } else
7568 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00007569 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007570 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7571 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7572 ctxt->sax->error(ctxt->userData,
7573 "xmlParseEncodingDecl : expected ' or \"\n");
7574 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007575 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007576 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00007577 /*
7578 * UTF-16 encoding stwich has already taken place at this stage,
7579 * more over the little-endian/big-endian selection is already done
7580 */
7581 if ((encoding != NULL) &&
7582 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
7583 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
7584 xmlFree(encoding);
7585 encoding = NULL;
7586 }
Owen Taylor3473f882001-02-23 17:55:21 +00007587 if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00007588 xmlCharEncodingHandlerPtr handler;
7589
7590 if (ctxt->input->encoding != NULL)
7591 xmlFree((xmlChar *) ctxt->input->encoding);
7592 ctxt->input->encoding = encoding;
7593
Daniel Veillarda6874ca2003-07-29 16:47:24 +00007594 handler = xmlFindCharEncodingHandler((const char *) encoding);
7595 if (handler != NULL) {
7596 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00007597 } else {
Daniel Veillarda6874ca2003-07-29 16:47:24 +00007598 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7599 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7600 ctxt->sax->error(ctxt->userData,
7601 "Unsupported encoding %s\n", encoding);
7602 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007603 }
7604 }
7605 }
7606 return(encoding);
7607}
7608
7609/**
7610 * xmlParseSDDecl:
7611 * @ctxt: an XML parser context
7612 *
7613 * parse the XML standalone declaration
7614 *
7615 * [32] SDDecl ::= S 'standalone' Eq
7616 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7617 *
7618 * [ VC: Standalone Document Declaration ]
7619 * TODO The standalone document declaration must have the value "no"
7620 * if any external markup declarations contain declarations of:
7621 * - attributes with default values, if elements to which these
7622 * attributes apply appear in the document without specifications
7623 * of values for these attributes, or
7624 * - entities (other than amp, lt, gt, apos, quot), if references
7625 * to those entities appear in the document, or
7626 * - attributes with values subject to normalization, where the
7627 * attribute appears in the document with a value which will change
7628 * as a result of normalization, or
7629 * - element types with element content, if white space occurs directly
7630 * within any instance of those types.
7631 *
7632 * Returns 1 if standalone, 0 otherwise
7633 */
7634
7635int
7636xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7637 int standalone = -1;
7638
7639 SKIP_BLANKS;
7640 if ((RAW == 's') && (NXT(1) == 't') &&
7641 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7642 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7643 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7644 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7645 SKIP(10);
7646 SKIP_BLANKS;
7647 if (RAW != '=') {
7648 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7649 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7650 ctxt->sax->error(ctxt->userData,
7651 "XML standalone declaration : expected '='\n");
7652 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007653 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007654 return(standalone);
7655 }
7656 NEXT;
7657 SKIP_BLANKS;
7658 if (RAW == '\''){
7659 NEXT;
7660 if ((RAW == 'n') && (NXT(1) == 'o')) {
7661 standalone = 0;
7662 SKIP(2);
7663 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7664 (NXT(2) == 's')) {
7665 standalone = 1;
7666 SKIP(3);
7667 } else {
7668 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7669 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7670 ctxt->sax->error(ctxt->userData,
7671 "standalone accepts only 'yes' or 'no'\n");
7672 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007673 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007674 }
7675 if (RAW != '\'') {
7676 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7677 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7678 ctxt->sax->error(ctxt->userData, "String not closed\n");
7679 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007680 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007681 } else
7682 NEXT;
7683 } else if (RAW == '"'){
7684 NEXT;
7685 if ((RAW == 'n') && (NXT(1) == 'o')) {
7686 standalone = 0;
7687 SKIP(2);
7688 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7689 (NXT(2) == 's')) {
7690 standalone = 1;
7691 SKIP(3);
7692 } else {
7693 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7694 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7695 ctxt->sax->error(ctxt->userData,
7696 "standalone accepts only 'yes' or 'no'\n");
7697 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007698 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007699 }
7700 if (RAW != '"') {
7701 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7702 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7703 ctxt->sax->error(ctxt->userData, "String not closed\n");
7704 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007705 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007706 } else
7707 NEXT;
7708 } else {
7709 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7710 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7711 ctxt->sax->error(ctxt->userData,
7712 "Standalone value not found\n");
7713 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007714 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007715 }
7716 }
7717 return(standalone);
7718}
7719
7720/**
7721 * xmlParseXMLDecl:
7722 * @ctxt: an XML parser context
7723 *
7724 * parse an XML declaration header
7725 *
7726 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7727 */
7728
7729void
7730xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7731 xmlChar *version;
7732
7733 /*
7734 * We know that '<?xml' is here.
7735 */
7736 SKIP(5);
7737
7738 if (!IS_BLANK(RAW)) {
7739 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7740 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7741 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7742 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007743 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007744 }
7745 SKIP_BLANKS;
7746
7747 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007748 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007749 */
7750 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007751 if (version == NULL) {
7752 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7753 ctxt->sax->error(ctxt->userData,
7754 "Malformed declaration expecting version\n");
7755 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007756 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard19840942001-11-29 16:11:38 +00007757 } else {
7758 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7759 /*
7760 * TODO: Blueberry should be detected here
7761 */
7762 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7763 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7764 version);
7765 }
7766 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007767 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007768 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007769 }
Owen Taylor3473f882001-02-23 17:55:21 +00007770
7771 /*
7772 * We may have the encoding declaration
7773 */
7774 if (!IS_BLANK(RAW)) {
7775 if ((RAW == '?') && (NXT(1) == '>')) {
7776 SKIP(2);
7777 return;
7778 }
7779 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7780 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7781 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7782 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007783 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007784 }
7785 xmlParseEncodingDecl(ctxt);
7786 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7787 /*
7788 * The XML REC instructs us to stop parsing right here
7789 */
7790 return;
7791 }
7792
7793 /*
7794 * We may have the standalone status.
7795 */
7796 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7797 if ((RAW == '?') && (NXT(1) == '>')) {
7798 SKIP(2);
7799 return;
7800 }
7801 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7802 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7803 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7804 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007805 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007806 }
7807 SKIP_BLANKS;
7808 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7809
7810 SKIP_BLANKS;
7811 if ((RAW == '?') && (NXT(1) == '>')) {
7812 SKIP(2);
7813 } else if (RAW == '>') {
7814 /* Deprecated old WD ... */
7815 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7816 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7817 ctxt->sax->error(ctxt->userData,
7818 "XML declaration must end-up with '?>'\n");
7819 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007820 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007821 NEXT;
7822 } else {
7823 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7824 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7825 ctxt->sax->error(ctxt->userData,
7826 "parsing XML declaration: '?>' expected\n");
7827 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007828 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007829 MOVETO_ENDTAG(CUR_PTR);
7830 NEXT;
7831 }
7832}
7833
7834/**
7835 * xmlParseMisc:
7836 * @ctxt: an XML parser context
7837 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007838 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007839 *
7840 * [27] Misc ::= Comment | PI | S
7841 */
7842
7843void
7844xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00007845 while (((RAW == '<') && (NXT(1) == '?')) ||
7846 ((RAW == '<') && (NXT(1) == '!') &&
7847 (NXT(2) == '-') && (NXT(3) == '-')) ||
7848 IS_BLANK(CUR)) {
7849 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007850 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00007851 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007852 NEXT;
7853 } else
7854 xmlParseComment(ctxt);
7855 }
7856}
7857
7858/**
7859 * xmlParseDocument:
7860 * @ctxt: an XML parser context
7861 *
7862 * parse an XML document (and build a tree if using the standard SAX
7863 * interface).
7864 *
7865 * [1] document ::= prolog element Misc*
7866 *
7867 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7868 *
7869 * Returns 0, -1 in case of error. the parser context is augmented
7870 * as a result of the parsing.
7871 */
7872
7873int
7874xmlParseDocument(xmlParserCtxtPtr ctxt) {
7875 xmlChar start[4];
7876 xmlCharEncoding enc;
7877
7878 xmlInitParser();
7879
7880 GROW;
7881
7882 /*
7883 * SAX: beginning of the document processing.
7884 */
7885 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7886 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7887
Daniel Veillard50f34372001-08-03 12:06:36 +00007888 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007889 /*
7890 * Get the 4 first bytes and decode the charset
7891 * if enc != XML_CHAR_ENCODING_NONE
7892 * plug some encoding conversion routines.
7893 */
7894 start[0] = RAW;
7895 start[1] = NXT(1);
7896 start[2] = NXT(2);
7897 start[3] = NXT(3);
7898 enc = xmlDetectCharEncoding(start, 4);
7899 if (enc != XML_CHAR_ENCODING_NONE) {
7900 xmlSwitchEncoding(ctxt, enc);
7901 }
Owen Taylor3473f882001-02-23 17:55:21 +00007902 }
7903
7904
7905 if (CUR == 0) {
7906 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7907 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7908 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7909 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007910 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007911 }
7912
7913 /*
7914 * Check for the XMLDecl in the Prolog.
7915 */
7916 GROW;
7917 if ((RAW == '<') && (NXT(1) == '?') &&
7918 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7919 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7920
7921 /*
7922 * Note that we will switch encoding on the fly.
7923 */
7924 xmlParseXMLDecl(ctxt);
7925 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7926 /*
7927 * The XML REC instructs us to stop parsing right here
7928 */
7929 return(-1);
7930 }
7931 ctxt->standalone = ctxt->input->standalone;
7932 SKIP_BLANKS;
7933 } else {
7934 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7935 }
7936 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7937 ctxt->sax->startDocument(ctxt->userData);
7938
7939 /*
7940 * The Misc part of the Prolog
7941 */
7942 GROW;
7943 xmlParseMisc(ctxt);
7944
7945 /*
7946 * Then possibly doc type declaration(s) and more Misc
7947 * (doctypedecl Misc*)?
7948 */
7949 GROW;
7950 if ((RAW == '<') && (NXT(1) == '!') &&
7951 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7952 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7953 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7954 (NXT(8) == 'E')) {
7955
7956 ctxt->inSubset = 1;
7957 xmlParseDocTypeDecl(ctxt);
7958 if (RAW == '[') {
7959 ctxt->instate = XML_PARSER_DTD;
7960 xmlParseInternalSubset(ctxt);
7961 }
7962
7963 /*
7964 * Create and update the external subset.
7965 */
7966 ctxt->inSubset = 2;
7967 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7968 (!ctxt->disableSAX))
7969 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7970 ctxt->extSubSystem, ctxt->extSubURI);
7971 ctxt->inSubset = 0;
7972
7973
7974 ctxt->instate = XML_PARSER_PROLOG;
7975 xmlParseMisc(ctxt);
7976 }
7977
7978 /*
7979 * Time to start parsing the tree itself
7980 */
7981 GROW;
7982 if (RAW != '<') {
7983 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7984 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7985 ctxt->sax->error(ctxt->userData,
7986 "Start tag expected, '<' not found\n");
7987 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007988 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007989 ctxt->instate = XML_PARSER_EOF;
7990 } else {
7991 ctxt->instate = XML_PARSER_CONTENT;
7992 xmlParseElement(ctxt);
7993 ctxt->instate = XML_PARSER_EPILOG;
7994
7995
7996 /*
7997 * The Misc part at the end
7998 */
7999 xmlParseMisc(ctxt);
8000
Daniel Veillard561b7f82002-03-20 21:55:57 +00008001 if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00008002 ctxt->errNo = XML_ERR_DOCUMENT_END;
8003 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8004 ctxt->sax->error(ctxt->userData,
8005 "Extra content at the end of the document\n");
8006 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008007 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008008 }
8009 ctxt->instate = XML_PARSER_EOF;
8010 }
8011
8012 /*
8013 * SAX: end of the document processing.
8014 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008015 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008016 ctxt->sax->endDocument(ctxt->userData);
8017
Daniel Veillard5997aca2002-03-18 18:36:20 +00008018 /*
8019 * Remove locally kept entity definitions if the tree was not built
8020 */
8021 if ((ctxt->myDoc != NULL) &&
8022 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
8023 xmlFreeDoc(ctxt->myDoc);
8024 ctxt->myDoc = NULL;
8025 }
8026
Daniel Veillardc7612992002-02-17 22:47:37 +00008027 if (! ctxt->wellFormed) {
8028 ctxt->valid = 0;
8029 return(-1);
8030 }
Owen Taylor3473f882001-02-23 17:55:21 +00008031 return(0);
8032}
8033
8034/**
8035 * xmlParseExtParsedEnt:
8036 * @ctxt: an XML parser context
8037 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008038 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00008039 * An external general parsed entity is well-formed if it matches the
8040 * production labeled extParsedEnt.
8041 *
8042 * [78] extParsedEnt ::= TextDecl? content
8043 *
8044 * Returns 0, -1 in case of error. the parser context is augmented
8045 * as a result of the parsing.
8046 */
8047
8048int
8049xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
8050 xmlChar start[4];
8051 xmlCharEncoding enc;
8052
8053 xmlDefaultSAXHandlerInit();
8054
8055 GROW;
8056
8057 /*
8058 * SAX: beginning of the document processing.
8059 */
8060 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8061 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8062
8063 /*
8064 * Get the 4 first bytes and decode the charset
8065 * if enc != XML_CHAR_ENCODING_NONE
8066 * plug some encoding conversion routines.
8067 */
8068 start[0] = RAW;
8069 start[1] = NXT(1);
8070 start[2] = NXT(2);
8071 start[3] = NXT(3);
8072 enc = xmlDetectCharEncoding(start, 4);
8073 if (enc != XML_CHAR_ENCODING_NONE) {
8074 xmlSwitchEncoding(ctxt, enc);
8075 }
8076
8077
8078 if (CUR == 0) {
8079 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8080 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8081 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8082 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008083 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008084 }
8085
8086 /*
8087 * Check for the XMLDecl in the Prolog.
8088 */
8089 GROW;
8090 if ((RAW == '<') && (NXT(1) == '?') &&
8091 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8092 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8093
8094 /*
8095 * Note that we will switch encoding on the fly.
8096 */
8097 xmlParseXMLDecl(ctxt);
8098 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8099 /*
8100 * The XML REC instructs us to stop parsing right here
8101 */
8102 return(-1);
8103 }
8104 SKIP_BLANKS;
8105 } else {
8106 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8107 }
8108 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8109 ctxt->sax->startDocument(ctxt->userData);
8110
8111 /*
8112 * Doing validity checking on chunk doesn't make sense
8113 */
8114 ctxt->instate = XML_PARSER_CONTENT;
8115 ctxt->validate = 0;
8116 ctxt->loadsubset = 0;
8117 ctxt->depth = 0;
8118
8119 xmlParseContent(ctxt);
8120
8121 if ((RAW == '<') && (NXT(1) == '/')) {
8122 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8123 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8124 ctxt->sax->error(ctxt->userData,
8125 "chunk is not well balanced\n");
8126 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008127 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008128 } else if (RAW != 0) {
8129 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8130 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8131 ctxt->sax->error(ctxt->userData,
8132 "extra content at the end of well balanced chunk\n");
8133 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008134 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008135 }
8136
8137 /*
8138 * SAX: end of the document processing.
8139 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008140 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008141 ctxt->sax->endDocument(ctxt->userData);
8142
8143 if (! ctxt->wellFormed) return(-1);
8144 return(0);
8145}
8146
8147/************************************************************************
8148 * *
8149 * Progressive parsing interfaces *
8150 * *
8151 ************************************************************************/
8152
8153/**
8154 * xmlParseLookupSequence:
8155 * @ctxt: an XML parser context
8156 * @first: the first char to lookup
8157 * @next: the next char to lookup or zero
8158 * @third: the next char to lookup or zero
8159 *
8160 * Try to find if a sequence (first, next, third) or just (first next) or
8161 * (first) is available in the input stream.
8162 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8163 * to avoid rescanning sequences of bytes, it DOES change the state of the
8164 * parser, do not use liberally.
8165 *
8166 * Returns the index to the current parsing point if the full sequence
8167 * is available, -1 otherwise.
8168 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008169static int
Owen Taylor3473f882001-02-23 17:55:21 +00008170xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8171 xmlChar next, xmlChar third) {
8172 int base, len;
8173 xmlParserInputPtr in;
8174 const xmlChar *buf;
8175
8176 in = ctxt->input;
8177 if (in == NULL) return(-1);
8178 base = in->cur - in->base;
8179 if (base < 0) return(-1);
8180 if (ctxt->checkIndex > base)
8181 base = ctxt->checkIndex;
8182 if (in->buf == NULL) {
8183 buf = in->base;
8184 len = in->length;
8185 } else {
8186 buf = in->buf->buffer->content;
8187 len = in->buf->buffer->use;
8188 }
8189 /* take into account the sequence length */
8190 if (third) len -= 2;
8191 else if (next) len --;
8192 for (;base < len;base++) {
8193 if (buf[base] == first) {
8194 if (third != 0) {
8195 if ((buf[base + 1] != next) ||
8196 (buf[base + 2] != third)) continue;
8197 } else if (next != 0) {
8198 if (buf[base + 1] != next) continue;
8199 }
8200 ctxt->checkIndex = 0;
8201#ifdef DEBUG_PUSH
8202 if (next == 0)
8203 xmlGenericError(xmlGenericErrorContext,
8204 "PP: lookup '%c' found at %d\n",
8205 first, base);
8206 else if (third == 0)
8207 xmlGenericError(xmlGenericErrorContext,
8208 "PP: lookup '%c%c' found at %d\n",
8209 first, next, base);
8210 else
8211 xmlGenericError(xmlGenericErrorContext,
8212 "PP: lookup '%c%c%c' found at %d\n",
8213 first, next, third, base);
8214#endif
8215 return(base - (in->cur - in->base));
8216 }
8217 }
8218 ctxt->checkIndex = base;
8219#ifdef DEBUG_PUSH
8220 if (next == 0)
8221 xmlGenericError(xmlGenericErrorContext,
8222 "PP: lookup '%c' failed\n", first);
8223 else if (third == 0)
8224 xmlGenericError(xmlGenericErrorContext,
8225 "PP: lookup '%c%c' failed\n", first, next);
8226 else
8227 xmlGenericError(xmlGenericErrorContext,
8228 "PP: lookup '%c%c%c' failed\n", first, next, third);
8229#endif
8230 return(-1);
8231}
8232
8233/**
Daniel Veillarda880b122003-04-21 21:36:41 +00008234 * xmlParseGetLasts:
8235 * @ctxt: an XML parser context
8236 * @lastlt: pointer to store the last '<' from the input
8237 * @lastgt: pointer to store the last '>' from the input
8238 *
8239 * Lookup the last < and > in the current chunk
8240 */
8241static void
8242xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
8243 const xmlChar **lastgt) {
8244 const xmlChar *tmp;
8245
8246 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
8247 xmlGenericError(xmlGenericErrorContext,
8248 "Internal error: xmlParseGetLasts\n");
8249 return;
8250 }
8251 if ((ctxt->progressive == 1) && (ctxt->inputNr == 1)) {
8252 tmp = ctxt->input->end;
8253 tmp--;
8254 while ((tmp >= ctxt->input->base) && (*tmp != '<') &&
8255 (*tmp != '>')) tmp--;
8256 if (tmp < ctxt->input->base) {
8257 *lastlt = NULL;
8258 *lastgt = NULL;
8259 } else if (*tmp == '<') {
8260 *lastlt = tmp;
8261 tmp--;
8262 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
8263 if (tmp < ctxt->input->base)
8264 *lastgt = NULL;
8265 else
8266 *lastgt = tmp;
8267 } else {
8268 *lastgt = tmp;
8269 tmp--;
8270 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
8271 if (tmp < ctxt->input->base)
8272 *lastlt = NULL;
8273 else
8274 *lastlt = tmp;
8275 }
8276
8277 } else {
8278 *lastlt = NULL;
8279 *lastgt = NULL;
8280 }
8281}
8282/**
Owen Taylor3473f882001-02-23 17:55:21 +00008283 * xmlParseTryOrFinish:
8284 * @ctxt: an XML parser context
8285 * @terminate: last chunk indicator
8286 *
8287 * Try to progress on parsing
8288 *
8289 * Returns zero if no parsing was possible
8290 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008291static int
Owen Taylor3473f882001-02-23 17:55:21 +00008292xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8293 int ret = 0;
8294 int avail;
8295 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00008296 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00008297
8298#ifdef DEBUG_PUSH
8299 switch (ctxt->instate) {
8300 case XML_PARSER_EOF:
8301 xmlGenericError(xmlGenericErrorContext,
8302 "PP: try EOF\n"); break;
8303 case XML_PARSER_START:
8304 xmlGenericError(xmlGenericErrorContext,
8305 "PP: try START\n"); break;
8306 case XML_PARSER_MISC:
8307 xmlGenericError(xmlGenericErrorContext,
8308 "PP: try MISC\n");break;
8309 case XML_PARSER_COMMENT:
8310 xmlGenericError(xmlGenericErrorContext,
8311 "PP: try COMMENT\n");break;
8312 case XML_PARSER_PROLOG:
8313 xmlGenericError(xmlGenericErrorContext,
8314 "PP: try PROLOG\n");break;
8315 case XML_PARSER_START_TAG:
8316 xmlGenericError(xmlGenericErrorContext,
8317 "PP: try START_TAG\n");break;
8318 case XML_PARSER_CONTENT:
8319 xmlGenericError(xmlGenericErrorContext,
8320 "PP: try CONTENT\n");break;
8321 case XML_PARSER_CDATA_SECTION:
8322 xmlGenericError(xmlGenericErrorContext,
8323 "PP: try CDATA_SECTION\n");break;
8324 case XML_PARSER_END_TAG:
8325 xmlGenericError(xmlGenericErrorContext,
8326 "PP: try END_TAG\n");break;
8327 case XML_PARSER_ENTITY_DECL:
8328 xmlGenericError(xmlGenericErrorContext,
8329 "PP: try ENTITY_DECL\n");break;
8330 case XML_PARSER_ENTITY_VALUE:
8331 xmlGenericError(xmlGenericErrorContext,
8332 "PP: try ENTITY_VALUE\n");break;
8333 case XML_PARSER_ATTRIBUTE_VALUE:
8334 xmlGenericError(xmlGenericErrorContext,
8335 "PP: try ATTRIBUTE_VALUE\n");break;
8336 case XML_PARSER_DTD:
8337 xmlGenericError(xmlGenericErrorContext,
8338 "PP: try DTD\n");break;
8339 case XML_PARSER_EPILOG:
8340 xmlGenericError(xmlGenericErrorContext,
8341 "PP: try EPILOG\n");break;
8342 case XML_PARSER_PI:
8343 xmlGenericError(xmlGenericErrorContext,
8344 "PP: try PI\n");break;
8345 case XML_PARSER_IGNORE:
8346 xmlGenericError(xmlGenericErrorContext,
8347 "PP: try IGNORE\n");break;
8348 }
8349#endif
8350
Daniel Veillarda880b122003-04-21 21:36:41 +00008351 if (ctxt->input->cur - ctxt->input->base > 4096) {
8352 xmlSHRINK(ctxt);
8353 ctxt->checkIndex = 0;
8354 }
8355 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00008356
Daniel Veillarda880b122003-04-21 21:36:41 +00008357 while (1) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008358 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
8359 return(0);
8360
8361
Owen Taylor3473f882001-02-23 17:55:21 +00008362 /*
8363 * Pop-up of finished entities.
8364 */
8365 while ((RAW == 0) && (ctxt->inputNr > 1))
8366 xmlPopInput(ctxt);
8367
8368 if (ctxt->input ==NULL) break;
8369 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00008370 avail = ctxt->input->length -
8371 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00008372 else {
8373 /*
8374 * If we are operating on converted input, try to flush
             * remaining chars to avoid them stalling in the non-converted
8376 * buffer.
8377 */
8378 if ((ctxt->input->buf->raw != NULL) &&
8379 (ctxt->input->buf->raw->use > 0)) {
8380 int base = ctxt->input->base -
8381 ctxt->input->buf->buffer->content;
8382 int current = ctxt->input->cur - ctxt->input->base;
8383
8384 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8385 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8386 ctxt->input->cur = ctxt->input->base + current;
8387 ctxt->input->end =
8388 &ctxt->input->buf->buffer->content[
8389 ctxt->input->buf->buffer->use];
8390 }
8391 avail = ctxt->input->buf->buffer->use -
8392 (ctxt->input->cur - ctxt->input->base);
8393 }
Owen Taylor3473f882001-02-23 17:55:21 +00008394 if (avail < 1)
8395 goto done;
8396 switch (ctxt->instate) {
8397 case XML_PARSER_EOF:
8398 /*
8399 * Document parsing is done !
8400 */
8401 goto done;
8402 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008403 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8404 xmlChar start[4];
8405 xmlCharEncoding enc;
8406
8407 /*
8408 * Very first chars read from the document flow.
8409 */
8410 if (avail < 4)
8411 goto done;
8412
8413 /*
8414 * Get the 4 first bytes and decode the charset
8415 * if enc != XML_CHAR_ENCODING_NONE
8416 * plug some encoding conversion routines.
8417 */
8418 start[0] = RAW;
8419 start[1] = NXT(1);
8420 start[2] = NXT(2);
8421 start[3] = NXT(3);
8422 enc = xmlDetectCharEncoding(start, 4);
8423 if (enc != XML_CHAR_ENCODING_NONE) {
8424 xmlSwitchEncoding(ctxt, enc);
8425 }
8426 break;
8427 }
Owen Taylor3473f882001-02-23 17:55:21 +00008428
8429 cur = ctxt->input->cur[0];
8430 next = ctxt->input->cur[1];
8431 if (cur == 0) {
8432 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8433 ctxt->sax->setDocumentLocator(ctxt->userData,
8434 &xmlDefaultSAXLocator);
8435 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8436 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8437 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8438 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008439 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008440 ctxt->instate = XML_PARSER_EOF;
8441#ifdef DEBUG_PUSH
8442 xmlGenericError(xmlGenericErrorContext,
8443 "PP: entering EOF\n");
8444#endif
8445 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8446 ctxt->sax->endDocument(ctxt->userData);
8447 goto done;
8448 }
8449 if ((cur == '<') && (next == '?')) {
8450 /* PI or XML decl */
8451 if (avail < 5) return(ret);
8452 if ((!terminate) &&
8453 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8454 return(ret);
8455 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8456 ctxt->sax->setDocumentLocator(ctxt->userData,
8457 &xmlDefaultSAXLocator);
8458 if ((ctxt->input->cur[2] == 'x') &&
8459 (ctxt->input->cur[3] == 'm') &&
8460 (ctxt->input->cur[4] == 'l') &&
8461 (IS_BLANK(ctxt->input->cur[5]))) {
8462 ret += 5;
8463#ifdef DEBUG_PUSH
8464 xmlGenericError(xmlGenericErrorContext,
8465 "PP: Parsing XML Decl\n");
8466#endif
8467 xmlParseXMLDecl(ctxt);
8468 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8469 /*
8470 * The XML REC instructs us to stop parsing right
8471 * here
8472 */
8473 ctxt->instate = XML_PARSER_EOF;
8474 return(0);
8475 }
8476 ctxt->standalone = ctxt->input->standalone;
8477 if ((ctxt->encoding == NULL) &&
8478 (ctxt->input->encoding != NULL))
8479 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8480 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8481 (!ctxt->disableSAX))
8482 ctxt->sax->startDocument(ctxt->userData);
8483 ctxt->instate = XML_PARSER_MISC;
8484#ifdef DEBUG_PUSH
8485 xmlGenericError(xmlGenericErrorContext,
8486 "PP: entering MISC\n");
8487#endif
8488 } else {
8489 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8490 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8491 (!ctxt->disableSAX))
8492 ctxt->sax->startDocument(ctxt->userData);
8493 ctxt->instate = XML_PARSER_MISC;
8494#ifdef DEBUG_PUSH
8495 xmlGenericError(xmlGenericErrorContext,
8496 "PP: entering MISC\n");
8497#endif
8498 }
8499 } else {
8500 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8501 ctxt->sax->setDocumentLocator(ctxt->userData,
8502 &xmlDefaultSAXLocator);
8503 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8504 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8505 (!ctxt->disableSAX))
8506 ctxt->sax->startDocument(ctxt->userData);
8507 ctxt->instate = XML_PARSER_MISC;
8508#ifdef DEBUG_PUSH
8509 xmlGenericError(xmlGenericErrorContext,
8510 "PP: entering MISC\n");
8511#endif
8512 }
8513 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00008514 case XML_PARSER_START_TAG: {
8515 xmlChar *name, *oldname;
8516
8517 if ((avail < 2) && (ctxt->inputNr == 1))
8518 goto done;
8519 cur = ctxt->input->cur[0];
8520 if (cur != '<') {
8521 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8522 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8523 ctxt->sax->error(ctxt->userData,
8524 "Start tag expect, '<' not found\n");
8525 ctxt->wellFormed = 0;
8526 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8527 ctxt->instate = XML_PARSER_EOF;
8528#ifdef DEBUG_PUSH
8529 xmlGenericError(xmlGenericErrorContext,
8530 "PP: entering EOF\n");
8531#endif
8532 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8533 ctxt->sax->endDocument(ctxt->userData);
8534 goto done;
8535 }
8536 if (!terminate) {
8537 if (ctxt->progressive) {
8538 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
8539 goto done;
8540 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
8541 goto done;
8542 }
8543 }
8544 if (ctxt->spaceNr == 0)
8545 spacePush(ctxt, -1);
8546 else
8547 spacePush(ctxt, *ctxt->space);
8548 name = xmlParseStartTag(ctxt);
8549 if (name == NULL) {
8550 spacePop(ctxt);
8551 ctxt->instate = XML_PARSER_EOF;
8552#ifdef DEBUG_PUSH
8553 xmlGenericError(xmlGenericErrorContext,
8554 "PP: entering EOF\n");
8555#endif
8556 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8557 ctxt->sax->endDocument(ctxt->userData);
8558 goto done;
8559 }
8560 namePush(ctxt, name);
8561
8562 /*
8563 * [ VC: Root Element Type ]
8564 * The Name in the document type declaration must match
8565 * the element type of the root element.
8566 */
8567 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8568 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8569 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8570
8571 /*
8572 * Check for an Empty Element.
8573 */
8574 if ((RAW == '/') && (NXT(1) == '>')) {
8575 SKIP(2);
8576 if ((ctxt->sax != NULL) &&
8577 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8578 ctxt->sax->endElement(ctxt->userData, name);
8579 oldname = namePop(ctxt);
8580 spacePop(ctxt);
8581 if (oldname != NULL) {
8582#ifdef DEBUG_STACK
8583 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8584#endif
8585 xmlFree(oldname);
8586 }
8587 if (ctxt->name == NULL) {
8588 ctxt->instate = XML_PARSER_EPILOG;
8589#ifdef DEBUG_PUSH
8590 xmlGenericError(xmlGenericErrorContext,
8591 "PP: entering EPILOG\n");
8592#endif
8593 } else {
8594 ctxt->instate = XML_PARSER_CONTENT;
8595#ifdef DEBUG_PUSH
8596 xmlGenericError(xmlGenericErrorContext,
8597 "PP: entering CONTENT\n");
8598#endif
8599 }
8600 break;
8601 }
8602 if (RAW == '>') {
8603 NEXT;
8604 } else {
8605 ctxt->errNo = XML_ERR_GT_REQUIRED;
8606 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8607 ctxt->sax->error(ctxt->userData,
8608 "Couldn't find end of Start Tag %s\n",
8609 name);
8610 ctxt->wellFormed = 0;
8611 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8612
8613 /*
8614 * end of parsing of this node.
8615 */
8616 nodePop(ctxt);
8617 oldname = namePop(ctxt);
8618 spacePop(ctxt);
8619 if (oldname != NULL) {
8620#ifdef DEBUG_STACK
8621 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8622#endif
8623 xmlFree(oldname);
8624 }
8625 }
8626 ctxt->instate = XML_PARSER_CONTENT;
8627#ifdef DEBUG_PUSH
8628 xmlGenericError(xmlGenericErrorContext,
8629 "PP: entering CONTENT\n");
8630#endif
8631 break;
8632 }
8633 case XML_PARSER_CONTENT: {
8634 const xmlChar *test;
8635 unsigned int cons;
8636 if ((avail < 2) && (ctxt->inputNr == 1))
8637 goto done;
8638 cur = ctxt->input->cur[0];
8639 next = ctxt->input->cur[1];
8640
8641 test = CUR_PTR;
8642 cons = ctxt->input->consumed;
8643 if ((cur == '<') && (next == '/')) {
8644 ctxt->instate = XML_PARSER_END_TAG;
8645#ifdef DEBUG_PUSH
8646 xmlGenericError(xmlGenericErrorContext,
8647 "PP: entering END_TAG\n");
8648#endif
8649 break;
8650 } else if ((cur == '<') && (next == '?')) {
8651 if ((!terminate) &&
8652 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8653 goto done;
8654#ifdef DEBUG_PUSH
8655 xmlGenericError(xmlGenericErrorContext,
8656 "PP: Parsing PI\n");
8657#endif
8658 xmlParsePI(ctxt);
8659 } else if ((cur == '<') && (next != '!')) {
8660 ctxt->instate = XML_PARSER_START_TAG;
8661#ifdef DEBUG_PUSH
8662 xmlGenericError(xmlGenericErrorContext,
8663 "PP: entering START_TAG\n");
8664#endif
8665 break;
8666 } else if ((cur == '<') && (next == '!') &&
8667 (ctxt->input->cur[2] == '-') &&
8668 (ctxt->input->cur[3] == '-')) {
8669 if ((!terminate) &&
8670 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8671 goto done;
8672#ifdef DEBUG_PUSH
8673 xmlGenericError(xmlGenericErrorContext,
8674 "PP: Parsing Comment\n");
8675#endif
8676 xmlParseComment(ctxt);
8677 ctxt->instate = XML_PARSER_CONTENT;
8678 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8679 (ctxt->input->cur[2] == '[') &&
8680 (ctxt->input->cur[3] == 'C') &&
8681 (ctxt->input->cur[4] == 'D') &&
8682 (ctxt->input->cur[5] == 'A') &&
8683 (ctxt->input->cur[6] == 'T') &&
8684 (ctxt->input->cur[7] == 'A') &&
8685 (ctxt->input->cur[8] == '[')) {
8686 SKIP(9);
8687 ctxt->instate = XML_PARSER_CDATA_SECTION;
8688#ifdef DEBUG_PUSH
8689 xmlGenericError(xmlGenericErrorContext,
8690 "PP: entering CDATA_SECTION\n");
8691#endif
8692 break;
8693 } else if ((cur == '<') && (next == '!') &&
8694 (avail < 9)) {
8695 goto done;
8696 } else if (cur == '&') {
8697 if ((!terminate) &&
8698 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8699 goto done;
8700#ifdef DEBUG_PUSH
8701 xmlGenericError(xmlGenericErrorContext,
8702 "PP: Parsing Reference\n");
8703#endif
8704 xmlParseReference(ctxt);
8705 } else {
8706 /* TODO Avoid the extra copy, handle directly !!! */
8707 /*
8708 * Goal of the following test is:
8709 * - minimize calls to the SAX 'character' callback
8710 * when they are mergeable
                     *  - handle a problem for isBlank when we only parse
8712 * a sequence of blank chars and the next one is
8713 * not available to check against '<' presence.
8714 * - tries to homogenize the differences in SAX
8715 * callbacks between the push and pull versions
8716 * of the parser.
8717 */
8718 if ((ctxt->inputNr == 1) &&
8719 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8720 if (!terminate) {
8721 if (ctxt->progressive) {
8722 if ((lastlt == NULL) ||
8723 (ctxt->input->cur > lastlt))
8724 goto done;
8725 } else if (xmlParseLookupSequence(ctxt,
8726 '<', 0, 0) < 0) {
8727 goto done;
8728 }
8729 }
8730 }
8731 ctxt->checkIndex = 0;
8732#ifdef DEBUG_PUSH
8733 xmlGenericError(xmlGenericErrorContext,
8734 "PP: Parsing char data\n");
8735#endif
8736 xmlParseCharData(ctxt, 0);
8737 }
8738 /*
8739 * Pop-up of finished entities.
8740 */
8741 while ((RAW == 0) && (ctxt->inputNr > 1))
8742 xmlPopInput(ctxt);
8743 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
8744 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8745 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8746 ctxt->sax->error(ctxt->userData,
8747 "detected an error in element content\n");
8748 ctxt->wellFormed = 0;
8749 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8750 ctxt->instate = XML_PARSER_EOF;
8751 break;
8752 }
8753 break;
8754 }
8755 case XML_PARSER_END_TAG:
8756 if (avail < 2)
8757 goto done;
8758 if (!terminate) {
8759 if (ctxt->progressive) {
8760 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
8761 goto done;
8762 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
8763 goto done;
8764 }
8765 }
8766 xmlParseEndTag(ctxt);
8767 if (ctxt->name == NULL) {
8768 ctxt->instate = XML_PARSER_EPILOG;
8769#ifdef DEBUG_PUSH
8770 xmlGenericError(xmlGenericErrorContext,
8771 "PP: entering EPILOG\n");
8772#endif
8773 } else {
8774 ctxt->instate = XML_PARSER_CONTENT;
8775#ifdef DEBUG_PUSH
8776 xmlGenericError(xmlGenericErrorContext,
8777 "PP: entering CONTENT\n");
8778#endif
8779 }
8780 break;
8781 case XML_PARSER_CDATA_SECTION: {
8782 /*
8783 * The Push mode need to have the SAX callback for
8784 * cdataBlock merge back contiguous callbacks.
8785 */
8786 int base;
8787
8788 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8789 if (base < 0) {
8790 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8791 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8792 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00008793 ctxt->sax->cdataBlock(ctxt->userData,
8794 ctxt->input->cur,
8795 XML_PARSER_BIG_BUFFER_SIZE);
8796 else if (ctxt->sax->characters != NULL)
8797 ctxt->sax->characters(ctxt->userData,
8798 ctxt->input->cur,
Daniel Veillarda880b122003-04-21 21:36:41 +00008799 XML_PARSER_BIG_BUFFER_SIZE);
8800 }
8801 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8802 ctxt->checkIndex = 0;
8803 }
8804 goto done;
8805 } else {
8806 if ((ctxt->sax != NULL) && (base > 0) &&
8807 (!ctxt->disableSAX)) {
8808 if (ctxt->sax->cdataBlock != NULL)
8809 ctxt->sax->cdataBlock(ctxt->userData,
8810 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00008811 else if (ctxt->sax->characters != NULL)
8812 ctxt->sax->characters(ctxt->userData,
8813 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00008814 }
8815 SKIP(base + 3);
8816 ctxt->checkIndex = 0;
8817 ctxt->instate = XML_PARSER_CONTENT;
8818#ifdef DEBUG_PUSH
8819 xmlGenericError(xmlGenericErrorContext,
8820 "PP: entering CONTENT\n");
8821#endif
8822 }
8823 break;
8824 }
Owen Taylor3473f882001-02-23 17:55:21 +00008825 case XML_PARSER_MISC:
8826 SKIP_BLANKS;
8827 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00008828 avail = ctxt->input->length -
8829 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00008830 else
Daniel Veillarda880b122003-04-21 21:36:41 +00008831 avail = ctxt->input->buf->buffer->use -
8832 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00008833 if (avail < 2)
8834 goto done;
8835 cur = ctxt->input->cur[0];
8836 next = ctxt->input->cur[1];
8837 if ((cur == '<') && (next == '?')) {
8838 if ((!terminate) &&
8839 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8840 goto done;
8841#ifdef DEBUG_PUSH
8842 xmlGenericError(xmlGenericErrorContext,
8843 "PP: Parsing PI\n");
8844#endif
8845 xmlParsePI(ctxt);
8846 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00008847 (ctxt->input->cur[2] == '-') &&
8848 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008849 if ((!terminate) &&
8850 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8851 goto done;
8852#ifdef DEBUG_PUSH
8853 xmlGenericError(xmlGenericErrorContext,
8854 "PP: Parsing Comment\n");
8855#endif
8856 xmlParseComment(ctxt);
8857 ctxt->instate = XML_PARSER_MISC;
8858 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00008859 (ctxt->input->cur[2] == 'D') &&
8860 (ctxt->input->cur[3] == 'O') &&
8861 (ctxt->input->cur[4] == 'C') &&
8862 (ctxt->input->cur[5] == 'T') &&
8863 (ctxt->input->cur[6] == 'Y') &&
8864 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008865 (ctxt->input->cur[8] == 'E')) {
8866 if ((!terminate) &&
8867 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8868 goto done;
8869#ifdef DEBUG_PUSH
8870 xmlGenericError(xmlGenericErrorContext,
8871 "PP: Parsing internal subset\n");
8872#endif
8873 ctxt->inSubset = 1;
8874 xmlParseDocTypeDecl(ctxt);
8875 if (RAW == '[') {
8876 ctxt->instate = XML_PARSER_DTD;
8877#ifdef DEBUG_PUSH
8878 xmlGenericError(xmlGenericErrorContext,
8879 "PP: entering DTD\n");
8880#endif
8881 } else {
8882 /*
8883 * Create and update the external subset.
8884 */
8885 ctxt->inSubset = 2;
8886 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8887 (ctxt->sax->externalSubset != NULL))
8888 ctxt->sax->externalSubset(ctxt->userData,
8889 ctxt->intSubName, ctxt->extSubSystem,
8890 ctxt->extSubURI);
8891 ctxt->inSubset = 0;
8892 ctxt->instate = XML_PARSER_PROLOG;
8893#ifdef DEBUG_PUSH
8894 xmlGenericError(xmlGenericErrorContext,
8895 "PP: entering PROLOG\n");
8896#endif
8897 }
8898 } else if ((cur == '<') && (next == '!') &&
8899 (avail < 9)) {
8900 goto done;
8901 } else {
8902 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00008903 ctxt->progressive = 1;
8904 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00008905#ifdef DEBUG_PUSH
8906 xmlGenericError(xmlGenericErrorContext,
8907 "PP: entering START_TAG\n");
8908#endif
8909 }
8910 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008911 case XML_PARSER_PROLOG:
8912 SKIP_BLANKS;
8913 if (ctxt->input->buf == NULL)
8914 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8915 else
8916 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8917 if (avail < 2)
8918 goto done;
8919 cur = ctxt->input->cur[0];
8920 next = ctxt->input->cur[1];
8921 if ((cur == '<') && (next == '?')) {
8922 if ((!terminate) &&
8923 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8924 goto done;
8925#ifdef DEBUG_PUSH
8926 xmlGenericError(xmlGenericErrorContext,
8927 "PP: Parsing PI\n");
8928#endif
8929 xmlParsePI(ctxt);
8930 } else if ((cur == '<') && (next == '!') &&
8931 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8932 if ((!terminate) &&
8933 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8934 goto done;
8935#ifdef DEBUG_PUSH
8936 xmlGenericError(xmlGenericErrorContext,
8937 "PP: Parsing Comment\n");
8938#endif
8939 xmlParseComment(ctxt);
8940 ctxt->instate = XML_PARSER_PROLOG;
8941 } else if ((cur == '<') && (next == '!') &&
8942 (avail < 4)) {
8943 goto done;
8944 } else {
8945 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00008946 ctxt->progressive = 1;
8947 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00008948#ifdef DEBUG_PUSH
8949 xmlGenericError(xmlGenericErrorContext,
8950 "PP: entering START_TAG\n");
8951#endif
8952 }
8953 break;
8954 case XML_PARSER_EPILOG:
8955 SKIP_BLANKS;
8956 if (ctxt->input->buf == NULL)
8957 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8958 else
8959 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8960 if (avail < 2)
8961 goto done;
8962 cur = ctxt->input->cur[0];
8963 next = ctxt->input->cur[1];
8964 if ((cur == '<') && (next == '?')) {
8965 if ((!terminate) &&
8966 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8967 goto done;
8968#ifdef DEBUG_PUSH
8969 xmlGenericError(xmlGenericErrorContext,
8970 "PP: Parsing PI\n");
8971#endif
8972 xmlParsePI(ctxt);
8973 ctxt->instate = XML_PARSER_EPILOG;
8974 } else if ((cur == '<') && (next == '!') &&
8975 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8976 if ((!terminate) &&
8977 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8978 goto done;
8979#ifdef DEBUG_PUSH
8980 xmlGenericError(xmlGenericErrorContext,
8981 "PP: Parsing Comment\n");
8982#endif
8983 xmlParseComment(ctxt);
8984 ctxt->instate = XML_PARSER_EPILOG;
8985 } else if ((cur == '<') && (next == '!') &&
8986 (avail < 4)) {
8987 goto done;
8988 } else {
8989 ctxt->errNo = XML_ERR_DOCUMENT_END;
8990 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8991 ctxt->sax->error(ctxt->userData,
8992 "Extra content at the end of the document\n");
8993 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008994 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008995 ctxt->instate = XML_PARSER_EOF;
8996#ifdef DEBUG_PUSH
8997 xmlGenericError(xmlGenericErrorContext,
8998 "PP: entering EOF\n");
8999#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009000 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009001 ctxt->sax->endDocument(ctxt->userData);
9002 goto done;
9003 }
9004 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009005 case XML_PARSER_DTD: {
9006 /*
9007 * Sorry but progressive parsing of the internal subset
9008 * is not expected to be supported. We first check that
9009 * the full content of the internal subset is available and
9010 * the parsing is launched only at that point.
9011 * Internal subset ends up with "']' S? '>'" in an unescaped
9012 * section and not in a ']]>' sequence which are conditional
9013 * sections (whoever argued to keep that crap in XML deserve
9014 * a place in hell !).
9015 */
9016 int base, i;
9017 xmlChar *buf;
9018 xmlChar quote = 0;
9019
9020 base = ctxt->input->cur - ctxt->input->base;
9021 if (base < 0) return(0);
9022 if (ctxt->checkIndex > base)
9023 base = ctxt->checkIndex;
9024 buf = ctxt->input->buf->buffer->content;
9025 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9026 base++) {
9027 if (quote != 0) {
9028 if (buf[base] == quote)
9029 quote = 0;
9030 continue;
9031 }
9032 if (buf[base] == '"') {
9033 quote = '"';
9034 continue;
9035 }
9036 if (buf[base] == '\'') {
9037 quote = '\'';
9038 continue;
9039 }
9040 if (buf[base] == ']') {
9041 if ((unsigned int) base +1 >=
9042 ctxt->input->buf->buffer->use)
9043 break;
9044 if (buf[base + 1] == ']') {
9045 /* conditional crap, skip both ']' ! */
9046 base++;
9047 continue;
9048 }
9049 for (i = 0;
9050 (unsigned int) base + i < ctxt->input->buf->buffer->use;
9051 i++) {
9052 if (buf[base + i] == '>')
9053 goto found_end_int_subset;
9054 }
9055 break;
9056 }
9057 }
9058 /*
                 * We didn't find the end of the Internal subset
9060 */
9061 if (quote == 0)
9062 ctxt->checkIndex = base;
9063#ifdef DEBUG_PUSH
9064 if (next == 0)
9065 xmlGenericError(xmlGenericErrorContext,
9066 "PP: lookup of int subset end filed\n");
9067#endif
9068 goto done;
9069
9070found_end_int_subset:
9071 xmlParseInternalSubset(ctxt);
9072 ctxt->inSubset = 2;
9073 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9074 (ctxt->sax->externalSubset != NULL))
9075 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9076 ctxt->extSubSystem, ctxt->extSubURI);
9077 ctxt->inSubset = 0;
9078 ctxt->instate = XML_PARSER_PROLOG;
9079 ctxt->checkIndex = 0;
9080#ifdef DEBUG_PUSH
9081 xmlGenericError(xmlGenericErrorContext,
9082 "PP: entering PROLOG\n");
9083#endif
9084 break;
9085 }
9086 case XML_PARSER_COMMENT:
9087 xmlGenericError(xmlGenericErrorContext,
9088 "PP: internal error, state == COMMENT\n");
9089 ctxt->instate = XML_PARSER_CONTENT;
9090#ifdef DEBUG_PUSH
9091 xmlGenericError(xmlGenericErrorContext,
9092 "PP: entering CONTENT\n");
9093#endif
9094 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009095 case XML_PARSER_IGNORE:
9096 xmlGenericError(xmlGenericErrorContext,
9097 "PP: internal error, state == IGNORE");
9098 ctxt->instate = XML_PARSER_DTD;
9099#ifdef DEBUG_PUSH
9100 xmlGenericError(xmlGenericErrorContext,
9101 "PP: entering DTD\n");
9102#endif
9103 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009104 case XML_PARSER_PI:
9105 xmlGenericError(xmlGenericErrorContext,
9106 "PP: internal error, state == PI\n");
9107 ctxt->instate = XML_PARSER_CONTENT;
9108#ifdef DEBUG_PUSH
9109 xmlGenericError(xmlGenericErrorContext,
9110 "PP: entering CONTENT\n");
9111#endif
9112 break;
9113 case XML_PARSER_ENTITY_DECL:
9114 xmlGenericError(xmlGenericErrorContext,
9115 "PP: internal error, state == ENTITY_DECL\n");
9116 ctxt->instate = XML_PARSER_DTD;
9117#ifdef DEBUG_PUSH
9118 xmlGenericError(xmlGenericErrorContext,
9119 "PP: entering DTD\n");
9120#endif
9121 break;
9122 case XML_PARSER_ENTITY_VALUE:
9123 xmlGenericError(xmlGenericErrorContext,
9124 "PP: internal error, state == ENTITY_VALUE\n");
9125 ctxt->instate = XML_PARSER_CONTENT;
9126#ifdef DEBUG_PUSH
9127 xmlGenericError(xmlGenericErrorContext,
9128 "PP: entering DTD\n");
9129#endif
9130 break;
9131 case XML_PARSER_ATTRIBUTE_VALUE:
9132 xmlGenericError(xmlGenericErrorContext,
9133 "PP: internal error, state == ATTRIBUTE_VALUE\n");
9134 ctxt->instate = XML_PARSER_START_TAG;
9135#ifdef DEBUG_PUSH
9136 xmlGenericError(xmlGenericErrorContext,
9137 "PP: entering START_TAG\n");
9138#endif
9139 break;
9140 case XML_PARSER_SYSTEM_LITERAL:
9141 xmlGenericError(xmlGenericErrorContext,
9142 "PP: internal error, state == SYSTEM_LITERAL\n");
9143 ctxt->instate = XML_PARSER_START_TAG;
9144#ifdef DEBUG_PUSH
9145 xmlGenericError(xmlGenericErrorContext,
9146 "PP: entering START_TAG\n");
9147#endif
9148 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00009149 case XML_PARSER_PUBLIC_LITERAL:
9150 xmlGenericError(xmlGenericErrorContext,
9151 "PP: internal error, state == PUBLIC_LITERAL\n");
9152 ctxt->instate = XML_PARSER_START_TAG;
9153#ifdef DEBUG_PUSH
9154 xmlGenericError(xmlGenericErrorContext,
9155 "PP: entering START_TAG\n");
9156#endif
9157 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009158 }
9159 }
9160done:
9161#ifdef DEBUG_PUSH
9162 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
9163#endif
9164 return(ret);
9165}
9166
9167/**
Owen Taylor3473f882001-02-23 17:55:21 +00009168 * xmlParseChunk:
9169 * @ctxt: an XML parser context
9170 * @chunk: an char array
9171 * @size: the size in byte of the chunk
9172 * @terminate: last chunk indicator
9173 *
9174 * Parse a Chunk of memory
9175 *
9176 * Returns zero if no error, the xmlParserErrors otherwise.
9177 */
9178int
9179xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9180 int terminate) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009181 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9182 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +00009183 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9184 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9185 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9186 int cur = ctxt->input->cur - ctxt->input->base;
9187
9188 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9189 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9190 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009191 ctxt->input->end =
9192 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009193#ifdef DEBUG_PUSH
9194 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9195#endif
9196
Daniel Veillarda880b122003-04-21 21:36:41 +00009197#if 0
Owen Taylor3473f882001-02-23 17:55:21 +00009198 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
9199 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda880b122003-04-21 21:36:41 +00009200#endif
Owen Taylor3473f882001-02-23 17:55:21 +00009201 } else if (ctxt->instate != XML_PARSER_EOF) {
9202 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
9203 xmlParserInputBufferPtr in = ctxt->input->buf;
9204 if ((in->encoder != NULL) && (in->buffer != NULL) &&
9205 (in->raw != NULL)) {
9206 int nbchars;
9207
9208 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
9209 if (nbchars < 0) {
9210 xmlGenericError(xmlGenericErrorContext,
9211 "xmlParseChunk: encoder error\n");
9212 return(XML_ERR_INVALID_ENCODING);
9213 }
9214 }
9215 }
9216 }
9217 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009218 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9219 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +00009220 if (terminate) {
9221 /*
9222 * Check for termination
9223 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009224 int avail = 0;
9225 if (ctxt->input->buf == NULL)
9226 avail = ctxt->input->length -
9227 (ctxt->input->cur - ctxt->input->base);
9228 else
9229 avail = ctxt->input->buf->buffer->use -
9230 (ctxt->input->cur - ctxt->input->base);
9231
Owen Taylor3473f882001-02-23 17:55:21 +00009232 if ((ctxt->instate != XML_PARSER_EOF) &&
9233 (ctxt->instate != XML_PARSER_EPILOG)) {
9234 ctxt->errNo = XML_ERR_DOCUMENT_END;
9235 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9236 ctxt->sax->error(ctxt->userData,
9237 "Extra content at the end of the document\n");
9238 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009239 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009240 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009241 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
9242 ctxt->errNo = XML_ERR_DOCUMENT_END;
9243 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9244 ctxt->sax->error(ctxt->userData,
9245 "Extra content at the end of the document\n");
9246 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009247 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009248
9249 }
Owen Taylor3473f882001-02-23 17:55:21 +00009250 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009251 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009252 ctxt->sax->endDocument(ctxt->userData);
9253 }
9254 ctxt->instate = XML_PARSER_EOF;
9255 }
9256 return((xmlParserErrors) ctxt->errNo);
9257}
9258
9259/************************************************************************
9260 * *
9261 * I/O front end functions to the parser *
9262 * *
9263 ************************************************************************/
9264
9265/**
9266 * xmlStopParser:
9267 * @ctxt: an XML parser context
9268 *
9269 * Blocks further parser processing
9270 */
9271void
9272xmlStopParser(xmlParserCtxtPtr ctxt) {
9273 ctxt->instate = XML_PARSER_EOF;
9274 if (ctxt->input != NULL)
9275 ctxt->input->cur = BAD_CAST"";
9276}
9277
9278/**
9279 * xmlCreatePushParserCtxt:
9280 * @sax: a SAX handler
9281 * @user_data: The user data returned on SAX callbacks
9282 * @chunk: a pointer to an array of chars
9283 * @size: number of chars in the array
9284 * @filename: an optional file name or URI
9285 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00009286 * Create a parser context for using the XML parser in push mode.
9287 * If @buffer and @size are non-NULL, the data is used to detect
9288 * the encoding. The remaining characters will be parsed so they
9289 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00009290 * To allow content encoding detection, @size should be >= 4
9291 * The value of @filename is used for fetching external entities
9292 * and error/warning reports.
9293 *
9294 * Returns the new parser context or NULL
9295 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00009296
Owen Taylor3473f882001-02-23 17:55:21 +00009297xmlParserCtxtPtr
9298xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9299 const char *chunk, int size, const char *filename) {
9300 xmlParserCtxtPtr ctxt;
9301 xmlParserInputPtr inputStream;
9302 xmlParserInputBufferPtr buf;
9303 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9304
9305 /*
9306 * plug some encoding conversion routines
9307 */
9308 if ((chunk != NULL) && (size >= 4))
9309 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9310
9311 buf = xmlAllocParserInputBuffer(enc);
9312 if (buf == NULL) return(NULL);
9313
9314 ctxt = xmlNewParserCtxt();
9315 if (ctxt == NULL) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009316 xmlGenericError(xmlGenericErrorContext,
9317 "xml parser: out of memory\n");
9318 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009319 return(NULL);
9320 }
9321 if (sax != NULL) {
9322 if (ctxt->sax != &xmlDefaultSAXHandler)
9323 xmlFree(ctxt->sax);
9324 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9325 if (ctxt->sax == NULL) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009326 xmlGenericError(xmlGenericErrorContext,
9327 "xml parser: out of memory\n");
9328 xmlFreeParserInputBuffer(buf);
9329 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009330 return(NULL);
9331 }
9332 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9333 if (user_data != NULL)
9334 ctxt->userData = user_data;
9335 }
9336 if (filename == NULL) {
9337 ctxt->directory = NULL;
9338 } else {
9339 ctxt->directory = xmlParserGetDirectory(filename);
9340 }
9341
9342 inputStream = xmlNewInputStream(ctxt);
9343 if (inputStream == NULL) {
9344 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009345 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009346 return(NULL);
9347 }
9348
9349 if (filename == NULL)
9350 inputStream->filename = NULL;
9351 else
Daniel Veillardf4862f02002-09-10 11:13:43 +00009352 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +00009353 xmlCanonicPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +00009354 inputStream->buf = buf;
9355 inputStream->base = inputStream->buf->buffer->content;
9356 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009357 inputStream->end =
9358 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009359
9360 inputPush(ctxt, inputStream);
9361
9362 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9363 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009364 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9365 int cur = ctxt->input->cur - ctxt->input->base;
9366
Owen Taylor3473f882001-02-23 17:55:21 +00009367 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009368
9369 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9370 ctxt->input->cur = ctxt->input->base + cur;
9371 ctxt->input->end =
9372 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009373#ifdef DEBUG_PUSH
9374 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9375#endif
9376 }
9377
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009378 if (enc != XML_CHAR_ENCODING_NONE) {
9379 xmlSwitchEncoding(ctxt, enc);
9380 }
9381
Owen Taylor3473f882001-02-23 17:55:21 +00009382 return(ctxt);
9383}
9384
9385/**
9386 * xmlCreateIOParserCtxt:
9387 * @sax: a SAX handler
9388 * @user_data: The user data returned on SAX callbacks
9389 * @ioread: an I/O read function
9390 * @ioclose: an I/O close function
9391 * @ioctx: an I/O handler
9392 * @enc: the charset encoding if known
9393 *
9394 * Create a parser context for using the XML parser with an existing
9395 * I/O stream
9396 *
9397 * Returns the new parser context or NULL
9398 */
9399xmlParserCtxtPtr
9400xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9401 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9402 void *ioctx, xmlCharEncoding enc) {
9403 xmlParserCtxtPtr ctxt;
9404 xmlParserInputPtr inputStream;
9405 xmlParserInputBufferPtr buf;
9406
9407 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9408 if (buf == NULL) return(NULL);
9409
9410 ctxt = xmlNewParserCtxt();
9411 if (ctxt == NULL) {
9412 xmlFree(buf);
9413 return(NULL);
9414 }
9415 if (sax != NULL) {
9416 if (ctxt->sax != &xmlDefaultSAXHandler)
9417 xmlFree(ctxt->sax);
9418 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9419 if (ctxt->sax == NULL) {
9420 xmlFree(buf);
9421 xmlFree(ctxt);
9422 return(NULL);
9423 }
9424 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9425 if (user_data != NULL)
9426 ctxt->userData = user_data;
9427 }
9428
9429 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9430 if (inputStream == NULL) {
9431 xmlFreeParserCtxt(ctxt);
9432 return(NULL);
9433 }
9434 inputPush(ctxt, inputStream);
9435
9436 return(ctxt);
9437}
9438
9439/************************************************************************
9440 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009441 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00009442 * *
9443 ************************************************************************/
9444
9445/**
9446 * xmlIOParseDTD:
9447 * @sax: the SAX handler block or NULL
9448 * @input: an Input Buffer
9449 * @enc: the charset encoding if known
9450 *
9451 * Load and parse a DTD
9452 *
9453 * Returns the resulting xmlDtdPtr or NULL in case of error.
9454 * @input will be freed at parsing end.
9455 */
9456
9457xmlDtdPtr
9458xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9459 xmlCharEncoding enc) {
9460 xmlDtdPtr ret = NULL;
9461 xmlParserCtxtPtr ctxt;
9462 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009463 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00009464
9465 if (input == NULL)
9466 return(NULL);
9467
9468 ctxt = xmlNewParserCtxt();
9469 if (ctxt == NULL) {
9470 return(NULL);
9471 }
9472
9473 /*
9474 * Set-up the SAX context
9475 */
9476 if (sax != NULL) {
9477 if (ctxt->sax != NULL)
9478 xmlFree(ctxt->sax);
9479 ctxt->sax = sax;
9480 ctxt->userData = NULL;
9481 }
9482
9483 /*
9484 * generate a parser input from the I/O handler
9485 */
9486
9487 pinput = xmlNewIOInputStream(ctxt, input, enc);
9488 if (pinput == NULL) {
9489 if (sax != NULL) ctxt->sax = NULL;
9490 xmlFreeParserCtxt(ctxt);
9491 return(NULL);
9492 }
9493
9494 /*
9495 * plug some encoding conversion routines here.
9496 */
9497 xmlPushInput(ctxt, pinput);
9498
9499 pinput->filename = NULL;
9500 pinput->line = 1;
9501 pinput->col = 1;
9502 pinput->base = ctxt->input->cur;
9503 pinput->cur = ctxt->input->cur;
9504 pinput->free = NULL;
9505
9506 /*
9507 * let's parse that entity knowing it's an external subset.
9508 */
9509 ctxt->inSubset = 2;
9510 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9511 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9512 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009513
9514 if (enc == XML_CHAR_ENCODING_NONE) {
9515 /*
9516 * Get the 4 first bytes and decode the charset
9517 * if enc != XML_CHAR_ENCODING_NONE
9518 * plug some encoding conversion routines.
9519 */
9520 start[0] = RAW;
9521 start[1] = NXT(1);
9522 start[2] = NXT(2);
9523 start[3] = NXT(3);
9524 enc = xmlDetectCharEncoding(start, 4);
9525 if (enc != XML_CHAR_ENCODING_NONE) {
9526 xmlSwitchEncoding(ctxt, enc);
9527 }
9528 }
9529
Owen Taylor3473f882001-02-23 17:55:21 +00009530 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9531
9532 if (ctxt->myDoc != NULL) {
9533 if (ctxt->wellFormed) {
9534 ret = ctxt->myDoc->extSubset;
9535 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +00009536 if (ret != NULL) {
9537 xmlNodePtr tmp;
9538
9539 ret->doc = NULL;
9540 tmp = ret->children;
9541 while (tmp != NULL) {
9542 tmp->doc = NULL;
9543 tmp = tmp->next;
9544 }
9545 }
Owen Taylor3473f882001-02-23 17:55:21 +00009546 } else {
9547 ret = NULL;
9548 }
9549 xmlFreeDoc(ctxt->myDoc);
9550 ctxt->myDoc = NULL;
9551 }
9552 if (sax != NULL) ctxt->sax = NULL;
9553 xmlFreeParserCtxt(ctxt);
9554
9555 return(ret);
9556}
9557
9558/**
9559 * xmlSAXParseDTD:
9560 * @sax: the SAX handler block
9561 * @ExternalID: a NAME* containing the External ID of the DTD
9562 * @SystemID: a NAME* containing the URL to the DTD
9563 *
9564 * Load and parse an external subset.
9565 *
9566 * Returns the resulting xmlDtdPtr or NULL in case of error.
9567 */
9568
9569xmlDtdPtr
9570xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9571 const xmlChar *SystemID) {
9572 xmlDtdPtr ret = NULL;
9573 xmlParserCtxtPtr ctxt;
9574 xmlParserInputPtr input = NULL;
9575 xmlCharEncoding enc;
9576
9577 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9578
9579 ctxt = xmlNewParserCtxt();
9580 if (ctxt == NULL) {
9581 return(NULL);
9582 }
9583
9584 /*
9585 * Set-up the SAX context
9586 */
9587 if (sax != NULL) {
9588 if (ctxt->sax != NULL)
9589 xmlFree(ctxt->sax);
9590 ctxt->sax = sax;
9591 ctxt->userData = NULL;
9592 }
9593
9594 /*
9595 * Ask the Entity resolver to load the damn thing
9596 */
9597
9598 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillardc6abc3d2003-04-26 13:27:30 +00009599 input = ctxt->sax->resolveEntity(ctxt, ExternalID, SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +00009600 if (input == NULL) {
9601 if (sax != NULL) ctxt->sax = NULL;
9602 xmlFreeParserCtxt(ctxt);
9603 return(NULL);
9604 }
9605
9606 /*
9607 * plug some encoding conversion routines here.
9608 */
9609 xmlPushInput(ctxt, input);
9610 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9611 xmlSwitchEncoding(ctxt, enc);
9612
9613 if (input->filename == NULL)
Daniel Veillard85095e22003-04-23 13:56:44 +00009614 input->filename = (char *) xmlCanonicPath(SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +00009615 input->line = 1;
9616 input->col = 1;
9617 input->base = ctxt->input->cur;
9618 input->cur = ctxt->input->cur;
9619 input->free = NULL;
9620
9621 /*
9622 * let's parse that entity knowing it's an external subset.
9623 */
9624 ctxt->inSubset = 2;
9625 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9626 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9627 ExternalID, SystemID);
9628 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9629
9630 if (ctxt->myDoc != NULL) {
9631 if (ctxt->wellFormed) {
9632 ret = ctxt->myDoc->extSubset;
9633 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +00009634 if (ret != NULL) {
9635 xmlNodePtr tmp;
9636
9637 ret->doc = NULL;
9638 tmp = ret->children;
9639 while (tmp != NULL) {
9640 tmp->doc = NULL;
9641 tmp = tmp->next;
9642 }
9643 }
Owen Taylor3473f882001-02-23 17:55:21 +00009644 } else {
9645 ret = NULL;
9646 }
9647 xmlFreeDoc(ctxt->myDoc);
9648 ctxt->myDoc = NULL;
9649 }
9650 if (sax != NULL) ctxt->sax = NULL;
9651 xmlFreeParserCtxt(ctxt);
9652
9653 return(ret);
9654}
9655
9656/**
9657 * xmlParseDTD:
9658 * @ExternalID: a NAME* containing the External ID of the DTD
9659 * @SystemID: a NAME* containing the URL to the DTD
9660 *
9661 * Load and parse an external subset.
9662 *
9663 * Returns the resulting xmlDtdPtr or NULL in case of error.
9664 */
9665
9666xmlDtdPtr
9667xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9668 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9669}
9670
9671/************************************************************************
9672 * *
9673 * Front ends when parsing an Entity *
9674 * *
9675 ************************************************************************/
9676
9677/**
Owen Taylor3473f882001-02-23 17:55:21 +00009678 * xmlParseCtxtExternalEntity:
9679 * @ctx: the existing parsing context
9680 * @URL: the URL for the entity to load
9681 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009682 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009683 *
9684 * Parse an external general entity within an existing parsing context
9685 * An external general parsed entity is well-formed if it matches the
9686 * production labeled extParsedEnt.
9687 *
9688 * [78] extParsedEnt ::= TextDecl? content
9689 *
9690 * Returns 0 if the entity is well formed, -1 in case of args problem and
9691 * the parser error code otherwise
9692 */
9693
9694int
9695xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009696 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009697 xmlParserCtxtPtr ctxt;
9698 xmlDocPtr newDoc;
9699 xmlSAXHandlerPtr oldsax = NULL;
9700 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009701 xmlChar start[4];
9702 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009703
9704 if (ctx->depth > 40) {
9705 return(XML_ERR_ENTITY_LOOP);
9706 }
9707
Daniel Veillardcda96922001-08-21 10:56:31 +00009708 if (lst != NULL)
9709 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009710 if ((URL == NULL) && (ID == NULL))
9711 return(-1);
9712 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9713 return(-1);
9714
9715
9716 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9717 if (ctxt == NULL) return(-1);
9718 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +00009719 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +00009720 oldsax = ctxt->sax;
9721 ctxt->sax = ctx->sax;
9722 newDoc = xmlNewDoc(BAD_CAST "1.0");
9723 if (newDoc == NULL) {
9724 xmlFreeParserCtxt(ctxt);
9725 return(-1);
9726 }
9727 if (ctx->myDoc != NULL) {
9728 newDoc->intSubset = ctx->myDoc->intSubset;
9729 newDoc->extSubset = ctx->myDoc->extSubset;
9730 }
9731 if (ctx->myDoc->URL != NULL) {
9732 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9733 }
9734 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9735 if (newDoc->children == NULL) {
9736 ctxt->sax = oldsax;
9737 xmlFreeParserCtxt(ctxt);
9738 newDoc->intSubset = NULL;
9739 newDoc->extSubset = NULL;
9740 xmlFreeDoc(newDoc);
9741 return(-1);
9742 }
9743 nodePush(ctxt, newDoc->children);
9744 if (ctx->myDoc == NULL) {
9745 ctxt->myDoc = newDoc;
9746 } else {
9747 ctxt->myDoc = ctx->myDoc;
9748 newDoc->children->doc = ctx->myDoc;
9749 }
9750
Daniel Veillard87a764e2001-06-20 17:41:10 +00009751 /*
9752 * Get the 4 first bytes and decode the charset
9753 * if enc != XML_CHAR_ENCODING_NONE
9754 * plug some encoding conversion routines.
9755 */
9756 GROW
9757 start[0] = RAW;
9758 start[1] = NXT(1);
9759 start[2] = NXT(2);
9760 start[3] = NXT(3);
9761 enc = xmlDetectCharEncoding(start, 4);
9762 if (enc != XML_CHAR_ENCODING_NONE) {
9763 xmlSwitchEncoding(ctxt, enc);
9764 }
9765
Owen Taylor3473f882001-02-23 17:55:21 +00009766 /*
9767 * Parse a possible text declaration first
9768 */
Owen Taylor3473f882001-02-23 17:55:21 +00009769 if ((RAW == '<') && (NXT(1) == '?') &&
9770 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9771 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9772 xmlParseTextDecl(ctxt);
9773 }
9774
9775 /*
9776 * Doing validity checking on chunk doesn't make sense
9777 */
9778 ctxt->instate = XML_PARSER_CONTENT;
9779 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +00009780 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +00009781 ctxt->loadsubset = ctx->loadsubset;
9782 ctxt->depth = ctx->depth + 1;
9783 ctxt->replaceEntities = ctx->replaceEntities;
9784 if (ctxt->validate) {
9785 ctxt->vctxt.error = ctx->vctxt.error;
9786 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009787 } else {
9788 ctxt->vctxt.error = NULL;
9789 ctxt->vctxt.warning = NULL;
9790 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009791 ctxt->vctxt.nodeTab = NULL;
9792 ctxt->vctxt.nodeNr = 0;
9793 ctxt->vctxt.nodeMax = 0;
9794 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009795
9796 xmlParseContent(ctxt);
9797
Daniel Veillard5f8d1a32003-03-23 21:02:00 +00009798 ctx->validate = ctxt->validate;
9799 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +00009800 if ((RAW == '<') && (NXT(1) == '/')) {
9801 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9802 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9803 ctxt->sax->error(ctxt->userData,
9804 "chunk is not well balanced\n");
9805 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009806 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009807 } else if (RAW != 0) {
9808 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9809 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9810 ctxt->sax->error(ctxt->userData,
9811 "extra content at the end of well balanced chunk\n");
9812 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009813 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009814 }
9815 if (ctxt->node != newDoc->children) {
9816 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9817 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9818 ctxt->sax->error(ctxt->userData,
9819 "chunk is not well balanced\n");
9820 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009821 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009822 }
9823
9824 if (!ctxt->wellFormed) {
9825 if (ctxt->errNo == 0)
9826 ret = 1;
9827 else
9828 ret = ctxt->errNo;
9829 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009830 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009831 xmlNodePtr cur;
9832
9833 /*
9834 * Return the newly created nodeset after unlinking it from
9835 * they pseudo parent.
9836 */
9837 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009838 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009839 while (cur != NULL) {
9840 cur->parent = NULL;
9841 cur = cur->next;
9842 }
9843 newDoc->children->children = NULL;
9844 }
9845 ret = 0;
9846 }
9847 ctxt->sax = oldsax;
9848 xmlFreeParserCtxt(ctxt);
9849 newDoc->intSubset = NULL;
9850 newDoc->extSubset = NULL;
9851 xmlFreeDoc(newDoc);
9852
9853 return(ret);
9854}
9855
9856/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009857 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009858 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009859 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009860 * @sax: the SAX handler bloc (possibly NULL)
9861 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9862 * @depth: Used for loop detection, use 0
9863 * @URL: the URL for the entity to load
9864 * @ID: the System ID for the entity to load
9865 * @list: the return value for the set of parsed nodes
9866 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009867 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009868 *
9869 * Returns 0 if the entity is well formed, -1 in case of args problem and
9870 * the parser error code otherwise
9871 */
9872
Daniel Veillard257d9102001-05-08 10:41:44 +00009873static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009874xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9875 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009876 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009877 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009878 xmlParserCtxtPtr ctxt;
9879 xmlDocPtr newDoc;
9880 xmlSAXHandlerPtr oldsax = NULL;
9881 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009882 xmlChar start[4];
9883 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009884
9885 if (depth > 40) {
9886 return(XML_ERR_ENTITY_LOOP);
9887 }
9888
9889
9890
9891 if (list != NULL)
9892 *list = NULL;
9893 if ((URL == NULL) && (ID == NULL))
9894 return(-1);
9895 if (doc == NULL) /* @@ relax but check for dereferences */
9896 return(-1);
9897
9898
9899 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9900 if (ctxt == NULL) return(-1);
9901 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009902 if (oldctxt != NULL) {
9903 ctxt->_private = oldctxt->_private;
9904 ctxt->loadsubset = oldctxt->loadsubset;
9905 ctxt->validate = oldctxt->validate;
9906 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009907 ctxt->record_info = oldctxt->record_info;
9908 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
9909 ctxt->node_seq.length = oldctxt->node_seq.length;
9910 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009911 } else {
9912 /*
9913 * Doing validity checking on chunk without context
9914 * doesn't make sense
9915 */
9916 ctxt->_private = NULL;
9917 ctxt->validate = 0;
9918 ctxt->external = 2;
9919 ctxt->loadsubset = 0;
9920 }
Owen Taylor3473f882001-02-23 17:55:21 +00009921 if (sax != NULL) {
9922 oldsax = ctxt->sax;
9923 ctxt->sax = sax;
9924 if (user_data != NULL)
9925 ctxt->userData = user_data;
9926 }
9927 newDoc = xmlNewDoc(BAD_CAST "1.0");
9928 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009929 ctxt->node_seq.maximum = 0;
9930 ctxt->node_seq.length = 0;
9931 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009932 xmlFreeParserCtxt(ctxt);
9933 return(-1);
9934 }
9935 if (doc != NULL) {
9936 newDoc->intSubset = doc->intSubset;
9937 newDoc->extSubset = doc->extSubset;
9938 }
9939 if (doc->URL != NULL) {
9940 newDoc->URL = xmlStrdup(doc->URL);
9941 }
9942 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9943 if (newDoc->children == NULL) {
9944 if (sax != NULL)
9945 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009946 ctxt->node_seq.maximum = 0;
9947 ctxt->node_seq.length = 0;
9948 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009949 xmlFreeParserCtxt(ctxt);
9950 newDoc->intSubset = NULL;
9951 newDoc->extSubset = NULL;
9952 xmlFreeDoc(newDoc);
9953 return(-1);
9954 }
9955 nodePush(ctxt, newDoc->children);
9956 if (doc == NULL) {
9957 ctxt->myDoc = newDoc;
9958 } else {
9959 ctxt->myDoc = doc;
9960 newDoc->children->doc = doc;
9961 }
9962
Daniel Veillard87a764e2001-06-20 17:41:10 +00009963 /*
9964 * Get the 4 first bytes and decode the charset
9965 * if enc != XML_CHAR_ENCODING_NONE
9966 * plug some encoding conversion routines.
9967 */
9968 GROW;
9969 start[0] = RAW;
9970 start[1] = NXT(1);
9971 start[2] = NXT(2);
9972 start[3] = NXT(3);
9973 enc = xmlDetectCharEncoding(start, 4);
9974 if (enc != XML_CHAR_ENCODING_NONE) {
9975 xmlSwitchEncoding(ctxt, enc);
9976 }
9977
Owen Taylor3473f882001-02-23 17:55:21 +00009978 /*
9979 * Parse a possible text declaration first
9980 */
Owen Taylor3473f882001-02-23 17:55:21 +00009981 if ((RAW == '<') && (NXT(1) == '?') &&
9982 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9983 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9984 xmlParseTextDecl(ctxt);
9985 }
9986
Owen Taylor3473f882001-02-23 17:55:21 +00009987 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009988 ctxt->depth = depth;
9989
9990 xmlParseContent(ctxt);
9991
Daniel Veillard561b7f82002-03-20 21:55:57 +00009992 if ((RAW == '<') && (NXT(1) == '/')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009993 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9994 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9995 ctxt->sax->error(ctxt->userData,
9996 "chunk is not well balanced\n");
9997 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009998 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00009999 } else if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +000010000 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10001 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10002 ctxt->sax->error(ctxt->userData,
10003 "extra content at the end of well balanced chunk\n");
10004 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010005 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010006 }
10007 if (ctxt->node != newDoc->children) {
10008 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10009 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10010 ctxt->sax->error(ctxt->userData,
10011 "chunk is not well balanced\n");
10012 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010013 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010014 }
10015
10016 if (!ctxt->wellFormed) {
10017 if (ctxt->errNo == 0)
10018 ret = 1;
10019 else
10020 ret = ctxt->errNo;
10021 } else {
10022 if (list != NULL) {
10023 xmlNodePtr cur;
10024
10025 /*
10026 * Return the newly created nodeset after unlinking it from
10027 * they pseudo parent.
10028 */
10029 cur = newDoc->children->children;
10030 *list = cur;
10031 while (cur != NULL) {
10032 cur->parent = NULL;
10033 cur = cur->next;
10034 }
10035 newDoc->children->children = NULL;
10036 }
10037 ret = 0;
10038 }
10039 if (sax != NULL)
10040 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000010041 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
10042 oldctxt->node_seq.length = ctxt->node_seq.length;
10043 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010044 ctxt->node_seq.maximum = 0;
10045 ctxt->node_seq.length = 0;
10046 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010047 xmlFreeParserCtxt(ctxt);
10048 newDoc->intSubset = NULL;
10049 newDoc->extSubset = NULL;
10050 xmlFreeDoc(newDoc);
10051
10052 return(ret);
10053}
10054
10055/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010056 * xmlParseExternalEntity:
10057 * @doc: the document the chunk pertains to
10058 * @sax: the SAX handler bloc (possibly NULL)
10059 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10060 * @depth: Used for loop detection, use 0
10061 * @URL: the URL for the entity to load
10062 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010063 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000010064 *
10065 * Parse an external general entity
10066 * An external general parsed entity is well-formed if it matches the
10067 * production labeled extParsedEnt.
10068 *
10069 * [78] extParsedEnt ::= TextDecl? content
10070 *
10071 * Returns 0 if the entity is well formed, -1 in case of args problem and
10072 * the parser error code otherwise
10073 */
10074
10075int
10076xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000010077 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010078 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010079 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000010080}
10081
10082/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000010083 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000010084 * @doc: the document the chunk pertains to
10085 * @sax: the SAX handler bloc (possibly NULL)
10086 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10087 * @depth: Used for loop detection, use 0
10088 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000010089 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010090 *
10091 * Parse a well-balanced chunk of an XML document
10092 * called by the parser
10093 * The allowed sequence for the Well Balanced Chunk is the one defined by
10094 * the content production in the XML grammar:
10095 *
10096 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10097 *
10098 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10099 * the parser error code otherwise
10100 */
10101
10102int
10103xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000010104 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010105 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
10106 depth, string, lst, 0 );
10107}
10108
10109/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000010110 * xmlParseBalancedChunkMemoryInternal:
10111 * @oldctxt: the existing parsing context
10112 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10113 * @user_data: the user data field for the parser context
10114 * @lst: the return value for the set of parsed nodes
10115 *
10116 *
10117 * Parse a well-balanced chunk of an XML document
10118 * called by the parser
10119 * The allowed sequence for the Well Balanced Chunk is the one defined by
10120 * the content production in the XML grammar:
10121 *
10122 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10123 *
10124 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10125 * the parser error code otherwise
10126 *
10127 * In case recover is set to 1, the nodelist will not be empty even if
10128 * the parsed chunk is not well balanced.
10129 */
10130static int
10131xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
10132 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
10133 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010134 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010135 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010136 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010137 int size;
10138 int ret = 0;
10139
10140 if (oldctxt->depth > 40) {
10141 return(XML_ERR_ENTITY_LOOP);
10142 }
10143
10144
10145 if (lst != NULL)
10146 *lst = NULL;
10147 if (string == NULL)
10148 return(-1);
10149
10150 size = xmlStrlen(string);
10151
10152 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10153 if (ctxt == NULL) return(-1);
10154 if (user_data != NULL)
10155 ctxt->userData = user_data;
10156 else
10157 ctxt->userData = ctxt;
10158
10159 oldsax = ctxt->sax;
10160 ctxt->sax = oldctxt->sax;
Daniel Veillarde1ca5032002-12-09 14:13:43 +000010161 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010162 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010163 newDoc = xmlNewDoc(BAD_CAST "1.0");
10164 if (newDoc == NULL) {
10165 ctxt->sax = oldsax;
10166 xmlFreeParserCtxt(ctxt);
10167 return(-1);
10168 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010169 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010170 } else {
10171 ctxt->myDoc = oldctxt->myDoc;
10172 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010173 }
Daniel Veillard9bc53102002-11-25 13:20:04 +000010174 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +000010175 BAD_CAST "pseudoroot", NULL);
10176 if (ctxt->myDoc->children == NULL) {
10177 ctxt->sax = oldsax;
10178 xmlFreeParserCtxt(ctxt);
10179 if (newDoc != NULL)
10180 xmlFreeDoc(newDoc);
10181 return(-1);
10182 }
10183 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010184 ctxt->instate = XML_PARSER_CONTENT;
10185 ctxt->depth = oldctxt->depth + 1;
10186
Daniel Veillard328f48c2002-11-15 15:24:34 +000010187 ctxt->validate = 0;
10188 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000010189 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
10190 /*
10191 * ID/IDREF registration will be done in xmlValidateElement below
10192 */
10193 ctxt->loadsubset |= XML_SKIP_IDS;
10194 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010195
Daniel Veillard68e9e742002-11-16 15:35:11 +000010196 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010197 if ((RAW == '<') && (NXT(1) == '/')) {
10198 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10199 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10200 ctxt->sax->error(ctxt->userData,
10201 "chunk is not well balanced\n");
10202 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010203 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010204 } else if (RAW != 0) {
10205 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10206 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10207 ctxt->sax->error(ctxt->userData,
10208 "extra content at the end of well balanced chunk\n");
10209 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010210 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010211 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010212 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000010213 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10214 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10215 ctxt->sax->error(ctxt->userData,
10216 "chunk is not well balanced\n");
10217 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010218 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010219 }
10220
10221 if (!ctxt->wellFormed) {
10222 if (ctxt->errNo == 0)
10223 ret = 1;
10224 else
10225 ret = ctxt->errNo;
10226 } else {
10227 ret = 0;
10228 }
10229
10230 if ((lst != NULL) && (ret == 0)) {
10231 xmlNodePtr cur;
10232
10233 /*
10234 * Return the newly created nodeset after unlinking it from
10235 * they pseudo parent.
10236 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000010237 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010238 *lst = cur;
10239 while (cur != NULL) {
Daniel Veillard8d589042003-02-04 15:07:21 +000010240 if (oldctxt->validate && oldctxt->wellFormed &&
10241 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
10242 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
10243 oldctxt->myDoc, cur);
10244 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010245 cur->parent = NULL;
10246 cur = cur->next;
10247 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010248 ctxt->myDoc->children->children = NULL;
10249 }
10250 if (ctxt->myDoc != NULL) {
10251 xmlFreeNode(ctxt->myDoc->children);
10252 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010253 }
10254
10255 ctxt->sax = oldsax;
10256 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010257 if (newDoc != NULL)
10258 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010259
10260 return(ret);
10261}
10262
10263/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000010264 * xmlParseBalancedChunkMemoryRecover:
10265 * @doc: the document the chunk pertains to
10266 * @sax: the SAX handler bloc (possibly NULL)
10267 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10268 * @depth: Used for loop detection, use 0
10269 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10270 * @lst: the return value for the set of parsed nodes
10271 * @recover: return nodes even if the data is broken (use 0)
10272 *
10273 *
10274 * Parse a well-balanced chunk of an XML document
10275 * called by the parser
10276 * The allowed sequence for the Well Balanced Chunk is the one defined by
10277 * the content production in the XML grammar:
10278 *
10279 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10280 *
10281 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10282 * the parser error code otherwise
10283 *
10284 * In case recover is set to 1, the nodelist will not be empty even if
10285 * the parsed chunk is not well balanced.
10286 */
10287int
10288xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
10289 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
10290 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000010291 xmlParserCtxtPtr ctxt;
10292 xmlDocPtr newDoc;
10293 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000010294 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000010295 int size;
10296 int ret = 0;
10297
10298 if (depth > 40) {
10299 return(XML_ERR_ENTITY_LOOP);
10300 }
10301
10302
Daniel Veillardcda96922001-08-21 10:56:31 +000010303 if (lst != NULL)
10304 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010305 if (string == NULL)
10306 return(-1);
10307
10308 size = xmlStrlen(string);
10309
10310 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10311 if (ctxt == NULL) return(-1);
10312 ctxt->userData = ctxt;
10313 if (sax != NULL) {
10314 oldsax = ctxt->sax;
10315 ctxt->sax = sax;
10316 if (user_data != NULL)
10317 ctxt->userData = user_data;
10318 }
10319 newDoc = xmlNewDoc(BAD_CAST "1.0");
10320 if (newDoc == NULL) {
10321 xmlFreeParserCtxt(ctxt);
10322 return(-1);
10323 }
10324 if (doc != NULL) {
10325 newDoc->intSubset = doc->intSubset;
10326 newDoc->extSubset = doc->extSubset;
10327 }
10328 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10329 if (newDoc->children == NULL) {
10330 if (sax != NULL)
10331 ctxt->sax = oldsax;
10332 xmlFreeParserCtxt(ctxt);
10333 newDoc->intSubset = NULL;
10334 newDoc->extSubset = NULL;
10335 xmlFreeDoc(newDoc);
10336 return(-1);
10337 }
10338 nodePush(ctxt, newDoc->children);
10339 if (doc == NULL) {
10340 ctxt->myDoc = newDoc;
10341 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000010342 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000010343 newDoc->children->doc = doc;
10344 }
10345 ctxt->instate = XML_PARSER_CONTENT;
10346 ctxt->depth = depth;
10347
10348 /*
10349 * Doing validity checking on chunk doesn't make sense
10350 */
10351 ctxt->validate = 0;
10352 ctxt->loadsubset = 0;
10353
Daniel Veillardb39bc392002-10-26 19:29:51 +000010354 if ( doc != NULL ){
10355 content = doc->children;
10356 doc->children = NULL;
10357 xmlParseContent(ctxt);
10358 doc->children = content;
10359 }
10360 else {
10361 xmlParseContent(ctxt);
10362 }
Owen Taylor3473f882001-02-23 17:55:21 +000010363 if ((RAW == '<') && (NXT(1) == '/')) {
10364 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10365 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10366 ctxt->sax->error(ctxt->userData,
10367 "chunk is not well balanced\n");
10368 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010369 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010370 } else if (RAW != 0) {
10371 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10372 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10373 ctxt->sax->error(ctxt->userData,
10374 "extra content at the end of well balanced chunk\n");
10375 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010376 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010377 }
10378 if (ctxt->node != newDoc->children) {
10379 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10380 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10381 ctxt->sax->error(ctxt->userData,
10382 "chunk is not well balanced\n");
10383 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010384 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010385 }
10386
10387 if (!ctxt->wellFormed) {
10388 if (ctxt->errNo == 0)
10389 ret = 1;
10390 else
10391 ret = ctxt->errNo;
10392 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010393 ret = 0;
10394 }
10395
10396 if (lst != NULL && (ret == 0 || recover == 1)) {
10397 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010398
10399 /*
10400 * Return the newly created nodeset after unlinking it from
10401 * they pseudo parent.
10402 */
10403 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010404 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010405 while (cur != NULL) {
10406 cur->parent = NULL;
10407 cur = cur->next;
10408 }
10409 newDoc->children->children = NULL;
10410 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000010411
Owen Taylor3473f882001-02-23 17:55:21 +000010412 if (sax != NULL)
10413 ctxt->sax = oldsax;
10414 xmlFreeParserCtxt(ctxt);
10415 newDoc->intSubset = NULL;
10416 newDoc->extSubset = NULL;
10417 xmlFreeDoc(newDoc);
10418
10419 return(ret);
10420}
10421
10422/**
10423 * xmlSAXParseEntity:
10424 * @sax: the SAX handler block
10425 * @filename: the filename
10426 *
10427 * parse an XML external entity out of context and build a tree.
10428 * It use the given SAX function block to handle the parsing callback.
10429 * If sax is NULL, fallback to the default DOM tree building routines.
10430 *
10431 * [78] extParsedEnt ::= TextDecl? content
10432 *
10433 * This correspond to a "Well Balanced" chunk
10434 *
10435 * Returns the resulting document tree
10436 */
10437
10438xmlDocPtr
10439xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
10440 xmlDocPtr ret;
10441 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010442
10443 ctxt = xmlCreateFileParserCtxt(filename);
10444 if (ctxt == NULL) {
10445 return(NULL);
10446 }
10447 if (sax != NULL) {
10448 if (ctxt->sax != NULL)
10449 xmlFree(ctxt->sax);
10450 ctxt->sax = sax;
10451 ctxt->userData = NULL;
10452 }
10453
Owen Taylor3473f882001-02-23 17:55:21 +000010454 xmlParseExtParsedEnt(ctxt);
10455
10456 if (ctxt->wellFormed)
10457 ret = ctxt->myDoc;
10458 else {
10459 ret = NULL;
10460 xmlFreeDoc(ctxt->myDoc);
10461 ctxt->myDoc = NULL;
10462 }
10463 if (sax != NULL)
10464 ctxt->sax = NULL;
10465 xmlFreeParserCtxt(ctxt);
10466
10467 return(ret);
10468}
10469
10470/**
10471 * xmlParseEntity:
10472 * @filename: the filename
10473 *
10474 * parse an XML external entity out of context and build a tree.
10475 *
10476 * [78] extParsedEnt ::= TextDecl? content
10477 *
10478 * This correspond to a "Well Balanced" chunk
10479 *
10480 * Returns the resulting document tree
10481 */
10482
10483xmlDocPtr
10484xmlParseEntity(const char *filename) {
10485 return(xmlSAXParseEntity(NULL, filename));
10486}
10487
10488/**
10489 * xmlCreateEntityParserCtxt:
10490 * @URL: the entity URL
10491 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010492 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000010493 *
10494 * Create a parser context for an external entity
10495 * Automatic support for ZLIB/Compress compressed document is provided
10496 * by default if found at compile-time.
10497 *
10498 * Returns the new parser context or NULL
10499 */
10500xmlParserCtxtPtr
10501xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10502 const xmlChar *base) {
10503 xmlParserCtxtPtr ctxt;
10504 xmlParserInputPtr inputStream;
10505 char *directory = NULL;
10506 xmlChar *uri;
10507
10508 ctxt = xmlNewParserCtxt();
10509 if (ctxt == NULL) {
10510 return(NULL);
10511 }
10512
10513 uri = xmlBuildURI(URL, base);
10514
10515 if (uri == NULL) {
10516 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
10517 if (inputStream == NULL) {
10518 xmlFreeParserCtxt(ctxt);
10519 return(NULL);
10520 }
10521
10522 inputPush(ctxt, inputStream);
10523
10524 if ((ctxt->directory == NULL) && (directory == NULL))
10525 directory = xmlParserGetDirectory((char *)URL);
10526 if ((ctxt->directory == NULL) && (directory != NULL))
10527 ctxt->directory = directory;
10528 } else {
10529 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
10530 if (inputStream == NULL) {
10531 xmlFree(uri);
10532 xmlFreeParserCtxt(ctxt);
10533 return(NULL);
10534 }
10535
10536 inputPush(ctxt, inputStream);
10537
10538 if ((ctxt->directory == NULL) && (directory == NULL))
10539 directory = xmlParserGetDirectory((char *)uri);
10540 if ((ctxt->directory == NULL) && (directory != NULL))
10541 ctxt->directory = directory;
10542 xmlFree(uri);
10543 }
10544
10545 return(ctxt);
10546}
10547
10548/************************************************************************
10549 * *
10550 * Front ends when parsing from a file *
10551 * *
10552 ************************************************************************/
10553
10554/**
10555 * xmlCreateFileParserCtxt:
10556 * @filename: the filename
10557 *
10558 * Create a parser context for a file content.
10559 * Automatic support for ZLIB/Compress compressed document is provided
10560 * by default if found at compile-time.
10561 *
10562 * Returns the new parser context or NULL
10563 */
10564xmlParserCtxtPtr
10565xmlCreateFileParserCtxt(const char *filename)
10566{
10567 xmlParserCtxtPtr ctxt;
10568 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000010569 char *directory = NULL;
10570
Owen Taylor3473f882001-02-23 17:55:21 +000010571 ctxt = xmlNewParserCtxt();
10572 if (ctxt == NULL) {
10573 if (xmlDefaultSAXHandler.error != NULL) {
10574 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
10575 }
10576 return(NULL);
10577 }
10578
Igor Zlatkovicce076162003-02-23 13:39:39 +000010579
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000010580 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010581 if (inputStream == NULL) {
10582 xmlFreeParserCtxt(ctxt);
10583 return(NULL);
10584 }
10585
Owen Taylor3473f882001-02-23 17:55:21 +000010586 inputPush(ctxt, inputStream);
10587 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010588 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000010589 if ((ctxt->directory == NULL) && (directory != NULL))
10590 ctxt->directory = directory;
10591
10592 return(ctxt);
10593}
10594
10595/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010596 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000010597 * @sax: the SAX handler block
10598 * @filename: the filename
10599 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10600 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000010601 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000010602 *
10603 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10604 * compressed document is provided by default if found at compile-time.
10605 * It use the given SAX function block to handle the parsing callback.
10606 * If sax is NULL, fallback to the default DOM tree building routines.
10607 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000010608 * User data (void *) is stored within the parser context in the
10609 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000010610 *
Owen Taylor3473f882001-02-23 17:55:21 +000010611 * Returns the resulting document tree
10612 */
10613
10614xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000010615xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
10616 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000010617 xmlDocPtr ret;
10618 xmlParserCtxtPtr ctxt;
10619 char *directory = NULL;
10620
Daniel Veillard635ef722001-10-29 11:48:19 +000010621 xmlInitParser();
10622
Owen Taylor3473f882001-02-23 17:55:21 +000010623 ctxt = xmlCreateFileParserCtxt(filename);
10624 if (ctxt == NULL) {
10625 return(NULL);
10626 }
10627 if (sax != NULL) {
10628 if (ctxt->sax != NULL)
10629 xmlFree(ctxt->sax);
10630 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010631 }
Daniel Veillarda293c322001-10-02 13:54:14 +000010632 if (data!=NULL) {
10633 ctxt->_private=data;
10634 }
Owen Taylor3473f882001-02-23 17:55:21 +000010635
10636 if ((ctxt->directory == NULL) && (directory == NULL))
10637 directory = xmlParserGetDirectory(filename);
10638 if ((ctxt->directory == NULL) && (directory != NULL))
10639 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
10640
Daniel Veillarddad3f682002-11-17 16:47:27 +000010641 ctxt->recovery = recovery;
10642
Owen Taylor3473f882001-02-23 17:55:21 +000010643 xmlParseDocument(ctxt);
10644
10645 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10646 else {
10647 ret = NULL;
10648 xmlFreeDoc(ctxt->myDoc);
10649 ctxt->myDoc = NULL;
10650 }
10651 if (sax != NULL)
10652 ctxt->sax = NULL;
10653 xmlFreeParserCtxt(ctxt);
10654
10655 return(ret);
10656}
10657
10658/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010659 * xmlSAXParseFile:
10660 * @sax: the SAX handler block
10661 * @filename: the filename
10662 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10663 * documents
10664 *
10665 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10666 * compressed document is provided by default if found at compile-time.
10667 * It use the given SAX function block to handle the parsing callback.
10668 * If sax is NULL, fallback to the default DOM tree building routines.
10669 *
10670 * Returns the resulting document tree
10671 */
10672
10673xmlDocPtr
10674xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
10675 int recovery) {
10676 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
10677}
10678
10679/**
Owen Taylor3473f882001-02-23 17:55:21 +000010680 * xmlRecoverDoc:
10681 * @cur: a pointer to an array of xmlChar
10682 *
10683 * parse an XML in-memory document and build a tree.
10684 * In the case the document is not Well Formed, a tree is built anyway
10685 *
10686 * Returns the resulting document tree
10687 */
10688
10689xmlDocPtr
10690xmlRecoverDoc(xmlChar *cur) {
10691 return(xmlSAXParseDoc(NULL, cur, 1));
10692}
10693
10694/**
10695 * xmlParseFile:
10696 * @filename: the filename
10697 *
10698 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10699 * compressed document is provided by default if found at compile-time.
10700 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000010701 * Returns the resulting document tree if the file was wellformed,
10702 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000010703 */
10704
10705xmlDocPtr
10706xmlParseFile(const char *filename) {
10707 return(xmlSAXParseFile(NULL, filename, 0));
10708}
10709
10710/**
10711 * xmlRecoverFile:
10712 * @filename: the filename
10713 *
10714 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10715 * compressed document is provided by default if found at compile-time.
10716 * In the case the document is not Well Formed, a tree is built anyway
10717 *
10718 * Returns the resulting document tree
10719 */
10720
10721xmlDocPtr
10722xmlRecoverFile(const char *filename) {
10723 return(xmlSAXParseFile(NULL, filename, 1));
10724}
10725
10726
10727/**
10728 * xmlSetupParserForBuffer:
10729 * @ctxt: an XML parser context
10730 * @buffer: a xmlChar * buffer
10731 * @filename: a file name
10732 *
10733 * Setup the parser context to parse a new buffer; Clears any prior
10734 * contents from the parser context. The buffer parameter must not be
10735 * NULL, but the filename parameter can be
10736 */
10737void
10738xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
10739 const char* filename)
10740{
10741 xmlParserInputPtr input;
10742
10743 input = xmlNewInputStream(ctxt);
10744 if (input == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +000010745 xmlGenericError(xmlGenericErrorContext,
10746 "malloc");
Owen Taylor3473f882001-02-23 17:55:21 +000010747 xmlFree(ctxt);
10748 return;
10749 }
10750
10751 xmlClearParserCtxt(ctxt);
10752 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000010753 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000010754 input->base = buffer;
10755 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010756 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000010757 inputPush(ctxt, input);
10758}
10759
10760/**
10761 * xmlSAXUserParseFile:
10762 * @sax: a SAX handler
10763 * @user_data: The user data returned on SAX callbacks
10764 * @filename: a file name
10765 *
10766 * parse an XML file and call the given SAX handler routines.
10767 * Automatic support for ZLIB/Compress compressed document is provided
10768 *
10769 * Returns 0 in case of success or a error number otherwise
10770 */
10771int
10772xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10773 const char *filename) {
10774 int ret = 0;
10775 xmlParserCtxtPtr ctxt;
10776
10777 ctxt = xmlCreateFileParserCtxt(filename);
10778 if (ctxt == NULL) return -1;
10779 if (ctxt->sax != &xmlDefaultSAXHandler)
10780 xmlFree(ctxt->sax);
10781 ctxt->sax = sax;
10782 if (user_data != NULL)
10783 ctxt->userData = user_data;
10784
10785 xmlParseDocument(ctxt);
10786
10787 if (ctxt->wellFormed)
10788 ret = 0;
10789 else {
10790 if (ctxt->errNo != 0)
10791 ret = ctxt->errNo;
10792 else
10793 ret = -1;
10794 }
10795 if (sax != NULL)
10796 ctxt->sax = NULL;
10797 xmlFreeParserCtxt(ctxt);
10798
10799 return ret;
10800}
10801
10802/************************************************************************
10803 * *
10804 * Front ends when parsing from memory *
10805 * *
10806 ************************************************************************/
10807
10808/**
10809 * xmlCreateMemoryParserCtxt:
10810 * @buffer: a pointer to a char array
10811 * @size: the size of the array
10812 *
10813 * Create a parser context for an XML in-memory document.
10814 *
10815 * Returns the new parser context or NULL
10816 */
10817xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010818xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010819 xmlParserCtxtPtr ctxt;
10820 xmlParserInputPtr input;
10821 xmlParserInputBufferPtr buf;
10822
10823 if (buffer == NULL)
10824 return(NULL);
10825 if (size <= 0)
10826 return(NULL);
10827
10828 ctxt = xmlNewParserCtxt();
10829 if (ctxt == NULL)
10830 return(NULL);
10831
10832 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010833 if (buf == NULL) {
10834 xmlFreeParserCtxt(ctxt);
10835 return(NULL);
10836 }
Owen Taylor3473f882001-02-23 17:55:21 +000010837
10838 input = xmlNewInputStream(ctxt);
10839 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010840 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010841 xmlFreeParserCtxt(ctxt);
10842 return(NULL);
10843 }
10844
10845 input->filename = NULL;
10846 input->buf = buf;
10847 input->base = input->buf->buffer->content;
10848 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010849 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010850
10851 inputPush(ctxt, input);
10852 return(ctxt);
10853}
10854
10855/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010856 * xmlSAXParseMemoryWithData:
10857 * @sax: the SAX handler block
10858 * @buffer: an pointer to a char array
10859 * @size: the size of the array
10860 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10861 * documents
10862 * @data: the userdata
10863 *
10864 * parse an XML in-memory block and use the given SAX function block
10865 * to handle the parsing callback. If sax is NULL, fallback to the default
10866 * DOM tree building routines.
10867 *
10868 * User data (void *) is stored within the parser context in the
10869 * context's _private member, so it is available nearly everywhere in libxml
10870 *
10871 * Returns the resulting document tree
10872 */
10873
10874xmlDocPtr
10875xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
10876 int size, int recovery, void *data) {
10877 xmlDocPtr ret;
10878 xmlParserCtxtPtr ctxt;
10879
10880 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10881 if (ctxt == NULL) return(NULL);
10882 if (sax != NULL) {
10883 if (ctxt->sax != NULL)
10884 xmlFree(ctxt->sax);
10885 ctxt->sax = sax;
10886 }
10887 if (data!=NULL) {
10888 ctxt->_private=data;
10889 }
10890
Daniel Veillardadba5f12003-04-04 16:09:01 +000010891 ctxt->recovery = recovery;
10892
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010893 xmlParseDocument(ctxt);
10894
10895 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10896 else {
10897 ret = NULL;
10898 xmlFreeDoc(ctxt->myDoc);
10899 ctxt->myDoc = NULL;
10900 }
10901 if (sax != NULL)
10902 ctxt->sax = NULL;
10903 xmlFreeParserCtxt(ctxt);
10904
10905 return(ret);
10906}
10907
10908/**
Owen Taylor3473f882001-02-23 17:55:21 +000010909 * xmlSAXParseMemory:
10910 * @sax: the SAX handler block
10911 * @buffer: an pointer to a char array
10912 * @size: the size of the array
10913 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10914 * documents
10915 *
10916 * parse an XML in-memory block and use the given SAX function block
10917 * to handle the parsing callback. If sax is NULL, fallback to the default
10918 * DOM tree building routines.
10919 *
10920 * Returns the resulting document tree
10921 */
10922xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010923xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10924 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010925 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010926}
10927
10928/**
10929 * xmlParseMemory:
10930 * @buffer: an pointer to a char array
10931 * @size: the size of the array
10932 *
10933 * parse an XML in-memory block and build a tree.
10934 *
10935 * Returns the resulting document tree
10936 */
10937
Daniel Veillard50822cb2001-07-26 20:05:51 +000010938xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010939 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10940}
10941
10942/**
10943 * xmlRecoverMemory:
10944 * @buffer: an pointer to a char array
10945 * @size: the size of the array
10946 *
10947 * parse an XML in-memory block and build a tree.
10948 * In the case the document is not Well Formed, a tree is built anyway
10949 *
10950 * Returns the resulting document tree
10951 */
10952
Daniel Veillard50822cb2001-07-26 20:05:51 +000010953xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010954 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10955}
10956
10957/**
10958 * xmlSAXUserParseMemory:
10959 * @sax: a SAX handler
10960 * @user_data: The user data returned on SAX callbacks
10961 * @buffer: an in-memory XML document input
10962 * @size: the length of the XML document in bytes
10963 *
10964 * A better SAX parsing routine.
10965 * parse an XML in-memory buffer and call the given SAX handler routines.
10966 *
10967 * Returns 0 in case of success or a error number otherwise
10968 */
10969int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010970 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010971 int ret = 0;
10972 xmlParserCtxtPtr ctxt;
10973 xmlSAXHandlerPtr oldsax = NULL;
10974
Daniel Veillard9e923512002-08-14 08:48:52 +000010975 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000010976 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10977 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000010978 oldsax = ctxt->sax;
10979 ctxt->sax = sax;
Daniel Veillard30211a02001-04-26 09:33:18 +000010980 if (user_data != NULL)
10981 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010982
10983 xmlParseDocument(ctxt);
10984
10985 if (ctxt->wellFormed)
10986 ret = 0;
10987 else {
10988 if (ctxt->errNo != 0)
10989 ret = ctxt->errNo;
10990 else
10991 ret = -1;
10992 }
Daniel Veillard9e923512002-08-14 08:48:52 +000010993 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000010994 xmlFreeParserCtxt(ctxt);
10995
10996 return ret;
10997}
10998
10999/**
11000 * xmlCreateDocParserCtxt:
11001 * @cur: a pointer to an array of xmlChar
11002 *
11003 * Creates a parser context for an XML in-memory document.
11004 *
11005 * Returns the new parser context or NULL
11006 */
11007xmlParserCtxtPtr
11008xmlCreateDocParserCtxt(xmlChar *cur) {
11009 int len;
11010
11011 if (cur == NULL)
11012 return(NULL);
11013 len = xmlStrlen(cur);
11014 return(xmlCreateMemoryParserCtxt((char *)cur, len));
11015}
11016
11017/**
11018 * xmlSAXParseDoc:
11019 * @sax: the SAX handler block
11020 * @cur: a pointer to an array of xmlChar
11021 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11022 * documents
11023 *
11024 * parse an XML in-memory document and build a tree.
11025 * It use the given SAX function block to handle the parsing callback.
11026 * If sax is NULL, fallback to the default DOM tree building routines.
11027 *
11028 * Returns the resulting document tree
11029 */
11030
11031xmlDocPtr
11032xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
11033 xmlDocPtr ret;
11034 xmlParserCtxtPtr ctxt;
11035
11036 if (cur == NULL) return(NULL);
11037
11038
11039 ctxt = xmlCreateDocParserCtxt(cur);
11040 if (ctxt == NULL) return(NULL);
11041 if (sax != NULL) {
11042 ctxt->sax = sax;
11043 ctxt->userData = NULL;
11044 }
11045
11046 xmlParseDocument(ctxt);
11047 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11048 else {
11049 ret = NULL;
11050 xmlFreeDoc(ctxt->myDoc);
11051 ctxt->myDoc = NULL;
11052 }
11053 if (sax != NULL)
11054 ctxt->sax = NULL;
11055 xmlFreeParserCtxt(ctxt);
11056
11057 return(ret);
11058}
11059
11060/**
11061 * xmlParseDoc:
11062 * @cur: a pointer to an array of xmlChar
11063 *
11064 * parse an XML in-memory document and build a tree.
11065 *
11066 * Returns the resulting document tree
11067 */
11068
11069xmlDocPtr
11070xmlParseDoc(xmlChar *cur) {
11071 return(xmlSAXParseDoc(NULL, cur, 0));
11072}
11073
Daniel Veillard8107a222002-01-13 14:10:10 +000011074/************************************************************************
11075 * *
11076 * Specific function to keep track of entities references *
11077 * and used by the XSLT debugger *
11078 * *
11079 ************************************************************************/
11080
11081static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
11082
11083/**
11084 * xmlAddEntityReference:
11085 * @ent : A valid entity
11086 * @firstNode : A valid first node for children of entity
11087 * @lastNode : A valid last node of children entity
11088 *
11089 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
11090 */
11091static void
11092xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
11093 xmlNodePtr lastNode)
11094{
11095 if (xmlEntityRefFunc != NULL) {
11096 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
11097 }
11098}
11099
11100
11101/**
11102 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000011103 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000011104 *
11105 * Set the function to call call back when a xml reference has been made
11106 */
11107void
11108xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
11109{
11110 xmlEntityRefFunc = func;
11111}
Owen Taylor3473f882001-02-23 17:55:21 +000011112
11113/************************************************************************
11114 * *
11115 * Miscellaneous *
11116 * *
11117 ************************************************************************/
11118
11119#ifdef LIBXML_XPATH_ENABLED
11120#include <libxml/xpath.h>
11121#endif
11122
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011123extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000011124static int xmlParserInitialized = 0;
11125
11126/**
11127 * xmlInitParser:
11128 *
11129 * Initialization function for the XML parser.
11130 * This is not reentrant. Call once before processing in case of
11131 * use in multithreaded programs.
11132 */
11133
11134void
11135xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000011136 if (xmlParserInitialized != 0)
11137 return;
Owen Taylor3473f882001-02-23 17:55:21 +000011138
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011139 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
11140 (xmlGenericError == NULL))
11141 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011142 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000011143 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000011144 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000011145 xmlInitCharEncodingHandlers();
11146 xmlInitializePredefinedEntities();
11147 xmlDefaultSAXHandlerInit();
11148 xmlRegisterDefaultInputCallbacks();
11149 xmlRegisterDefaultOutputCallbacks();
11150#ifdef LIBXML_HTML_ENABLED
11151 htmlInitAutoClose();
11152 htmlDefaultSAXHandlerInit();
11153#endif
11154#ifdef LIBXML_XPATH_ENABLED
11155 xmlXPathInit();
11156#endif
11157 xmlParserInitialized = 1;
11158}
11159
11160/**
11161 * xmlCleanupParser:
11162 *
11163 * Cleanup function for the XML parser. It tries to reclaim all
11164 * parsing related global memory allocated for the parser processing.
11165 * It doesn't deallocate any document related memory. Calling this
11166 * function should not prevent reusing the parser.
Daniel Veillard7424eb62003-01-24 14:14:52 +000011167 * One should call xmlCleanupParser() only when the process has
11168 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000011169 */
11170
11171void
11172xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000011173 xmlCleanupCharEncodingHandlers();
11174 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000011175#ifdef LIBXML_CATALOG_ENABLED
11176 xmlCatalogCleanup();
11177#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000011178 xmlCleanupThreads();
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011179 xmlCleanupGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000011180 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011181}