blob: c1ce81e73a8d402f38330ebbf6704bf5e4db32ee [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
44#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000045#include <libxml/threads.h>
46#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000056#ifdef LIBXML_CATALOG_ENABLED
57#include <libxml/catalog.h>
58#endif
Owen Taylor3473f882001-02-23 17:55:21 +000059
60#ifdef HAVE_CTYPE_H
61#include <ctype.h>
62#endif
63#ifdef HAVE_STDLIB_H
64#include <stdlib.h>
65#endif
66#ifdef HAVE_SYS_STAT_H
67#include <sys/stat.h>
68#endif
69#ifdef HAVE_FCNTL_H
70#include <fcntl.h>
71#endif
72#ifdef HAVE_UNISTD_H
73#include <unistd.h>
74#endif
75#ifdef HAVE_ZLIB_H
76#include <zlib.h>
77#endif
78
Daniel Veillard3b2e4e12003-02-03 08:52:58 +000079/**
80 * MAX_DEPTH:
81 *
82 * arbitrary depth limit for the XML documents that we allow to
83 * process. This is not a limitation of the parser but a safety
84 * boundary feature.
85 */
86#define MAX_DEPTH 1024
Owen Taylor3473f882001-02-23 17:55:21 +000087
Daniel Veillard21a0f912001-02-25 19:54:14 +000088#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000089#define XML_PARSER_BUFFER_SIZE 100
90
Daniel Veillard5997aca2002-03-18 18:36:20 +000091#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
92
/*
 * List of XML prefixed PI allowed by W3C specs.
 *
 * The table is NULL terminated and is never modified at run time, so both
 * the strings and the table slots themselves are declared const.
 */

static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
101
102/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000103xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
104 const xmlChar **str);
105
Daniel Veillard257d9102001-05-08 10:41:44 +0000106static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000107xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
108 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000109 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000110 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000111
Daniel Veillard8107a222002-01-13 14:10:10 +0000112static void
113xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
114 xmlNodePtr lastNode);
115
Daniel Veillard328f48c2002-11-15 15:24:34 +0000116static int
117xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
118 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Owen Taylor3473f882001-02-23 17:55:21 +0000119/************************************************************************
120 * *
121 * Parser stacks related functions and macros *
122 * *
123 ************************************************************************/
124
125xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
126 const xmlChar ** str);
127
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000128/**
129 * inputPush:
130 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000131 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000132 *
133 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000134 *
135 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000136 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000137extern int
138inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
139{
140 if (ctxt->inputNr >= ctxt->inputMax) {
141 ctxt->inputMax *= 2;
142 ctxt->inputTab =
143 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
144 ctxt->inputMax *
145 sizeof(ctxt->inputTab[0]));
146 if (ctxt->inputTab == NULL) {
147 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
148 return (0);
149 }
150 }
151 ctxt->inputTab[ctxt->inputNr] = value;
152 ctxt->input = value;
153 return (ctxt->inputNr++);
154}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000155/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000156 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000157 * @ctxt: an XML parser context
158 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000159 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000160 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000161 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000162 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000163extern xmlParserInputPtr
164inputPop(xmlParserCtxtPtr ctxt)
165{
166 xmlParserInputPtr ret;
167
168 if (ctxt->inputNr <= 0)
169 return (0);
170 ctxt->inputNr--;
171 if (ctxt->inputNr > 0)
172 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
173 else
174 ctxt->input = NULL;
175 ret = ctxt->inputTab[ctxt->inputNr];
176 ctxt->inputTab[ctxt->inputNr] = 0;
177 return (ret);
178}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000179/**
180 * nodePush:
181 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000182 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000183 *
184 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000185 *
186 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000187 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000188extern int
189nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
190{
191 if (ctxt->nodeNr >= ctxt->nodeMax) {
192 ctxt->nodeMax *= 2;
193 ctxt->nodeTab =
194 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
195 ctxt->nodeMax *
196 sizeof(ctxt->nodeTab[0]));
197 if (ctxt->nodeTab == NULL) {
198 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
199 return (0);
200 }
201 }
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000202#ifdef MAX_DEPTH
203 if (ctxt->nodeNr > MAX_DEPTH) {
204 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
205 ctxt->sax->error(ctxt->userData,
206 "Excessive depth in document: change MAX_DEPTH = %d\n",
207 MAX_DEPTH);
208 ctxt->wellFormed = 0;
209 ctxt->instate = XML_PARSER_EOF;
210 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
211 return(0);
212 }
213#endif
Daniel Veillard1c732d22002-11-30 11:22:59 +0000214 ctxt->nodeTab[ctxt->nodeNr] = value;
215 ctxt->node = value;
216 return (ctxt->nodeNr++);
217}
218/**
219 * nodePop:
220 * @ctxt: an XML parser context
221 *
222 * Pops the top element node from the node stack
223 *
224 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +0000225 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000226extern xmlNodePtr
227nodePop(xmlParserCtxtPtr ctxt)
228{
229 xmlNodePtr ret;
230
231 if (ctxt->nodeNr <= 0)
232 return (0);
233 ctxt->nodeNr--;
234 if (ctxt->nodeNr > 0)
235 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
236 else
237 ctxt->node = NULL;
238 ret = ctxt->nodeTab[ctxt->nodeNr];
239 ctxt->nodeTab[ctxt->nodeNr] = 0;
240 return (ret);
241}
242/**
243 * namePush:
244 * @ctxt: an XML parser context
245 * @value: the element name
246 *
247 * Pushes a new element name on top of the name stack
248 *
249 * Returns 0 in case of error, the index in the stack otherwise
250 */
251extern int
252namePush(xmlParserCtxtPtr ctxt, xmlChar * value)
253{
254 if (ctxt->nameNr >= ctxt->nameMax) {
255 ctxt->nameMax *= 2;
256 ctxt->nameTab =
257 (xmlChar * *)xmlRealloc(ctxt->nameTab,
258 ctxt->nameMax *
259 sizeof(ctxt->nameTab[0]));
260 if (ctxt->nameTab == NULL) {
261 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
262 return (0);
263 }
264 }
265 ctxt->nameTab[ctxt->nameNr] = value;
266 ctxt->name = value;
267 return (ctxt->nameNr++);
268}
269/**
270 * namePop:
271 * @ctxt: an XML parser context
272 *
273 * Pops the top element name from the name stack
274 *
275 * Returns the name just removed
276 */
277extern xmlChar *
278namePop(xmlParserCtxtPtr ctxt)
279{
280 xmlChar *ret;
281
282 if (ctxt->nameNr <= 0)
283 return (0);
284 ctxt->nameNr--;
285 if (ctxt->nameNr > 0)
286 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
287 else
288 ctxt->name = NULL;
289 ret = ctxt->nameTab[ctxt->nameNr];
290 ctxt->nameTab[ctxt->nameNr] = 0;
291 return (ret);
292}
Owen Taylor3473f882001-02-23 17:55:21 +0000293
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000294static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000295 if (ctxt->spaceNr >= ctxt->spaceMax) {
296 ctxt->spaceMax *= 2;
297 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
298 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
299 if (ctxt->spaceTab == NULL) {
300 xmlGenericError(xmlGenericErrorContext,
301 "realloc failed !\n");
302 return(0);
303 }
304 }
305 ctxt->spaceTab[ctxt->spaceNr] = val;
306 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
307 return(ctxt->spaceNr++);
308}
309
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000310static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000311 int ret;
312 if (ctxt->spaceNr <= 0) return(0);
313 ctxt->spaceNr--;
314 if (ctxt->spaceNr > 0)
315 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
316 else
317 ctxt->space = NULL;
318 ret = ctxt->spaceTab[ctxt->spaceNr];
319 ctxt->spaceTab[ctxt->spaceNr] = -1;
320 return(ret);
321}
322
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
357
/* Current byte of the current input; RAW and CUR expand identically here. */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
/* Byte located val positions after the current one. */
#define NXT(val) ctxt->input->cur[(val)]
/* The current read pointer itself. */
#define CUR_PTR ctxt->input->cur

/*
 * Advance the read pointer, the character counter and the column by val,
 * then give a '%' found at the new position to
 * xmlParserHandlePEReference() and pop the input when it is exhausted and
 * cannot be grown.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
370
Daniel Veillarda880b122003-04-21 21:36:41 +0000371#define SHRINK if ((ctxt->progressive == 0) && \
372 (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK))\
Daniel Veillard46de64e2002-05-29 08:21:33 +0000373 xmlSHRINK (ctxt);
374
375static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
376 xmlParserInputShrink(ctxt->input);
377 if ((*ctxt->input->cur == 0) &&
378 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
379 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000380 }
Owen Taylor3473f882001-02-23 17:55:21 +0000381
Daniel Veillarda880b122003-04-21 21:36:41 +0000382#define GROW if ((ctxt->progressive == 0) && \
383 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +0000384 xmlGROW (ctxt);
385
386static void xmlGROW (xmlParserCtxtPtr ctxt) {
387 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
388 if ((*ctxt->input->cur == 0) &&
389 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
390 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +0000391}
Owen Taylor3473f882001-02-23 17:55:21 +0000392
/* Skip the blanks at the current position, see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Move to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * Step over exactly one byte, updating column and character counters, and
 * try to grow the input when a zero byte is reached. The line counter is
 * not touched, so this must not be used on a newline.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * Step over the current character, which is l bytes long, keeping line
 * and column up to date, then give a '%' found at the new position to
 * xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* Current character of the context, its byte length is stored in l. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
/* Same as CUR_CHAR but reading from the string s. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * Append the character v, whose encoded length is l, to buffer b at index
 * i and advance i accordingly. The body is wrapped in do { } while (0) so
 * that the macro behaves as a single statement, e.g. inside an unbraced
 * if/else, and the arguments are parenthesized.
 */
#define COPY_BUF(l,b,i,v) do {						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)], (v));			\
  } while (0)
Owen Taylor3473f882001-02-23 17:55:21 +0000419
420/**
421 * xmlSkipBlankChars:
422 * @ctxt: the XML parser context
423 *
424 * skip all blanks character found at that point in the input streams.
425 * It pops up finished entities in the process if allowable at that point.
426 *
427 * Returns the number of space chars skipped
428 */
429
430int
431xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000432 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000433
434 /*
435 * It's Okay to use CUR/NEXT here since all the blanks are on
436 * the ASCII range.
437 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000438 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
439 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000440 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000441 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000442 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000443 cur = ctxt->input->cur;
444 while (IS_BLANK(*cur)) {
445 if (*cur == '\n') {
446 ctxt->input->line++; ctxt->input->col = 1;
447 }
448 cur++;
449 res++;
450 if (*cur == 0) {
451 ctxt->input->cur = cur;
452 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
453 cur = ctxt->input->cur;
454 }
455 }
456 ctxt->input->cur = cur;
457 } else {
458 int cur;
459 do {
460 cur = CUR;
461 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
462 NEXT;
463 cur = CUR;
464 res++;
465 }
466 while ((cur == 0) && (ctxt->inputNr > 1) &&
467 (ctxt->instate != XML_PARSER_COMMENT)) {
468 xmlPopInput(ctxt);
469 cur = CUR;
470 }
471 /*
472 * Need to handle support of entities branching here
473 */
474 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
475 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
476 }
Owen Taylor3473f882001-02-23 17:55:21 +0000477 return(res);
478}
479
480/************************************************************************
481 * *
482 * Commodity functions to handle entities *
483 * *
484 ************************************************************************/
485
486/**
487 * xmlPopInput:
488 * @ctxt: an XML parser context
489 *
490 * xmlPopInput: the current input pointed by ctxt->input came to an end
491 * pop it and return the next char.
492 *
493 * Returns the current xmlChar in the parser context
494 */
495xmlChar
496xmlPopInput(xmlParserCtxtPtr ctxt) {
497 if (ctxt->inputNr == 1) return(0); /* End of main Input */
498 if (xmlParserDebugEntities)
499 xmlGenericError(xmlGenericErrorContext,
500 "Popping input %d\n", ctxt->inputNr);
501 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +0000502 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000503 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
504 return(xmlPopInput(ctxt));
505 return(CUR);
506}
507
508/**
509 * xmlPushInput:
510 * @ctxt: an XML parser context
511 * @input: an XML parser input fragment (entity, XML fragment ...).
512 *
513 * xmlPushInput: switch to a new input stream which is stacked on top
514 * of the previous one(s).
515 */
516void
517xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
518 if (input == NULL) return;
519
520 if (xmlParserDebugEntities) {
521 if ((ctxt->input != NULL) && (ctxt->input->filename))
522 xmlGenericError(xmlGenericErrorContext,
523 "%s(%d): ", ctxt->input->filename,
524 ctxt->input->line);
525 xmlGenericError(xmlGenericErrorContext,
526 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
527 }
528 inputPush(ctxt, input);
529 GROW;
530}
531
532/**
533 * xmlParseCharRef:
534 * @ctxt: an XML parser context
535 *
536 * parse Reference declarations
537 *
538 * [66] CharRef ::= '&#' [0-9]+ ';' |
539 * '&#x' [0-9a-fA-F]+ ';'
540 *
541 * [ WFC: Legal Character ]
542 * Characters referred to using character references must match the
543 * production for Char.
544 *
545 * Returns the value parsed (as an int), 0 in case of error
546 */
547int
548xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000549 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000550 int count = 0;
551
Owen Taylor3473f882001-02-23 17:55:21 +0000552 /*
553 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
554 */
Daniel Veillard561b7f82002-03-20 21:55:57 +0000555 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +0000556 (NXT(2) == 'x')) {
557 SKIP(3);
558 GROW;
559 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000560 if (count++ > 20) {
561 count = 0;
562 GROW;
563 }
564 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000565 val = val * 16 + (CUR - '0');
566 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
567 val = val * 16 + (CUR - 'a') + 10;
568 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
569 val = val * 16 + (CUR - 'A') + 10;
570 else {
571 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
572 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
573 ctxt->sax->error(ctxt->userData,
574 "xmlParseCharRef: invalid hexadecimal value\n");
575 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000576 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000577 val = 0;
578 break;
579 }
580 NEXT;
581 count++;
582 }
583 if (RAW == ';') {
584 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +0000585 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +0000586 ctxt->nbChars ++;
587 ctxt->input->cur++;
588 }
Daniel Veillard561b7f82002-03-20 21:55:57 +0000589 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +0000590 SKIP(2);
591 GROW;
592 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000593 if (count++ > 20) {
594 count = 0;
595 GROW;
596 }
597 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000598 val = val * 10 + (CUR - '0');
599 else {
600 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
602 ctxt->sax->error(ctxt->userData,
603 "xmlParseCharRef: invalid decimal value\n");
604 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000605 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000606 val = 0;
607 break;
608 }
609 NEXT;
610 count++;
611 }
612 if (RAW == ';') {
613 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +0000614 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +0000615 ctxt->nbChars ++;
616 ctxt->input->cur++;
617 }
618 } else {
619 ctxt->errNo = XML_ERR_INVALID_CHARREF;
620 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
621 ctxt->sax->error(ctxt->userData,
622 "xmlParseCharRef: invalid value\n");
623 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000624 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000625 }
626
627 /*
628 * [ WFC: Legal Character ]
629 * Characters referred to using character references must match the
630 * production for Char.
631 */
632 if (IS_CHAR(val)) {
633 return(val);
634 } else {
635 ctxt->errNo = XML_ERR_INVALID_CHAR;
636 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000637 ctxt->sax->error(ctxt->userData,
638 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000639 val);
640 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000641 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000642 }
643 return(0);
644}
645
646/**
647 * xmlParseStringCharRef:
648 * @ctxt: an XML parser context
649 * @str: a pointer to an index in the string
650 *
651 * parse Reference declarations, variant parsing from a string rather
652 * than an an input flow.
653 *
654 * [66] CharRef ::= '&#' [0-9]+ ';' |
655 * '&#x' [0-9a-fA-F]+ ';'
656 *
657 * [ WFC: Legal Character ]
658 * Characters referred to using character references must match the
659 * production for Char.
660 *
661 * Returns the value parsed (as an int), 0 in case of error, str will be
662 * updated to the current value of the index
663 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000664static int
Owen Taylor3473f882001-02-23 17:55:21 +0000665xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
666 const xmlChar *ptr;
667 xmlChar cur;
668 int val = 0;
669
670 if ((str == NULL) || (*str == NULL)) return(0);
671 ptr = *str;
672 cur = *ptr;
673 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
674 ptr += 3;
675 cur = *ptr;
676 while (cur != ';') { /* Non input consuming loop */
677 if ((cur >= '0') && (cur <= '9'))
678 val = val * 16 + (cur - '0');
679 else if ((cur >= 'a') && (cur <= 'f'))
680 val = val * 16 + (cur - 'a') + 10;
681 else if ((cur >= 'A') && (cur <= 'F'))
682 val = val * 16 + (cur - 'A') + 10;
683 else {
684 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
685 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
686 ctxt->sax->error(ctxt->userData,
687 "xmlParseStringCharRef: invalid hexadecimal value\n");
688 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000689 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000690 val = 0;
691 break;
692 }
693 ptr++;
694 cur = *ptr;
695 }
696 if (cur == ';')
697 ptr++;
698 } else if ((cur == '&') && (ptr[1] == '#')){
699 ptr += 2;
700 cur = *ptr;
701 while (cur != ';') { /* Non input consuming loops */
702 if ((cur >= '0') && (cur <= '9'))
703 val = val * 10 + (cur - '0');
704 else {
705 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
706 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
707 ctxt->sax->error(ctxt->userData,
708 "xmlParseStringCharRef: invalid decimal value\n");
709 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000710 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000711 val = 0;
712 break;
713 }
714 ptr++;
715 cur = *ptr;
716 }
717 if (cur == ';')
718 ptr++;
719 } else {
720 ctxt->errNo = XML_ERR_INVALID_CHARREF;
721 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
722 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000723 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000724 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000725 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000726 return(0);
727 }
728 *str = ptr;
729
730 /*
731 * [ WFC: Legal Character ]
732 * Characters referred to using character references must match the
733 * production for Char.
734 */
735 if (IS_CHAR(val)) {
736 return(val);
737 } else {
738 ctxt->errNo = XML_ERR_INVALID_CHAR;
739 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
740 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000741 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000742 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000743 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000744 }
745 return(0);
746}
747
748/**
Daniel Veillardf5582f12002-06-11 10:08:16 +0000749 * xmlNewBlanksWrapperInputStream:
750 * @ctxt: an XML parser context
751 * @entity: an Entity pointer
752 *
753 * Create a new input stream for wrapping
754 * blanks around a PEReference
755 *
756 * Returns the new input stream or NULL
757 */
758
759static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
760
Daniel Veillardf4862f02002-09-10 11:13:43 +0000761static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +0000762xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
763 xmlParserInputPtr input;
764 xmlChar *buffer;
765 size_t length;
766 if (entity == NULL) {
767 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
768 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
769 ctxt->sax->error(ctxt->userData,
770 "internal: xmlNewBlanksWrapperInputStream entity = NULL\n");
771 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
772 return(NULL);
773 }
774 if (xmlParserDebugEntities)
775 xmlGenericError(xmlGenericErrorContext,
776 "new blanks wrapper for entity: %s\n", entity->name);
777 input = xmlNewInputStream(ctxt);
778 if (input == NULL) {
779 return(NULL);
780 }
781 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +0000782 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +0000783 if (buffer == NULL) {
784 return(NULL);
785 }
786 buffer [0] = ' ';
787 buffer [1] = '%';
788 buffer [length-3] = ';';
789 buffer [length-2] = ' ';
790 buffer [length-1] = 0;
791 memcpy(buffer + 2, entity->name, length - 5);
792 input->free = deallocblankswrapper;
793 input->base = buffer;
794 input->cur = buffer;
795 input->length = length;
796 input->end = &buffer[length];
797 return(input);
798}
799
800/**
Owen Taylor3473f882001-02-23 17:55:21 +0000801 * xmlParserHandlePEReference:
802 * @ctxt: the parser context
803 *
804 * [69] PEReference ::= '%' Name ';'
805 *
806 * [ WFC: No Recursion ]
807 * A parsed entity must not contain a recursive
808 * reference to itself, either directly or indirectly.
809 *
810 * [ WFC: Entity Declared ]
811 * In a document without any DTD, a document with only an internal DTD
812 * subset which contains no parameter entity references, or a document
813 * with "standalone='yes'", ... ... The declaration of a parameter
814 * entity must precede any reference to it...
815 *
816 * [ VC: Entity Declared ]
817 * In a document with an external subset or external parameter entities
818 * with "standalone='no'", ... ... The declaration of a parameter entity
819 * must precede any reference to it...
820 *
821 * [ WFC: In DTD ]
822 * Parameter-entity references may only appear in the DTD.
823 * NOTE: misleading but this is handled.
824 *
825 * A PEReference may have been detected in the current input stream
826 * the handling is done accordingly to
827 * http://www.w3.org/TR/REC-xml#entproc
828 * i.e.
829 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000830 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000831 */
832void
833xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
834 xmlChar *name;
835 xmlEntityPtr entity = NULL;
836 xmlParserInputPtr input;
837
Owen Taylor3473f882001-02-23 17:55:21 +0000838 if (RAW != '%') return;
839 switch(ctxt->instate) {
840 case XML_PARSER_CDATA_SECTION:
841 return;
842 case XML_PARSER_COMMENT:
843 return;
844 case XML_PARSER_START_TAG:
845 return;
846 case XML_PARSER_END_TAG:
847 return;
848 case XML_PARSER_EOF:
849 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
850 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
851 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
852 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000853 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000854 return;
855 case XML_PARSER_PROLOG:
856 case XML_PARSER_START:
857 case XML_PARSER_MISC:
858 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
859 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
860 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
861 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000862 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000863 return;
864 case XML_PARSER_ENTITY_DECL:
865 case XML_PARSER_CONTENT:
866 case XML_PARSER_ATTRIBUTE_VALUE:
867 case XML_PARSER_PI:
868 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +0000869 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +0000870 /* we just ignore it there */
871 return;
872 case XML_PARSER_EPILOG:
873 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
874 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
875 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
876 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000877 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000878 return;
879 case XML_PARSER_ENTITY_VALUE:
880 /*
881 * NOTE: in the case of entity values, we don't do the
882 * substitution here since we need the literal
883 * entity value to be able to save the internal
884 * subset of the document.
885 * This will be handled by xmlStringDecodeEntities
886 */
887 return;
888 case XML_PARSER_DTD:
889 /*
890 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
891 * In the internal DTD subset, parameter-entity references
892 * can occur only where markup declarations can occur, not
893 * within markup declarations.
894 * In that case this is handled in xmlParseMarkupDecl
895 */
896 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
897 return;
Daniel Veillardf5582f12002-06-11 10:08:16 +0000898 if (IS_BLANK(NXT(1)) || NXT(1) == 0)
899 return;
Owen Taylor3473f882001-02-23 17:55:21 +0000900 break;
901 case XML_PARSER_IGNORE:
902 return;
903 }
904
905 NEXT;
906 name = xmlParseName(ctxt);
907 if (xmlParserDebugEntities)
908 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000909 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000910 if (name == NULL) {
911 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
912 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000913 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000914 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000915 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000916 } else {
917 if (RAW == ';') {
918 NEXT;
919 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
920 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
921 if (entity == NULL) {
922
923 /*
924 * [ WFC: Entity Declared ]
925 * In a document without any DTD, a document with only an
926 * internal DTD subset which contains no parameter entity
927 * references, or a document with "standalone='yes'", ...
928 * ... The declaration of a parameter entity must precede
929 * any reference to it...
930 */
931 if ((ctxt->standalone == 1) ||
932 ((ctxt->hasExternalSubset == 0) &&
933 (ctxt->hasPErefs == 0))) {
934 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
935 ctxt->sax->error(ctxt->userData,
936 "PEReference: %%%s; not found\n", name);
937 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000938 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000939 } else {
940 /*
941 * [ VC: Entity Declared ]
942 * In a document with an external subset or external
943 * parameter entities with "standalone='no'", ...
944 * ... The declaration of a parameter entity must precede
945 * any reference to it...
946 */
947 if ((!ctxt->disableSAX) &&
948 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
949 ctxt->vctxt.error(ctxt->vctxt.userData,
950 "PEReference: %%%s; not found\n", name);
951 } else if ((!ctxt->disableSAX) &&
952 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
953 ctxt->sax->warning(ctxt->userData,
954 "PEReference: %%%s; not found\n", name);
955 ctxt->valid = 0;
956 }
Daniel Veillardf5582f12002-06-11 10:08:16 +0000957 } else if (ctxt->input->free != deallocblankswrapper) {
958 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
959 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +0000960 } else {
961 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
962 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000963 xmlChar start[4];
964 xmlCharEncoding enc;
965
Owen Taylor3473f882001-02-23 17:55:21 +0000966 /*
967 * handle the extra spaces added before and after
968 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000969 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000970 */
971 input = xmlNewEntityInputStream(ctxt, entity);
972 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000973
974 /*
975 * Get the 4 first bytes and decode the charset
976 * if enc != XML_CHAR_ENCODING_NONE
977 * plug some encoding conversion routines.
978 */
979 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +0000980 if (entity->length >= 4) {
981 start[0] = RAW;
982 start[1] = NXT(1);
983 start[2] = NXT(2);
984 start[3] = NXT(3);
985 enc = xmlDetectCharEncoding(start, 4);
986 if (enc != XML_CHAR_ENCODING_NONE) {
987 xmlSwitchEncoding(ctxt, enc);
988 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000989 }
990
Owen Taylor3473f882001-02-23 17:55:21 +0000991 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
992 (RAW == '<') && (NXT(1) == '?') &&
993 (NXT(2) == 'x') && (NXT(3) == 'm') &&
994 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
995 xmlParseTextDecl(ctxt);
996 }
Owen Taylor3473f882001-02-23 17:55:21 +0000997 } else {
998 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
999 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001000 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +00001001 name);
1002 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001003 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001004 }
1005 }
1006 } else {
1007 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1008 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1009 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001010 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001011 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001012 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001013 }
1014 xmlFree(name);
1015 }
1016}
1017
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates @buffer accordingly. The result
 * of xmlRealloc goes through a temporary so that, on failure, the old
 * block is released instead of being leaked when the enclosing function
 * returns NULL.
 * NOTE(review): this assumes every user of the macro owns @buffer
 * exclusively (true for the uses visible next to this definition) --
 * confirm for any other caller in the file.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_tmp;						\
    buffer##_size *= 2;							\
    buffer##_tmp = (xmlChar *)						\
            xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (buffer##_tmp == NULL) {						\
        xmlFree(buffer);						\
        xmlGenericError(xmlGenericErrorContext, "realloc failed");	\
        return(NULL);							\
    }									\
    buffer = buffer##_tmp;						\
}
1030
1031/**
1032 * xmlStringDecodeEntities:
1033 * @ctxt: the parser context
1034 * @str: the input string
1035 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1036 * @end: an end marker xmlChar, 0 if none
1037 * @end2: an end marker xmlChar, 0 if none
1038 * @end3: an end marker xmlChar, 0 if none
1039 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001040 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001041 *
1042 * [67] Reference ::= EntityRef | CharRef
1043 *
1044 * [69] PEReference ::= '%' Name ';'
1045 *
1046 * Returns A newly allocated string with the substitution done. The caller
1047 * must deallocate it !
1048 */
1049xmlChar *
1050xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1051 xmlChar end, xmlChar end2, xmlChar end3) {
1052 xmlChar *buffer = NULL;
1053 int buffer_size = 0;
1054
1055 xmlChar *current = NULL;
1056 xmlEntityPtr ent;
1057 int c,l;
1058 int nbchars = 0;
1059
1060 if (str == NULL)
1061 return(NULL);
1062
1063 if (ctxt->depth > 40) {
1064 ctxt->errNo = XML_ERR_ENTITY_LOOP;
1065 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1066 ctxt->sax->error(ctxt->userData,
1067 "Detected entity reference loop\n");
1068 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001069 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001070 return(NULL);
1071 }
1072
1073 /*
1074 * allocate a translation buffer.
1075 */
1076 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001077 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001078 if (buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00001079 xmlGenericError(xmlGenericErrorContext,
1080 "xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00001081 return(NULL);
1082 }
1083
1084 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001085 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001086 * we are operating on already parsed values.
1087 */
1088 c = CUR_SCHAR(str, l);
1089 while ((c != 0) && (c != end) && /* non input consuming loop */
1090 (c != end2) && (c != end3)) {
1091
1092 if (c == 0) break;
1093 if ((c == '&') && (str[1] == '#')) {
1094 int val = xmlParseStringCharRef(ctxt, &str);
1095 if (val != 0) {
1096 COPY_BUF(0,buffer,nbchars,val);
1097 }
1098 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1099 if (xmlParserDebugEntities)
1100 xmlGenericError(xmlGenericErrorContext,
1101 "String decoding Entity Reference: %.30s\n",
1102 str);
1103 ent = xmlParseStringEntityRef(ctxt, &str);
1104 if ((ent != NULL) &&
1105 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1106 if (ent->content != NULL) {
1107 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1108 } else {
1109 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1110 ctxt->sax->error(ctxt->userData,
1111 "internal error entity has no content\n");
1112 }
1113 } else if ((ent != NULL) && (ent->content != NULL)) {
1114 xmlChar *rep;
1115
1116 ctxt->depth++;
1117 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1118 0, 0, 0);
1119 ctxt->depth--;
1120 if (rep != NULL) {
1121 current = rep;
1122 while (*current != 0) { /* non input consuming loop */
1123 buffer[nbchars++] = *current++;
1124 if (nbchars >
1125 buffer_size - XML_PARSER_BUFFER_SIZE) {
1126 growBuffer(buffer);
1127 }
1128 }
1129 xmlFree(rep);
1130 }
1131 } else if (ent != NULL) {
1132 int i = xmlStrlen(ent->name);
1133 const xmlChar *cur = ent->name;
1134
1135 buffer[nbchars++] = '&';
1136 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1137 growBuffer(buffer);
1138 }
1139 for (;i > 0;i--)
1140 buffer[nbchars++] = *cur++;
1141 buffer[nbchars++] = ';';
1142 }
1143 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1144 if (xmlParserDebugEntities)
1145 xmlGenericError(xmlGenericErrorContext,
1146 "String decoding PE Reference: %.30s\n", str);
1147 ent = xmlParseStringPEReference(ctxt, &str);
1148 if (ent != NULL) {
1149 xmlChar *rep;
1150
1151 ctxt->depth++;
1152 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1153 0, 0, 0);
1154 ctxt->depth--;
1155 if (rep != NULL) {
1156 current = rep;
1157 while (*current != 0) { /* non input consuming loop */
1158 buffer[nbchars++] = *current++;
1159 if (nbchars >
1160 buffer_size - XML_PARSER_BUFFER_SIZE) {
1161 growBuffer(buffer);
1162 }
1163 }
1164 xmlFree(rep);
1165 }
1166 }
1167 } else {
1168 COPY_BUF(l,buffer,nbchars,c);
1169 str += l;
1170 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1171 growBuffer(buffer);
1172 }
1173 }
1174 c = CUR_SCHAR(str, l);
1175 }
1176 buffer[nbchars++] = 0;
1177 return(buffer);
1178}
1179
1180
1181/************************************************************************
1182 * *
1183 * Commodity functions to handle xmlChars *
1184 * *
1185 ************************************************************************/
1186
1187/**
1188 * xmlStrndup:
1189 * @cur: the input xmlChar *
1190 * @len: the len of @cur
1191 *
1192 * a strndup for array of xmlChar's
1193 *
1194 * Returns a new xmlChar * or NULL
1195 */
1196xmlChar *
1197xmlStrndup(const xmlChar *cur, int len) {
1198 xmlChar *ret;
1199
1200 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001201 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001202 if (ret == NULL) {
1203 xmlGenericError(xmlGenericErrorContext,
1204 "malloc of %ld byte failed\n",
1205 (len + 1) * (long)sizeof(xmlChar));
1206 return(NULL);
1207 }
1208 memcpy(ret, cur, len * sizeof(xmlChar));
1209 ret[len] = 0;
1210 return(ret);
1211}
1212
1213/**
1214 * xmlStrdup:
1215 * @cur: the input xmlChar *
1216 *
1217 * a strdup for array of xmlChar's. Since they are supposed to be
1218 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1219 * a termination mark of '0'.
1220 *
1221 * Returns a new xmlChar * or NULL
1222 */
1223xmlChar *
1224xmlStrdup(const xmlChar *cur) {
1225 const xmlChar *p = cur;
1226
1227 if (cur == NULL) return(NULL);
1228 while (*p != 0) p++; /* non input consuming */
1229 return(xmlStrndup(cur, p - cur));
1230}
1231
1232/**
1233 * xmlCharStrndup:
1234 * @cur: the input char *
1235 * @len: the len of @cur
1236 *
1237 * a strndup for char's to xmlChar's
1238 *
1239 * Returns a new xmlChar * or NULL
1240 */
1241
1242xmlChar *
1243xmlCharStrndup(const char *cur, int len) {
1244 int i;
1245 xmlChar *ret;
1246
1247 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001248 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001249 if (ret == NULL) {
1250 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1251 (len + 1) * (long)sizeof(xmlChar));
1252 return(NULL);
1253 }
1254 for (i = 0;i < len;i++)
1255 ret[i] = (xmlChar) cur[i];
1256 ret[len] = 0;
1257 return(ret);
1258}
1259
1260/**
1261 * xmlCharStrdup:
1262 * @cur: the input char *
Owen Taylor3473f882001-02-23 17:55:21 +00001263 *
1264 * a strdup for char's to xmlChar's
1265 *
1266 * Returns a new xmlChar * or NULL
1267 */
1268
1269xmlChar *
1270xmlCharStrdup(const char *cur) {
1271 const char *p = cur;
1272
1273 if (cur == NULL) return(NULL);
1274 while (*p != '\0') p++; /* non input consuming */
1275 return(xmlCharStrndup(cur, p - cur));
1276}
1277
1278/**
1279 * xmlStrcmp:
1280 * @str1: the first xmlChar *
1281 * @str2: the second xmlChar *
1282 *
1283 * a strcmp for xmlChar's
1284 *
1285 * Returns the integer result of the comparison
1286 */
1287
1288int
1289xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1290 register int tmp;
1291
1292 if (str1 == str2) return(0);
1293 if (str1 == NULL) return(-1);
1294 if (str2 == NULL) return(1);
1295 do {
1296 tmp = *str1++ - *str2;
1297 if (tmp != 0) return(tmp);
1298 } while (*str2++ != 0);
1299 return 0;
1300}
1301
1302/**
1303 * xmlStrEqual:
1304 * @str1: the first xmlChar *
1305 * @str2: the second xmlChar *
1306 *
1307 * Check if both string are equal of have same content
1308 * Should be a bit more readable and faster than xmlStrEqual()
1309 *
1310 * Returns 1 if they are equal, 0 if they are different
1311 */
1312
1313int
1314xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1315 if (str1 == str2) return(1);
1316 if (str1 == NULL) return(0);
1317 if (str2 == NULL) return(0);
1318 do {
1319 if (*str1++ != *str2) return(0);
1320 } while (*str2++);
1321 return(1);
1322}
1323
1324/**
1325 * xmlStrncmp:
1326 * @str1: the first xmlChar *
1327 * @str2: the second xmlChar *
1328 * @len: the max comparison length
1329 *
1330 * a strncmp for xmlChar's
1331 *
1332 * Returns the integer result of the comparison
1333 */
1334
1335int
1336xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1337 register int tmp;
1338
1339 if (len <= 0) return(0);
1340 if (str1 == str2) return(0);
1341 if (str1 == NULL) return(-1);
1342 if (str2 == NULL) return(1);
1343 do {
1344 tmp = *str1++ - *str2;
1345 if (tmp != 0 || --len == 0) return(tmp);
1346 } while (*str2++ != 0);
1347 return 0;
1348}
1349
Daniel Veillardb44025c2001-10-11 22:55:55 +00001350static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001351 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1352 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1353 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1354 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1355 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1356 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1357 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1358 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1359 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1360 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1361 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1362 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1363 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1364 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1365 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1366 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1367 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1368 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1369 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1370 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1371 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1372 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1373 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1374 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1375 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1376 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1377 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1378 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1379 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1380 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1381 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1382 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1383};
1384
1385/**
1386 * xmlStrcasecmp:
1387 * @str1: the first xmlChar *
1388 * @str2: the second xmlChar *
1389 *
1390 * a strcasecmp for xmlChar's
1391 *
1392 * Returns the integer result of the comparison
1393 */
1394
1395int
1396xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1397 register int tmp;
1398
1399 if (str1 == str2) return(0);
1400 if (str1 == NULL) return(-1);
1401 if (str2 == NULL) return(1);
1402 do {
1403 tmp = casemap[*str1++] - casemap[*str2];
1404 if (tmp != 0) return(tmp);
1405 } while (*str2++ != 0);
1406 return 0;
1407}
1408
1409/**
1410 * xmlStrncasecmp:
1411 * @str1: the first xmlChar *
1412 * @str2: the second xmlChar *
1413 * @len: the max comparison length
1414 *
1415 * a strncasecmp for xmlChar's
1416 *
1417 * Returns the integer result of the comparison
1418 */
1419
1420int
1421xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1422 register int tmp;
1423
1424 if (len <= 0) return(0);
1425 if (str1 == str2) return(0);
1426 if (str1 == NULL) return(-1);
1427 if (str2 == NULL) return(1);
1428 do {
1429 tmp = casemap[*str1++] - casemap[*str2];
1430 if (tmp != 0 || --len == 0) return(tmp);
1431 } while (*str2++ != 0);
1432 return 0;
1433}
1434
1435/**
1436 * xmlStrchr:
1437 * @str: the xmlChar * array
1438 * @val: the xmlChar to search
1439 *
1440 * a strchr for xmlChar's
1441 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001442 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001443 */
1444
1445const xmlChar *
1446xmlStrchr(const xmlChar *str, xmlChar val) {
1447 if (str == NULL) return(NULL);
1448 while (*str != 0) { /* non input consuming */
1449 if (*str == val) return((xmlChar *) str);
1450 str++;
1451 }
1452 return(NULL);
1453}
1454
1455/**
1456 * xmlStrstr:
1457 * @str: the xmlChar * array (haystack)
1458 * @val: the xmlChar to search (needle)
1459 *
1460 * a strstr for xmlChar's
1461 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001462 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001463 */
1464
1465const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001466xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001467 int n;
1468
1469 if (str == NULL) return(NULL);
1470 if (val == NULL) return(NULL);
1471 n = xmlStrlen(val);
1472
1473 if (n == 0) return(str);
1474 while (*str != 0) { /* non input consuming */
1475 if (*str == *val) {
1476 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1477 }
1478 str++;
1479 }
1480 return(NULL);
1481}
1482
1483/**
1484 * xmlStrcasestr:
1485 * @str: the xmlChar * array (haystack)
1486 * @val: the xmlChar to search (needle)
1487 *
1488 * a case-ignoring strstr for xmlChar's
1489 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001490 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001491 */
1492
1493const xmlChar *
1494xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1495 int n;
1496
1497 if (str == NULL) return(NULL);
1498 if (val == NULL) return(NULL);
1499 n = xmlStrlen(val);
1500
1501 if (n == 0) return(str);
1502 while (*str != 0) { /* non input consuming */
1503 if (casemap[*str] == casemap[*val])
1504 if (!xmlStrncasecmp(str, val, n)) return(str);
1505 str++;
1506 }
1507 return(NULL);
1508}
1509
1510/**
1511 * xmlStrsub:
1512 * @str: the xmlChar * array (haystack)
1513 * @start: the index of the first char (zero based)
1514 * @len: the length of the substring
1515 *
1516 * Extract a substring of a given string
1517 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001518 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001519 */
1520
1521xmlChar *
1522xmlStrsub(const xmlChar *str, int start, int len) {
1523 int i;
1524
1525 if (str == NULL) return(NULL);
1526 if (start < 0) return(NULL);
1527 if (len < 0) return(NULL);
1528
1529 for (i = 0;i < start;i++) {
1530 if (*str == 0) return(NULL);
1531 str++;
1532 }
1533 if (*str == 0) return(NULL);
1534 return(xmlStrndup(str, len));
1535}
1536
1537/**
1538 * xmlStrlen:
1539 * @str: the xmlChar * array
1540 *
1541 * length of a xmlChar's string
1542 *
1543 * Returns the number of xmlChar contained in the ARRAY.
1544 */
1545
1546int
1547xmlStrlen(const xmlChar *str) {
1548 int len = 0;
1549
1550 if (str == NULL) return(0);
1551 while (*str != 0) { /* non input consuming */
1552 str++;
1553 len++;
1554 }
1555 return(len);
1556}
1557
1558/**
1559 * xmlStrncat:
1560 * @cur: the original xmlChar * array
1561 * @add: the xmlChar * array added
1562 * @len: the length of @add
1563 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001564 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001565 * first bytes of @add.
1566 *
1567 * Returns a new xmlChar *, the original @cur is reallocated if needed
1568 * and should not be freed
1569 */
1570
1571xmlChar *
1572xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1573 int size;
1574 xmlChar *ret;
1575
1576 if ((add == NULL) || (len == 0))
1577 return(cur);
1578 if (cur == NULL)
1579 return(xmlStrndup(add, len));
1580
1581 size = xmlStrlen(cur);
1582 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1583 if (ret == NULL) {
1584 xmlGenericError(xmlGenericErrorContext,
1585 "xmlStrncat: realloc of %ld byte failed\n",
1586 (size + len + 1) * (long)sizeof(xmlChar));
1587 return(cur);
1588 }
1589 memcpy(&ret[size], add, len * sizeof(xmlChar));
1590 ret[size + len] = 0;
1591 return(ret);
1592}
1593
1594/**
1595 * xmlStrcat:
1596 * @cur: the original xmlChar * array
1597 * @add: the xmlChar * array added
1598 *
1599 * a strcat for array of xmlChar's. Since they are supposed to be
1600 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1601 * a termination mark of '0'.
1602 *
1603 * Returns a new xmlChar * containing the concatenated string.
1604 */
1605xmlChar *
1606xmlStrcat(xmlChar *cur, const xmlChar *add) {
1607 const xmlChar *p = add;
1608
1609 if (add == NULL) return(cur);
1610 if (cur == NULL)
1611 return(xmlStrdup(add));
1612
1613 while (*p != 0) p++; /* non input consuming */
1614 return(xmlStrncat(cur, add, p - add));
1615}
1616
1617/************************************************************************
1618 * *
1619 * Commodity functions, cleanup needed ? *
1620 * *
1621 ************************************************************************/
1622
1623/**
1624 * areBlanks:
1625 * @ctxt: an XML parser context
1626 * @str: a xmlChar *
1627 * @len: the size of @str
1628 *
1629 * Is this a sequence of blank chars that one can ignore ?
1630 *
1631 * Returns 1 if ignorable 0 otherwise.
1632 */
1633
1634static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1635 int i, ret;
1636 xmlNodePtr lastChild;
1637
Daniel Veillard05c13a22001-09-09 08:38:09 +00001638 /*
1639 * Don't spend time trying to differentiate them, the same callback is
1640 * used !
1641 */
1642 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001643 return(0);
1644
Owen Taylor3473f882001-02-23 17:55:21 +00001645 /*
1646 * Check for xml:space value.
1647 */
1648 if (*(ctxt->space) == 1)
1649 return(0);
1650
1651 /*
1652 * Check that the string is made of blanks
1653 */
1654 for (i = 0;i < len;i++)
1655 if (!(IS_BLANK(str[i]))) return(0);
1656
1657 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001658 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001659 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001660 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001661 if (ctxt->myDoc != NULL) {
1662 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1663 if (ret == 0) return(1);
1664 if (ret == 1) return(0);
1665 }
1666
1667 /*
1668 * Otherwise, heuristic :-\
1669 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001670 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001671 if ((ctxt->node->children == NULL) &&
1672 (RAW == '<') && (NXT(1) == '/')) return(0);
1673
1674 lastChild = xmlGetLastChild(ctxt->node);
1675 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001676 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1677 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001678 } else if (xmlNodeIsText(lastChild))
1679 return(0);
1680 else if ((ctxt->node->children != NULL) &&
1681 (xmlNodeIsText(ctxt->node->children)))
1682 return(0);
1683 return(1);
1684}
1685
Owen Taylor3473f882001-02-23 17:55:21 +00001686/************************************************************************
1687 * *
1688 * Extra stuff for namespace support *
1689 * Relates to http://www.w3.org/TR/WD-xml-names *
1690 * *
1691 ************************************************************************/
1692
1693/**
1694 * xmlSplitQName:
1695 * @ctxt: an XML parser context
1696 * @name: an XML parser context
1697 * @prefix: a xmlChar **
1698 *
1699 * parse an UTF8 encoded XML qualified name string
1700 *
1701 * [NS 5] QName ::= (Prefix ':')? LocalPart
1702 *
1703 * [NS 6] Prefix ::= NCName
1704 *
1705 * [NS 7] LocalPart ::= NCName
1706 *
1707 * Returns the local part, and prefix is updated
1708 * to get the Prefix if any.
1709 */
1710
1711xmlChar *
1712xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1713 xmlChar buf[XML_MAX_NAMELEN + 5];
1714 xmlChar *buffer = NULL;
1715 int len = 0;
1716 int max = XML_MAX_NAMELEN;
1717 xmlChar *ret = NULL;
1718 const xmlChar *cur = name;
1719 int c;
1720
1721 *prefix = NULL;
1722
1723#ifndef XML_XML_NAMESPACE
1724 /* xml: prefix is not really a namespace */
1725 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1726 (cur[2] == 'l') && (cur[3] == ':'))
1727 return(xmlStrdup(name));
1728#endif
1729
1730 /* nasty but valid */
1731 if (cur[0] == ':')
1732 return(xmlStrdup(name));
1733
1734 c = *cur++;
1735 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1736 buf[len++] = c;
1737 c = *cur++;
1738 }
1739 if (len >= max) {
1740 /*
1741 * Okay someone managed to make a huge name, so he's ready to pay
1742 * for the processing speed.
1743 */
1744 max = len * 2;
1745
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001746 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001747 if (buffer == NULL) {
1748 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1749 ctxt->sax->error(ctxt->userData,
1750 "xmlSplitQName: out of memory\n");
1751 return(NULL);
1752 }
1753 memcpy(buffer, buf, len);
1754 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1755 if (len + 10 > max) {
1756 max *= 2;
1757 buffer = (xmlChar *) xmlRealloc(buffer,
1758 max * sizeof(xmlChar));
1759 if (buffer == NULL) {
1760 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1761 ctxt->sax->error(ctxt->userData,
1762 "xmlSplitQName: out of memory\n");
1763 return(NULL);
1764 }
1765 }
1766 buffer[len++] = c;
1767 c = *cur++;
1768 }
1769 buffer[len] = 0;
1770 }
1771
1772 if (buffer == NULL)
1773 ret = xmlStrndup(buf, len);
1774 else {
1775 ret = buffer;
1776 buffer = NULL;
1777 max = XML_MAX_NAMELEN;
1778 }
1779
1780
1781 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00001782 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001783 if (c == 0) return(ret);
1784 *prefix = ret;
1785 len = 0;
1786
Daniel Veillardbb284f42002-10-16 18:02:47 +00001787 /*
1788 * Check that the first character is proper to start
1789 * a new name
1790 */
1791 if (!(((c >= 0x61) && (c <= 0x7A)) ||
1792 ((c >= 0x41) && (c <= 0x5A)) ||
1793 (c == '_') || (c == ':'))) {
1794 int l;
1795 int first = CUR_SCHAR(cur, l);
1796
1797 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001798 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1799 (ctxt->sax->error != NULL))
Daniel Veillardbb284f42002-10-16 18:02:47 +00001800 ctxt->sax->error(ctxt->userData,
1801 "Name %s is not XML Namespace compliant\n",
1802 name);
1803 }
1804 }
1805 cur++;
1806
Owen Taylor3473f882001-02-23 17:55:21 +00001807 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1808 buf[len++] = c;
1809 c = *cur++;
1810 }
1811 if (len >= max) {
1812 /*
1813 * Okay someone managed to make a huge name, so he's ready to pay
1814 * for the processing speed.
1815 */
1816 max = len * 2;
1817
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001818 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001819 if (buffer == NULL) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001820 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1821 (ctxt->sax->error != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00001822 ctxt->sax->error(ctxt->userData,
1823 "xmlSplitQName: out of memory\n");
1824 return(NULL);
1825 }
1826 memcpy(buffer, buf, len);
1827 while (c != 0) { /* tested bigname2.xml */
1828 if (len + 10 > max) {
1829 max *= 2;
1830 buffer = (xmlChar *) xmlRealloc(buffer,
1831 max * sizeof(xmlChar));
1832 if (buffer == NULL) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001833 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1834 (ctxt->sax->error != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00001835 ctxt->sax->error(ctxt->userData,
1836 "xmlSplitQName: out of memory\n");
1837 return(NULL);
1838 }
1839 }
1840 buffer[len++] = c;
1841 c = *cur++;
1842 }
1843 buffer[len] = 0;
1844 }
1845
1846 if (buffer == NULL)
1847 ret = xmlStrndup(buf, len);
1848 else {
1849 ret = buffer;
1850 }
1851 }
1852
1853 return(ret);
1854}
1855
1856/************************************************************************
1857 * *
1858 * The parser itself *
1859 * Relates to http://www.w3.org/TR/REC-xml *
1860 * *
1861 ************************************************************************/
1862
Daniel Veillard76d66f42001-05-16 21:05:17 +00001863static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001864/**
1865 * xmlParseName:
1866 * @ctxt: an XML parser context
1867 *
1868 * parse an XML name.
1869 *
1870 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1871 * CombiningChar | Extender
1872 *
1873 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1874 *
1875 * [6] Names ::= Name (S Name)*
1876 *
1877 * Returns the Name parsed or NULL
1878 */
1879
1880xmlChar *
1881xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001882 const xmlChar *in;
1883 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001884 int count = 0;
1885
1886 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001887
1888 /*
1889 * Accelerator for simple ASCII names
1890 */
1891 in = ctxt->input->cur;
1892 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1893 ((*in >= 0x41) && (*in <= 0x5A)) ||
1894 (*in == '_') || (*in == ':')) {
1895 in++;
1896 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1897 ((*in >= 0x41) && (*in <= 0x5A)) ||
1898 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001899 (*in == '_') || (*in == '-') ||
1900 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001901 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001902 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001903 count = in - ctxt->input->cur;
1904 ret = xmlStrndup(ctxt->input->cur, count);
1905 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00001906 ctxt->nbChars += count;
1907 ctxt->input->col += count;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001908 return(ret);
1909 }
1910 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001911 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001912}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001913
Daniel Veillard46de64e2002-05-29 08:21:33 +00001914/**
1915 * xmlParseNameAndCompare:
1916 * @ctxt: an XML parser context
1917 *
1918 * parse an XML name and compares for match
1919 * (specialized for endtag parsing)
1920 *
1921 *
1922 * Returns NULL for an illegal name, (xmlChar*) 1 for success
1923 * and the name for mismatch
1924 */
1925
Daniel Veillardf4862f02002-09-10 11:13:43 +00001926static xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00001927xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
1928 const xmlChar *cmp = other;
1929 const xmlChar *in;
1930 xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00001931
1932 GROW;
1933
1934 in = ctxt->input->cur;
1935 while (*in != 0 && *in == *cmp) {
1936 ++in;
1937 ++cmp;
1938 }
1939 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
1940 /* success */
1941 ctxt->input->cur = in;
1942 return (xmlChar*) 1;
1943 }
1944 /* failure (or end of input buffer), check with full function */
1945 ret = xmlParseName (ctxt);
1946 if (ret != 0 && xmlStrEqual (ret, other)) {
1947 xmlFree (ret);
1948 return (xmlChar*) 1;
1949 }
1950 return ret;
1951}
1952
Daniel Veillard76d66f42001-05-16 21:05:17 +00001953static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001954xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1955 xmlChar buf[XML_MAX_NAMELEN + 5];
1956 int len = 0, l;
1957 int c;
1958 int count = 0;
1959
1960 /*
1961 * Handler for more complex cases
1962 */
1963 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001964 c = CUR_CHAR(l);
1965 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1966 (!IS_LETTER(c) && (c != '_') &&
1967 (c != ':'))) {
1968 return(NULL);
1969 }
1970
1971 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1972 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1973 (c == '.') || (c == '-') ||
1974 (c == '_') || (c == ':') ||
1975 (IS_COMBINING(c)) ||
1976 (IS_EXTENDER(c)))) {
1977 if (count++ > 100) {
1978 count = 0;
1979 GROW;
1980 }
1981 COPY_BUF(l,buf,len,c);
1982 NEXTL(l);
1983 c = CUR_CHAR(l);
1984 if (len >= XML_MAX_NAMELEN) {
1985 /*
1986 * Okay someone managed to make a huge name, so he's ready to pay
1987 * for the processing speed.
1988 */
1989 xmlChar *buffer;
1990 int max = len * 2;
1991
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001992 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001993 if (buffer == NULL) {
1994 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1995 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001996 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001997 return(NULL);
1998 }
1999 memcpy(buffer, buf, len);
2000 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
2001 (c == '.') || (c == '-') ||
2002 (c == '_') || (c == ':') ||
2003 (IS_COMBINING(c)) ||
2004 (IS_EXTENDER(c))) {
2005 if (count++ > 100) {
2006 count = 0;
2007 GROW;
2008 }
2009 if (len + 10 > max) {
2010 max *= 2;
2011 buffer = (xmlChar *) xmlRealloc(buffer,
2012 max * sizeof(xmlChar));
2013 if (buffer == NULL) {
2014 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2015 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00002016 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002017 return(NULL);
2018 }
2019 }
2020 COPY_BUF(l,buffer,len,c);
2021 NEXTL(l);
2022 c = CUR_CHAR(l);
2023 }
2024 buffer[len] = 0;
2025 return(buffer);
2026 }
2027 }
2028 return(xmlStrndup(buf, len));
2029}
2030
2031/**
2032 * xmlParseStringName:
2033 * @ctxt: an XML parser context
2034 * @str: a pointer to the string pointer (IN/OUT)
2035 *
2036 * parse an XML name.
2037 *
2038 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2039 * CombiningChar | Extender
2040 *
2041 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2042 *
2043 * [6] Names ::= Name (S Name)*
2044 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002045 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002046 * is updated to the current location in the string.
2047 */
2048
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002049static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002050xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2051 xmlChar buf[XML_MAX_NAMELEN + 5];
2052 const xmlChar *cur = *str;
2053 int len = 0, l;
2054 int c;
2055
2056 c = CUR_SCHAR(cur, l);
2057 if (!IS_LETTER(c) && (c != '_') &&
2058 (c != ':')) {
2059 return(NULL);
2060 }
2061
2062 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2063 (c == '.') || (c == '-') ||
2064 (c == '_') || (c == ':') ||
2065 (IS_COMBINING(c)) ||
2066 (IS_EXTENDER(c))) {
2067 COPY_BUF(l,buf,len,c);
2068 cur += l;
2069 c = CUR_SCHAR(cur, l);
2070 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2071 /*
2072 * Okay someone managed to make a huge name, so he's ready to pay
2073 * for the processing speed.
2074 */
2075 xmlChar *buffer;
2076 int max = len * 2;
2077
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002078 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002079 if (buffer == NULL) {
2080 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2081 ctxt->sax->error(ctxt->userData,
2082 "xmlParseStringName: out of memory\n");
2083 return(NULL);
2084 }
2085 memcpy(buffer, buf, len);
2086 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2087 (c == '.') || (c == '-') ||
2088 (c == '_') || (c == ':') ||
2089 (IS_COMBINING(c)) ||
2090 (IS_EXTENDER(c))) {
2091 if (len + 10 > max) {
2092 max *= 2;
2093 buffer = (xmlChar *) xmlRealloc(buffer,
2094 max * sizeof(xmlChar));
2095 if (buffer == NULL) {
2096 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2097 ctxt->sax->error(ctxt->userData,
2098 "xmlParseStringName: out of memory\n");
2099 return(NULL);
2100 }
2101 }
2102 COPY_BUF(l,buffer,len,c);
2103 cur += l;
2104 c = CUR_SCHAR(cur, l);
2105 }
2106 buffer[len] = 0;
2107 *str = cur;
2108 return(buffer);
2109 }
2110 }
2111 *str = cur;
2112 return(xmlStrndup(buf, len));
2113}
2114
2115/**
2116 * xmlParseNmtoken:
2117 * @ctxt: an XML parser context
2118 *
2119 * parse an XML Nmtoken.
2120 *
2121 * [7] Nmtoken ::= (NameChar)+
2122 *
2123 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2124 *
2125 * Returns the Nmtoken parsed or NULL
2126 */
2127
2128xmlChar *
2129xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2130 xmlChar buf[XML_MAX_NAMELEN + 5];
2131 int len = 0, l;
2132 int c;
2133 int count = 0;
2134
2135 GROW;
2136 c = CUR_CHAR(l);
2137
2138 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2139 (c == '.') || (c == '-') ||
2140 (c == '_') || (c == ':') ||
2141 (IS_COMBINING(c)) ||
2142 (IS_EXTENDER(c))) {
2143 if (count++ > 100) {
2144 count = 0;
2145 GROW;
2146 }
2147 COPY_BUF(l,buf,len,c);
2148 NEXTL(l);
2149 c = CUR_CHAR(l);
2150 if (len >= XML_MAX_NAMELEN) {
2151 /*
2152 * Okay someone managed to make a huge token, so he's ready to pay
2153 * for the processing speed.
2154 */
2155 xmlChar *buffer;
2156 int max = len * 2;
2157
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002158 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002159 if (buffer == NULL) {
2160 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2161 ctxt->sax->error(ctxt->userData,
2162 "xmlParseNmtoken: out of memory\n");
2163 return(NULL);
2164 }
2165 memcpy(buffer, buf, len);
2166 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2167 (c == '.') || (c == '-') ||
2168 (c == '_') || (c == ':') ||
2169 (IS_COMBINING(c)) ||
2170 (IS_EXTENDER(c))) {
2171 if (count++ > 100) {
2172 count = 0;
2173 GROW;
2174 }
2175 if (len + 10 > max) {
2176 max *= 2;
2177 buffer = (xmlChar *) xmlRealloc(buffer,
2178 max * sizeof(xmlChar));
2179 if (buffer == NULL) {
2180 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2181 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002182 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002183 return(NULL);
2184 }
2185 }
2186 COPY_BUF(l,buffer,len,c);
2187 NEXTL(l);
2188 c = CUR_CHAR(l);
2189 }
2190 buffer[len] = 0;
2191 return(buffer);
2192 }
2193 }
2194 if (len == 0)
2195 return(NULL);
2196 return(xmlStrndup(buf, len));
2197}
2198
2199/**
2200 * xmlParseEntityValue:
2201 * @ctxt: an XML parser context
2202 * @orig: if non-NULL store a copy of the original entity value
2203 *
2204 * parse a value for ENTITY declarations
2205 *
2206 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2207 * "'" ([^%&'] | PEReference | Reference)* "'"
2208 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002209 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002210 */
2211
2212xmlChar *
2213xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2214 xmlChar *buf = NULL;
2215 int len = 0;
2216 int size = XML_PARSER_BUFFER_SIZE;
2217 int c, l;
2218 xmlChar stop;
2219 xmlChar *ret = NULL;
2220 const xmlChar *cur = NULL;
2221 xmlParserInputPtr input;
2222
2223 if (RAW == '"') stop = '"';
2224 else if (RAW == '\'') stop = '\'';
2225 else {
2226 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2227 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2228 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2229 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002230 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002231 return(NULL);
2232 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002233 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002234 if (buf == NULL) {
2235 xmlGenericError(xmlGenericErrorContext,
2236 "malloc of %d byte failed\n", size);
2237 return(NULL);
2238 }
2239
2240 /*
2241 * The content of the entity definition is copied in a buffer.
2242 */
2243
2244 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2245 input = ctxt->input;
2246 GROW;
2247 NEXT;
2248 c = CUR_CHAR(l);
2249 /*
2250 * NOTE: 4.4.5 Included in Literal
2251 * When a parameter entity reference appears in a literal entity
2252 * value, ... a single or double quote character in the replacement
2253 * text is always treated as a normal data character and will not
2254 * terminate the literal.
2255 * In practice it means we stop the loop only when back at parsing
2256 * the initial entity and the quote is found
2257 */
2258 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2259 (ctxt->input != input))) {
2260 if (len + 5 >= size) {
2261 size *= 2;
2262 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2263 if (buf == NULL) {
2264 xmlGenericError(xmlGenericErrorContext,
2265 "realloc of %d byte failed\n", size);
2266 return(NULL);
2267 }
2268 }
2269 COPY_BUF(l,buf,len,c);
2270 NEXTL(l);
2271 /*
2272 * Pop-up of finished entities.
2273 */
2274 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2275 xmlPopInput(ctxt);
2276
2277 GROW;
2278 c = CUR_CHAR(l);
2279 if (c == 0) {
2280 GROW;
2281 c = CUR_CHAR(l);
2282 }
2283 }
2284 buf[len] = 0;
2285
2286 /*
2287 * Raise problem w.r.t. '&' and '%' being used in non-entities
2288 * reference constructs. Note Charref will be handled in
2289 * xmlStringDecodeEntities()
2290 */
2291 cur = buf;
2292 while (*cur != 0) { /* non input consuming */
2293 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2294 xmlChar *name;
2295 xmlChar tmp = *cur;
2296
2297 cur++;
2298 name = xmlParseStringName(ctxt, &cur);
2299 if ((name == NULL) || (*cur != ';')) {
2300 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2301 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2302 ctxt->sax->error(ctxt->userData,
2303 "EntityValue: '%c' forbidden except for entities references\n",
2304 tmp);
2305 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002306 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002307 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002308 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2309 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002310 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2311 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2312 ctxt->sax->error(ctxt->userData,
2313 "EntityValue: PEReferences forbidden in internal subset\n",
2314 tmp);
2315 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002316 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002317 }
2318 if (name != NULL)
2319 xmlFree(name);
2320 }
2321 cur++;
2322 }
2323
2324 /*
2325 * Then PEReference entities are substituted.
2326 */
2327 if (c != stop) {
2328 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2329 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2330 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2331 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002332 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002333 xmlFree(buf);
2334 } else {
2335 NEXT;
2336 /*
2337 * NOTE: 4.4.7 Bypassed
2338 * When a general entity reference appears in the EntityValue in
2339 * an entity declaration, it is bypassed and left as is.
2340 * so XML_SUBSTITUTE_REF is not set here.
2341 */
2342 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2343 0, 0, 0);
2344 if (orig != NULL)
2345 *orig = buf;
2346 else
2347 xmlFree(buf);
2348 }
2349
2350 return(ret);
2351}
2352
2353/**
2354 * xmlParseAttValue:
2355 * @ctxt: an XML parser context
2356 *
2357 * parse a value for an attribute
2358 * Note: the parser won't do substitution of entities here, this
2359 * will be handled later in xmlStringGetNodeList
2360 *
2361 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2362 * "'" ([^<&'] | Reference)* "'"
2363 *
2364 * 3.3.3 Attribute-Value Normalization:
2365 * Before the value of an attribute is passed to the application or
2366 * checked for validity, the XML processor must normalize it as follows:
2367 * - a character reference is processed by appending the referenced
2368 * character to the attribute value
2369 * - an entity reference is processed by recursively processing the
2370 * replacement text of the entity
2371 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2372 * appending #x20 to the normalized value, except that only a single
2373 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2374 * parsed entity or the literal entity value of an internal parsed entity
2375 * - other characters are processed by appending them to the normalized value
2376 * If the declared value is not CDATA, then the XML processor must further
2377 * process the normalized attribute value by discarding any leading and
2378 * trailing space (#x20) characters, and by replacing sequences of space
2379 * (#x20) characters by a single space (#x20) character.
2380 * All attributes for which no declaration has been read should be treated
2381 * by a non-validating parser as if declared CDATA.
2382 *
2383 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2384 */
2385
2386xmlChar *
Daniel Veillarde72c7562002-05-31 09:47:30 +00002387xmlParseAttValueComplex(xmlParserCtxtPtr ctxt);
2388
2389xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002390xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2391 xmlChar limit = 0;
Daniel Veillardf4862f02002-09-10 11:13:43 +00002392 const xmlChar *in = NULL;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002393 xmlChar *ret = NULL;
2394 SHRINK;
2395 GROW;
Daniel Veillarde645e8c2002-10-22 17:35:37 +00002396 in = (xmlChar *) CUR_PTR;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002397 if (*in != '"' && *in != '\'') {
2398 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2399 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2400 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2401 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002402 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002403 return(NULL);
2404 }
2405 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2406 limit = *in;
2407 ++in;
2408
2409 while (*in != limit && *in >= 0x20 && *in <= 0x7f &&
2410 *in != '&' && *in != '<'
2411 ) {
2412 ++in;
2413 }
2414 if (*in != limit) {
2415 return xmlParseAttValueComplex(ctxt);
2416 }
2417 ++in;
2418 ret = xmlStrndup (CUR_PTR + 1, in - CUR_PTR - 2);
2419 CUR_PTR = in;
2420 return ret;
2421}
2422
Daniel Veillard01c13b52002-12-10 15:19:08 +00002423/**
2424 * xmlParseAttValueComplex:
2425 * @ctxt: an XML parser context
2426 *
2427 * parse a value for an attribute, this is the fallback function
2428 * of xmlParseAttValue() when the attribute parsing requires handling
2429 * of non-ASCII characters.
2430 *
2431 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2432 */
Daniel Veillarde72c7562002-05-31 09:47:30 +00002433xmlChar *
2434xmlParseAttValueComplex(xmlParserCtxtPtr ctxt) {
2435 xmlChar limit = 0;
2436 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002437 int len = 0;
2438 int buf_size = 0;
2439 int c, l;
2440 xmlChar *current = NULL;
2441 xmlEntityPtr ent;
2442
2443
2444 SHRINK;
2445 if (NXT(0) == '"') {
2446 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2447 limit = '"';
2448 NEXT;
2449 } else if (NXT(0) == '\'') {
2450 limit = '\'';
2451 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2452 NEXT;
2453 } else {
2454 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2455 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2456 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2457 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002458 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002459 return(NULL);
2460 }
2461
2462 /*
2463 * allocate a translation buffer.
2464 */
2465 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002466 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002467 if (buf == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002468 xmlGenericError(xmlGenericErrorContext,
2469 "xmlParseAttValue: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002470 return(NULL);
2471 }
2472
2473 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002474 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002475 */
2476 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002477 while ((NXT(0) != limit) && /* checked */
2478 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002479 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002480 if (c == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00002481 if (NXT(1) == '#') {
2482 int val = xmlParseCharRef(ctxt);
2483 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002484 if (ctxt->replaceEntities) {
2485 if (len > buf_size - 10) {
2486 growBuffer(buf);
2487 }
2488 buf[len++] = '&';
2489 } else {
2490 /*
2491 * The reparsing will be done in xmlStringGetNodeList()
2492 * called by the attribute() function in SAX.c
2493 */
2494 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002495
Daniel Veillard319a7422001-09-11 09:27:09 +00002496 if (len > buf_size - 10) {
2497 growBuffer(buf);
2498 }
2499 current = &buffer[0];
2500 while (*current != 0) { /* non input consuming */
2501 buf[len++] = *current++;
2502 }
Owen Taylor3473f882001-02-23 17:55:21 +00002503 }
2504 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002505 if (len > buf_size - 10) {
2506 growBuffer(buf);
2507 }
Owen Taylor3473f882001-02-23 17:55:21 +00002508 len += xmlCopyChar(0, &buf[len], val);
2509 }
2510 } else {
2511 ent = xmlParseEntityRef(ctxt);
2512 if ((ent != NULL) &&
2513 (ctxt->replaceEntities != 0)) {
2514 xmlChar *rep;
2515
2516 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2517 rep = xmlStringDecodeEntities(ctxt, ent->content,
2518 XML_SUBSTITUTE_REF, 0, 0, 0);
2519 if (rep != NULL) {
2520 current = rep;
2521 while (*current != 0) { /* non input consuming */
2522 buf[len++] = *current++;
2523 if (len > buf_size - 10) {
2524 growBuffer(buf);
2525 }
2526 }
2527 xmlFree(rep);
2528 }
2529 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002530 if (len > buf_size - 10) {
2531 growBuffer(buf);
2532 }
Owen Taylor3473f882001-02-23 17:55:21 +00002533 if (ent->content != NULL)
2534 buf[len++] = ent->content[0];
2535 }
2536 } else if (ent != NULL) {
2537 int i = xmlStrlen(ent->name);
2538 const xmlChar *cur = ent->name;
2539
2540 /*
2541 * This may look absurd but is needed to detect
2542 * entities problems
2543 */
2544 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2545 (ent->content != NULL)) {
2546 xmlChar *rep;
2547 rep = xmlStringDecodeEntities(ctxt, ent->content,
2548 XML_SUBSTITUTE_REF, 0, 0, 0);
2549 if (rep != NULL)
2550 xmlFree(rep);
2551 }
2552
2553 /*
2554 * Just output the reference
2555 */
2556 buf[len++] = '&';
2557 if (len > buf_size - i - 10) {
2558 growBuffer(buf);
2559 }
2560 for (;i > 0;i--)
2561 buf[len++] = *cur++;
2562 buf[len++] = ';';
2563 }
2564 }
2565 } else {
2566 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2567 COPY_BUF(l,buf,len,0x20);
2568 if (len > buf_size - 10) {
2569 growBuffer(buf);
2570 }
2571 } else {
2572 COPY_BUF(l,buf,len,c);
2573 if (len > buf_size - 10) {
2574 growBuffer(buf);
2575 }
2576 }
2577 NEXTL(l);
2578 }
2579 GROW;
2580 c = CUR_CHAR(l);
2581 }
2582 buf[len++] = 0;
2583 if (RAW == '<') {
2584 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2585 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2586 ctxt->sax->error(ctxt->userData,
2587 "Unescaped '<' not allowed in attributes values\n");
2588 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002589 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002590 } else if (RAW != limit) {
2591 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2592 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2593 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2594 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002595 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002596 } else
2597 NEXT;
2598 return(buf);
2599}
2600
2601/**
2602 * xmlParseSystemLiteral:
2603 * @ctxt: an XML parser context
2604 *
2605 * parse an XML Literal
2606 *
2607 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2608 *
2609 * Returns the SystemLiteral parsed or NULL
2610 */
2611
2612xmlChar *
2613xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2614 xmlChar *buf = NULL;
2615 int len = 0;
2616 int size = XML_PARSER_BUFFER_SIZE;
2617 int cur, l;
2618 xmlChar stop;
2619 int state = ctxt->instate;
2620 int count = 0;
2621
2622 SHRINK;
2623 if (RAW == '"') {
2624 NEXT;
2625 stop = '"';
2626 } else if (RAW == '\'') {
2627 NEXT;
2628 stop = '\'';
2629 } else {
2630 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2631 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2632 ctxt->sax->error(ctxt->userData,
2633 "SystemLiteral \" or ' expected\n");
2634 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002635 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002636 return(NULL);
2637 }
2638
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002639 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002640 if (buf == NULL) {
2641 xmlGenericError(xmlGenericErrorContext,
2642 "malloc of %d byte failed\n", size);
2643 return(NULL);
2644 }
2645 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2646 cur = CUR_CHAR(l);
2647 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2648 if (len + 5 >= size) {
2649 size *= 2;
2650 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2651 if (buf == NULL) {
2652 xmlGenericError(xmlGenericErrorContext,
2653 "realloc of %d byte failed\n", size);
2654 ctxt->instate = (xmlParserInputState) state;
2655 return(NULL);
2656 }
2657 }
2658 count++;
2659 if (count > 50) {
2660 GROW;
2661 count = 0;
2662 }
2663 COPY_BUF(l,buf,len,cur);
2664 NEXTL(l);
2665 cur = CUR_CHAR(l);
2666 if (cur == 0) {
2667 GROW;
2668 SHRINK;
2669 cur = CUR_CHAR(l);
2670 }
2671 }
2672 buf[len] = 0;
2673 ctxt->instate = (xmlParserInputState) state;
2674 if (!IS_CHAR(cur)) {
2675 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2676 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2677 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2678 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002679 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002680 } else {
2681 NEXT;
2682 }
2683 return(buf);
2684}
2685
2686/**
2687 * xmlParsePubidLiteral:
2688 * @ctxt: an XML parser context
2689 *
2690 * parse an XML public literal
2691 *
2692 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2693 *
2694 * Returns the PubidLiteral parsed or NULL.
2695 */
2696
2697xmlChar *
2698xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2699 xmlChar *buf = NULL;
2700 int len = 0;
2701 int size = XML_PARSER_BUFFER_SIZE;
2702 xmlChar cur;
2703 xmlChar stop;
2704 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002705 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00002706
2707 SHRINK;
2708 if (RAW == '"') {
2709 NEXT;
2710 stop = '"';
2711 } else if (RAW == '\'') {
2712 NEXT;
2713 stop = '\'';
2714 } else {
2715 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2716 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2717 ctxt->sax->error(ctxt->userData,
2718 "SystemLiteral \" or ' expected\n");
2719 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002720 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002721 return(NULL);
2722 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002723 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002724 if (buf == NULL) {
2725 xmlGenericError(xmlGenericErrorContext,
2726 "malloc of %d byte failed\n", size);
2727 return(NULL);
2728 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002729 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00002730 cur = CUR;
2731 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2732 if (len + 1 >= size) {
2733 size *= 2;
2734 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2735 if (buf == NULL) {
2736 xmlGenericError(xmlGenericErrorContext,
2737 "realloc of %d byte failed\n", size);
2738 return(NULL);
2739 }
2740 }
2741 buf[len++] = cur;
2742 count++;
2743 if (count > 50) {
2744 GROW;
2745 count = 0;
2746 }
2747 NEXT;
2748 cur = CUR;
2749 if (cur == 0) {
2750 GROW;
2751 SHRINK;
2752 cur = CUR;
2753 }
2754 }
2755 buf[len] = 0;
2756 if (cur != stop) {
2757 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2758 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2759 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2760 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002761 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002762 } else {
2763 NEXT;
2764 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002765 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00002766 return(buf);
2767}
2768
Daniel Veillard48b2f892001-02-25 16:11:03 +00002769void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002770/**
2771 * xmlParseCharData:
2772 * @ctxt: an XML parser context
2773 * @cdata: int indicating whether we are within a CDATA section
2774 *
2775 * parse a CharData section.
2776 * if we are within a CDATA section ']]>' marks an end of section.
2777 *
2778 * The right angle bracket (>) may be represented using the string "&gt;",
2779 * and must, for compatibility, be escaped using "&gt;" or a character
2780 * reference when it appears in the string "]]>" in content, when that
2781 * string is not marking the end of a CDATA section.
2782 *
2783 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2784 */
2785
2786void
2787xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00002788 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002789 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002790 int line = ctxt->input->line;
2791 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002792
2793 SHRINK;
2794 GROW;
2795 /*
2796 * Accelerated common case where input don't need to be
2797 * modified before passing it to the handler.
2798 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00002799 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002800 in = ctxt->input->cur;
2801 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002802get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00002803 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
2804 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002805 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002806 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002807 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002808 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002809 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002810 ctxt->input->line++;
2811 in++;
2812 }
2813 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002814 }
2815 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002816 if ((in[1] == ']') && (in[2] == '>')) {
2817 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2818 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2819 ctxt->sax->error(ctxt->userData,
2820 "Sequence ']]>' not allowed in content\n");
2821 ctxt->input->cur = in;
2822 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002823 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002824 return;
2825 }
2826 in++;
2827 goto get_more;
2828 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002829 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002830 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002831 if (IS_BLANK(*ctxt->input->cur)) {
2832 const xmlChar *tmp = ctxt->input->cur;
2833 ctxt->input->cur = in;
2834 if (areBlanks(ctxt, tmp, nbchar)) {
2835 if (ctxt->sax->ignorableWhitespace != NULL)
2836 ctxt->sax->ignorableWhitespace(ctxt->userData,
2837 tmp, nbchar);
2838 } else {
2839 if (ctxt->sax->characters != NULL)
2840 ctxt->sax->characters(ctxt->userData,
2841 tmp, nbchar);
2842 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002843 line = ctxt->input->line;
2844 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002845 } else {
2846 if (ctxt->sax->characters != NULL)
2847 ctxt->sax->characters(ctxt->userData,
2848 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002849 line = ctxt->input->line;
2850 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002851 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002852 }
2853 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002854 if (*in == 0xD) {
2855 in++;
2856 if (*in == 0xA) {
2857 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002858 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002859 ctxt->input->line++;
2860 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002861 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002862 in--;
2863 }
2864 if (*in == '<') {
2865 return;
2866 }
2867 if (*in == '&') {
2868 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002869 }
2870 SHRINK;
2871 GROW;
2872 in = ctxt->input->cur;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002873 } while ((*in >= 0x20) && (*in <= 0x7F));
Daniel Veillard48b2f892001-02-25 16:11:03 +00002874 nbchar = 0;
2875 }
Daniel Veillard50582112001-03-26 22:52:16 +00002876 ctxt->input->line = line;
2877 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002878 xmlParseCharDataComplex(ctxt, cdata);
2879}
2880
Daniel Veillard01c13b52002-12-10 15:19:08 +00002881/**
2882 * xmlParseCharDataComplex:
2883 * @ctxt: an XML parser context
2884 * @cdata: int indicating whether we are within a CDATA section
2885 *
2886 * parse a CharData section.this is the fallback function
2887 * of xmlParseCharData() when the parsing requires handling
2888 * of non-ASCII characters.
2889 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002890void
2891xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002892 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2893 int nbchar = 0;
2894 int cur, l;
2895 int count = 0;
2896
2897 SHRINK;
2898 GROW;
2899 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002900 while ((cur != '<') && /* checked */
2901 (cur != '&') &&
2902 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00002903 if ((cur == ']') && (NXT(1) == ']') &&
2904 (NXT(2) == '>')) {
2905 if (cdata) break;
2906 else {
2907 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2908 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2909 ctxt->sax->error(ctxt->userData,
2910 "Sequence ']]>' not allowed in content\n");
2911 /* Should this be relaxed ??? I see a "must here */
2912 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002913 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002914 }
2915 }
2916 COPY_BUF(l,buf,nbchar,cur);
2917 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2918 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002919 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002920 */
2921 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2922 if (areBlanks(ctxt, buf, nbchar)) {
2923 if (ctxt->sax->ignorableWhitespace != NULL)
2924 ctxt->sax->ignorableWhitespace(ctxt->userData,
2925 buf, nbchar);
2926 } else {
2927 if (ctxt->sax->characters != NULL)
2928 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2929 }
2930 }
2931 nbchar = 0;
2932 }
2933 count++;
2934 if (count > 50) {
2935 GROW;
2936 count = 0;
2937 }
2938 NEXTL(l);
2939 cur = CUR_CHAR(l);
2940 }
2941 if (nbchar != 0) {
2942 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002943 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002944 */
2945 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2946 if (areBlanks(ctxt, buf, nbchar)) {
2947 if (ctxt->sax->ignorableWhitespace != NULL)
2948 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2949 } else {
2950 if (ctxt->sax->characters != NULL)
2951 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2952 }
2953 }
2954 }
2955}
2956
2957/**
2958 * xmlParseExternalID:
2959 * @ctxt: an XML parser context
2960 * @publicID: a xmlChar** receiving PubidLiteral
2961 * @strict: indicate whether we should restrict parsing to only
2962 * production [75], see NOTE below
2963 *
2964 * Parse an External ID or a Public ID
2965 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002966 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002967 * 'PUBLIC' S PubidLiteral S SystemLiteral
2968 *
2969 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2970 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2971 *
2972 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2973 *
2974 * Returns the function returns SystemLiteral and in the second
2975 * case publicID receives PubidLiteral, is strict is off
2976 * it is possible to return NULL and have publicID set.
2977 */
2978
2979xmlChar *
2980xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2981 xmlChar *URI = NULL;
2982
2983 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002984
2985 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002986 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2987 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2988 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2989 SKIP(6);
2990 if (!IS_BLANK(CUR)) {
2991 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2992 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2993 ctxt->sax->error(ctxt->userData,
2994 "Space required after 'SYSTEM'\n");
2995 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002996 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002997 }
2998 SKIP_BLANKS;
2999 URI = xmlParseSystemLiteral(ctxt);
3000 if (URI == NULL) {
3001 ctxt->errNo = XML_ERR_URI_REQUIRED;
3002 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3003 ctxt->sax->error(ctxt->userData,
3004 "xmlParseExternalID: SYSTEM, no URI\n");
3005 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003006 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003007 }
3008 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
3009 (NXT(2) == 'B') && (NXT(3) == 'L') &&
3010 (NXT(4) == 'I') && (NXT(5) == 'C')) {
3011 SKIP(6);
3012 if (!IS_BLANK(CUR)) {
3013 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3014 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3015 ctxt->sax->error(ctxt->userData,
3016 "Space required after 'PUBLIC'\n");
3017 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003018 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003019 }
3020 SKIP_BLANKS;
3021 *publicID = xmlParsePubidLiteral(ctxt);
3022 if (*publicID == NULL) {
3023 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
3024 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3025 ctxt->sax->error(ctxt->userData,
3026 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
3027 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003028 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003029 }
3030 if (strict) {
3031 /*
3032 * We don't handle [83] so "S SystemLiteral" is required.
3033 */
3034 if (!IS_BLANK(CUR)) {
3035 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3036 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3037 ctxt->sax->error(ctxt->userData,
3038 "Space required after the Public Identifier\n");
3039 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003040 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003041 }
3042 } else {
3043 /*
3044 * We handle [83] so we return immediately, if
3045 * "S SystemLiteral" is not detected. From a purely parsing
3046 * point of view that's a nice mess.
3047 */
3048 const xmlChar *ptr;
3049 GROW;
3050
3051 ptr = CUR_PTR;
3052 if (!IS_BLANK(*ptr)) return(NULL);
3053
3054 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3055 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3056 }
3057 SKIP_BLANKS;
3058 URI = xmlParseSystemLiteral(ctxt);
3059 if (URI == NULL) {
3060 ctxt->errNo = XML_ERR_URI_REQUIRED;
3061 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3062 ctxt->sax->error(ctxt->userData,
3063 "xmlParseExternalID: PUBLIC, no URI\n");
3064 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003065 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003066 }
3067 }
3068 return(URI);
3069}
3070
3071/**
3072 * xmlParseComment:
3073 * @ctxt: an XML parser context
3074 *
3075 * Skip an XML (SGML) comment <!-- .... -->
3076 * The spec says that "For compatibility, the string "--" (double-hyphen)
3077 * must not occur within comments. "
3078 *
3079 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3080 */
3081void
3082xmlParseComment(xmlParserCtxtPtr ctxt) {
3083 xmlChar *buf = NULL;
3084 int len;
3085 int size = XML_PARSER_BUFFER_SIZE;
3086 int q, ql;
3087 int r, rl;
3088 int cur, l;
3089 xmlParserInputState state;
3090 xmlParserInputPtr input = ctxt->input;
3091 int count = 0;
3092
3093 /*
3094 * Check that there is a comment right here.
3095 */
3096 if ((RAW != '<') || (NXT(1) != '!') ||
3097 (NXT(2) != '-') || (NXT(3) != '-')) return;
3098
3099 state = ctxt->instate;
3100 ctxt->instate = XML_PARSER_COMMENT;
3101 SHRINK;
3102 SKIP(4);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003103 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003104 if (buf == NULL) {
3105 xmlGenericError(xmlGenericErrorContext,
3106 "malloc of %d byte failed\n", size);
3107 ctxt->instate = state;
3108 return;
3109 }
3110 q = CUR_CHAR(ql);
3111 NEXTL(ql);
3112 r = CUR_CHAR(rl);
3113 NEXTL(rl);
3114 cur = CUR_CHAR(l);
3115 len = 0;
3116 while (IS_CHAR(cur) && /* checked */
3117 ((cur != '>') ||
3118 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003119 if ((r == '-') && (q == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003120 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
3121 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3122 ctxt->sax->error(ctxt->userData,
3123 "Comment must not contain '--' (double-hyphen)`\n");
3124 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003125 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003126 }
3127 if (len + 5 >= size) {
3128 size *= 2;
3129 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3130 if (buf == NULL) {
3131 xmlGenericError(xmlGenericErrorContext,
3132 "realloc of %d byte failed\n", size);
3133 ctxt->instate = state;
3134 return;
3135 }
3136 }
3137 COPY_BUF(ql,buf,len,q);
3138 q = r;
3139 ql = rl;
3140 r = cur;
3141 rl = l;
3142
3143 count++;
3144 if (count > 50) {
3145 GROW;
3146 count = 0;
3147 }
3148 NEXTL(l);
3149 cur = CUR_CHAR(l);
3150 if (cur == 0) {
3151 SHRINK;
3152 GROW;
3153 cur = CUR_CHAR(l);
3154 }
3155 }
3156 buf[len] = 0;
3157 if (!IS_CHAR(cur)) {
3158 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
3159 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3160 ctxt->sax->error(ctxt->userData,
3161 "Comment not terminated \n<!--%.50s\n", buf);
3162 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003163 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003164 xmlFree(buf);
3165 } else {
3166 if (input != ctxt->input) {
3167 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3168 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3169 ctxt->sax->error(ctxt->userData,
3170"Comment doesn't start and stop in the same entity\n");
3171 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003172 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003173 }
3174 NEXT;
3175 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3176 (!ctxt->disableSAX))
3177 ctxt->sax->comment(ctxt->userData, buf);
3178 xmlFree(buf);
3179 }
3180 ctxt->instate = state;
3181}
3182
3183/**
3184 * xmlParsePITarget:
3185 * @ctxt: an XML parser context
3186 *
3187 * parse the name of a PI
3188 *
3189 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3190 *
3191 * Returns the PITarget name or NULL
3192 */
3193
3194xmlChar *
3195xmlParsePITarget(xmlParserCtxtPtr ctxt) {
3196 xmlChar *name;
3197
3198 name = xmlParseName(ctxt);
3199 if ((name != NULL) &&
3200 ((name[0] == 'x') || (name[0] == 'X')) &&
3201 ((name[1] == 'm') || (name[1] == 'M')) &&
3202 ((name[2] == 'l') || (name[2] == 'L'))) {
3203 int i;
3204 if ((name[0] == 'x') && (name[1] == 'm') &&
3205 (name[2] == 'l') && (name[3] == 0)) {
3206 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3207 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3208 ctxt->sax->error(ctxt->userData,
3209 "XML declaration allowed only at the start of the document\n");
3210 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003211 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003212 return(name);
3213 } else if (name[3] == 0) {
3214 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3215 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3216 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
3217 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003218 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003219 return(name);
3220 }
3221 for (i = 0;;i++) {
3222 if (xmlW3CPIs[i] == NULL) break;
3223 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3224 return(name);
3225 }
3226 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3227 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3228 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003229 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003230 }
3231 }
3232 return(name);
3233}
3234
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * The quoted URL is extracted from @catalog and added to the catalogs
 * of the parsing context, to be used if a resolution is needed further
 * down in the document. A malformed value produces a SAX warning, except
 * for a missing '=' which is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* pseudo-attribute name */
    for (; IS_BLANK(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* '=' sign, blanks allowed on both sides */
    for (; IS_BLANK(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;
    for (; IS_BLANK(*p); p++)
        ;

    /* quoted value */
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3297
Owen Taylor3473f882001-02-23 17:55:21 +00003298/**
3299 * xmlParsePI:
3300 * @ctxt: an XML parser context
3301 *
3302 * parse an XML Processing Instruction.
3303 *
3304 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3305 *
3306 * The processing is transfered to SAX once parsed.
3307 */
3308
3309void
3310xmlParsePI(xmlParserCtxtPtr ctxt) {
3311 xmlChar *buf = NULL;
3312 int len = 0;
3313 int size = XML_PARSER_BUFFER_SIZE;
3314 int cur, l;
3315 xmlChar *target;
3316 xmlParserInputState state;
3317 int count = 0;
3318
3319 if ((RAW == '<') && (NXT(1) == '?')) {
3320 xmlParserInputPtr input = ctxt->input;
3321 state = ctxt->instate;
3322 ctxt->instate = XML_PARSER_PI;
3323 /*
3324 * this is a Processing Instruction.
3325 */
3326 SKIP(2);
3327 SHRINK;
3328
3329 /*
3330 * Parse the target name and check for special support like
3331 * namespace.
3332 */
3333 target = xmlParsePITarget(ctxt);
3334 if (target != NULL) {
3335 if ((RAW == '?') && (NXT(1) == '>')) {
3336 if (input != ctxt->input) {
3337 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3338 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3339 ctxt->sax->error(ctxt->userData,
3340 "PI declaration doesn't start and stop in the same entity\n");
3341 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003342 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003343 }
3344 SKIP(2);
3345
3346 /*
3347 * SAX: PI detected.
3348 */
3349 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3350 (ctxt->sax->processingInstruction != NULL))
3351 ctxt->sax->processingInstruction(ctxt->userData,
3352 target, NULL);
3353 ctxt->instate = state;
3354 xmlFree(target);
3355 return;
3356 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003357 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003358 if (buf == NULL) {
3359 xmlGenericError(xmlGenericErrorContext,
3360 "malloc of %d byte failed\n", size);
3361 ctxt->instate = state;
3362 return;
3363 }
3364 cur = CUR;
3365 if (!IS_BLANK(cur)) {
3366 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3367 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3368 ctxt->sax->error(ctxt->userData,
3369 "xmlParsePI: PI %s space expected\n", target);
3370 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003371 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003372 }
3373 SKIP_BLANKS;
3374 cur = CUR_CHAR(l);
3375 while (IS_CHAR(cur) && /* checked */
3376 ((cur != '?') || (NXT(1) != '>'))) {
3377 if (len + 5 >= size) {
3378 size *= 2;
3379 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3380 if (buf == NULL) {
3381 xmlGenericError(xmlGenericErrorContext,
3382 "realloc of %d byte failed\n", size);
3383 ctxt->instate = state;
3384 return;
3385 }
3386 }
3387 count++;
3388 if (count > 50) {
3389 GROW;
3390 count = 0;
3391 }
3392 COPY_BUF(l,buf,len,cur);
3393 NEXTL(l);
3394 cur = CUR_CHAR(l);
3395 if (cur == 0) {
3396 SHRINK;
3397 GROW;
3398 cur = CUR_CHAR(l);
3399 }
3400 }
3401 buf[len] = 0;
3402 if (cur != '?') {
3403 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3404 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3405 ctxt->sax->error(ctxt->userData,
3406 "xmlParsePI: PI %s never end ...\n", target);
3407 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003408 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003409 } else {
3410 if (input != ctxt->input) {
3411 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3412 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3413 ctxt->sax->error(ctxt->userData,
3414 "PI declaration doesn't start and stop in the same entity\n");
3415 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003416 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003417 }
3418 SKIP(2);
3419
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003420#ifdef LIBXML_CATALOG_ENABLED
3421 if (((state == XML_PARSER_MISC) ||
3422 (state == XML_PARSER_START)) &&
3423 (xmlStrEqual(target, XML_CATALOG_PI))) {
3424 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3425 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3426 (allow == XML_CATA_ALLOW_ALL))
3427 xmlParseCatalogPI(ctxt, buf);
3428 }
3429#endif
3430
3431
Owen Taylor3473f882001-02-23 17:55:21 +00003432 /*
3433 * SAX: PI detected.
3434 */
3435 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3436 (ctxt->sax->processingInstruction != NULL))
3437 ctxt->sax->processingInstruction(ctxt->userData,
3438 target, buf);
3439 }
3440 xmlFree(buf);
3441 xmlFree(target);
3442 } else {
3443 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3444 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3445 ctxt->sax->error(ctxt->userData,
3446 "xmlParsePI : no target name\n");
3447 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003448 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003449 }
3450 ctxt->instate = state;
3451 }
3452}
3453
3454/**
3455 * xmlParseNotationDecl:
3456 * @ctxt: an XML parser context
3457 *
3458 * parse a notation declaration
3459 *
3460 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3461 *
3462 * Hence there is actually 3 choices:
3463 * 'PUBLIC' S PubidLiteral
3464 * 'PUBLIC' S PubidLiteral S SystemLiteral
3465 * and 'SYSTEM' S SystemLiteral
3466 *
3467 * See the NOTE on xmlParseExternalID().
3468 */
3469
3470void
3471xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3472 xmlChar *name;
3473 xmlChar *Pubid;
3474 xmlChar *Systemid;
3475
3476 if ((RAW == '<') && (NXT(1) == '!') &&
3477 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3478 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3479 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3480 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3481 xmlParserInputPtr input = ctxt->input;
3482 SHRINK;
3483 SKIP(10);
3484 if (!IS_BLANK(CUR)) {
3485 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3486 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3487 ctxt->sax->error(ctxt->userData,
3488 "Space required after '<!NOTATION'\n");
3489 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003490 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003491 return;
3492 }
3493 SKIP_BLANKS;
3494
Daniel Veillard76d66f42001-05-16 21:05:17 +00003495 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003496 if (name == NULL) {
3497 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3498 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3499 ctxt->sax->error(ctxt->userData,
3500 "NOTATION: Name expected here\n");
3501 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003502 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003503 return;
3504 }
3505 if (!IS_BLANK(CUR)) {
3506 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3507 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3508 ctxt->sax->error(ctxt->userData,
3509 "Space required after the NOTATION name'\n");
3510 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003511 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003512 return;
3513 }
3514 SKIP_BLANKS;
3515
3516 /*
3517 * Parse the IDs.
3518 */
3519 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3520 SKIP_BLANKS;
3521
3522 if (RAW == '>') {
3523 if (input != ctxt->input) {
3524 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3525 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3526 ctxt->sax->error(ctxt->userData,
3527"Notation declaration doesn't start and stop in the same entity\n");
3528 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003529 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003530 }
3531 NEXT;
3532 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3533 (ctxt->sax->notationDecl != NULL))
3534 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3535 } else {
3536 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3537 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3538 ctxt->sax->error(ctxt->userData,
3539 "'>' required to close NOTATION declaration\n");
3540 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003541 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003542 }
3543 xmlFree(name);
3544 if (Systemid != NULL) xmlFree(Systemid);
3545 if (Pubid != NULL) xmlFree(Pubid);
3546 }
3547}
3548
3549/**
3550 * xmlParseEntityDecl:
3551 * @ctxt: an XML parser context
3552 *
3553 * parse <!ENTITY declarations
3554 *
3555 * [70] EntityDecl ::= GEDecl | PEDecl
3556 *
3557 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3558 *
3559 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3560 *
3561 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3562 *
3563 * [74] PEDef ::= EntityValue | ExternalID
3564 *
3565 * [76] NDataDecl ::= S 'NDATA' S Name
3566 *
3567 * [ VC: Notation Declared ]
3568 * The Name must match the declared name of a notation.
3569 */
3570
3571void
3572xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3573 xmlChar *name = NULL;
3574 xmlChar *value = NULL;
3575 xmlChar *URI = NULL, *literal = NULL;
3576 xmlChar *ndata = NULL;
3577 int isParameter = 0;
3578 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003579 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003580
3581 GROW;
3582 if ((RAW == '<') && (NXT(1) == '!') &&
3583 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3584 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3585 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3586 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003587 SHRINK;
3588 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003589 skipped = SKIP_BLANKS;
3590 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003591 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3592 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3593 ctxt->sax->error(ctxt->userData,
3594 "Space required after '<!ENTITY'\n");
3595 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003596 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003597 }
Owen Taylor3473f882001-02-23 17:55:21 +00003598
3599 if (RAW == '%') {
3600 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003601 skipped = SKIP_BLANKS;
3602 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003603 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3604 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3605 ctxt->sax->error(ctxt->userData,
3606 "Space required after '%'\n");
3607 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003608 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003609 }
Owen Taylor3473f882001-02-23 17:55:21 +00003610 isParameter = 1;
3611 }
3612
Daniel Veillard76d66f42001-05-16 21:05:17 +00003613 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003614 if (name == NULL) {
3615 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3616 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3617 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3618 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003619 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003620 return;
3621 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003622 skipped = SKIP_BLANKS;
3623 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003624 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3625 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3626 ctxt->sax->error(ctxt->userData,
3627 "Space required after the entity name\n");
3628 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003629 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003630 }
Owen Taylor3473f882001-02-23 17:55:21 +00003631
Daniel Veillardf5582f12002-06-11 10:08:16 +00003632 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003633 /*
3634 * handle the various case of definitions...
3635 */
3636 if (isParameter) {
3637 if ((RAW == '"') || (RAW == '\'')) {
3638 value = xmlParseEntityValue(ctxt, &orig);
3639 if (value) {
3640 if ((ctxt->sax != NULL) &&
3641 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3642 ctxt->sax->entityDecl(ctxt->userData, name,
3643 XML_INTERNAL_PARAMETER_ENTITY,
3644 NULL, NULL, value);
3645 }
3646 } else {
3647 URI = xmlParseExternalID(ctxt, &literal, 1);
3648 if ((URI == NULL) && (literal == NULL)) {
3649 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3650 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3651 ctxt->sax->error(ctxt->userData,
3652 "Entity value required\n");
3653 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003654 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003655 }
3656 if (URI) {
3657 xmlURIPtr uri;
3658
3659 uri = xmlParseURI((const char *) URI);
3660 if (uri == NULL) {
3661 ctxt->errNo = XML_ERR_INVALID_URI;
3662 if ((ctxt->sax != NULL) &&
3663 (!ctxt->disableSAX) &&
3664 (ctxt->sax->error != NULL))
3665 ctxt->sax->error(ctxt->userData,
3666 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003667 /*
3668 * This really ought to be a well formedness error
3669 * but the XML Core WG decided otherwise c.f. issue
3670 * E26 of the XML erratas.
3671 */
Owen Taylor3473f882001-02-23 17:55:21 +00003672 } else {
3673 if (uri->fragment != NULL) {
3674 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3675 if ((ctxt->sax != NULL) &&
3676 (!ctxt->disableSAX) &&
3677 (ctxt->sax->error != NULL))
3678 ctxt->sax->error(ctxt->userData,
3679 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003680 /*
3681 * Okay this is foolish to block those but not
3682 * invalid URIs.
3683 */
Owen Taylor3473f882001-02-23 17:55:21 +00003684 ctxt->wellFormed = 0;
3685 } else {
3686 if ((ctxt->sax != NULL) &&
3687 (!ctxt->disableSAX) &&
3688 (ctxt->sax->entityDecl != NULL))
3689 ctxt->sax->entityDecl(ctxt->userData, name,
3690 XML_EXTERNAL_PARAMETER_ENTITY,
3691 literal, URI, NULL);
3692 }
3693 xmlFreeURI(uri);
3694 }
3695 }
3696 }
3697 } else {
3698 if ((RAW == '"') || (RAW == '\'')) {
3699 value = xmlParseEntityValue(ctxt, &orig);
3700 if ((ctxt->sax != NULL) &&
3701 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3702 ctxt->sax->entityDecl(ctxt->userData, name,
3703 XML_INTERNAL_GENERAL_ENTITY,
3704 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003705 /*
3706 * For expat compatibility in SAX mode.
3707 */
3708 if ((ctxt->myDoc == NULL) ||
3709 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3710 if (ctxt->myDoc == NULL) {
3711 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3712 }
3713 if (ctxt->myDoc->intSubset == NULL)
3714 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3715 BAD_CAST "fake", NULL, NULL);
3716
3717 entityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3718 NULL, NULL, value);
3719 }
Owen Taylor3473f882001-02-23 17:55:21 +00003720 } else {
3721 URI = xmlParseExternalID(ctxt, &literal, 1);
3722 if ((URI == NULL) && (literal == NULL)) {
3723 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3724 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3725 ctxt->sax->error(ctxt->userData,
3726 "Entity value required\n");
3727 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003728 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003729 }
3730 if (URI) {
3731 xmlURIPtr uri;
3732
3733 uri = xmlParseURI((const char *)URI);
3734 if (uri == NULL) {
3735 ctxt->errNo = XML_ERR_INVALID_URI;
3736 if ((ctxt->sax != NULL) &&
3737 (!ctxt->disableSAX) &&
3738 (ctxt->sax->error != NULL))
3739 ctxt->sax->error(ctxt->userData,
3740 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003741 /*
3742 * This really ought to be a well formedness error
3743 * but the XML Core WG decided otherwise c.f. issue
3744 * E26 of the XML erratas.
3745 */
Owen Taylor3473f882001-02-23 17:55:21 +00003746 } else {
3747 if (uri->fragment != NULL) {
3748 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3749 if ((ctxt->sax != NULL) &&
3750 (!ctxt->disableSAX) &&
3751 (ctxt->sax->error != NULL))
3752 ctxt->sax->error(ctxt->userData,
3753 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003754 /*
3755 * Okay this is foolish to block those but not
3756 * invalid URIs.
3757 */
Owen Taylor3473f882001-02-23 17:55:21 +00003758 ctxt->wellFormed = 0;
3759 }
3760 xmlFreeURI(uri);
3761 }
3762 }
3763 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3764 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3765 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3766 ctxt->sax->error(ctxt->userData,
3767 "Space required before 'NDATA'\n");
3768 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003769 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003770 }
3771 SKIP_BLANKS;
3772 if ((RAW == 'N') && (NXT(1) == 'D') &&
3773 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3774 (NXT(4) == 'A')) {
3775 SKIP(5);
3776 if (!IS_BLANK(CUR)) {
3777 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3778 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3779 ctxt->sax->error(ctxt->userData,
3780 "Space required after 'NDATA'\n");
3781 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003782 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003783 }
3784 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003785 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003786 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3787 (ctxt->sax->unparsedEntityDecl != NULL))
3788 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3789 literal, URI, ndata);
3790 } else {
3791 if ((ctxt->sax != NULL) &&
3792 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3793 ctxt->sax->entityDecl(ctxt->userData, name,
3794 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3795 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003796 /*
3797 * For expat compatibility in SAX mode.
3798 * assuming the entity repalcement was asked for
3799 */
3800 if ((ctxt->replaceEntities != 0) &&
3801 ((ctxt->myDoc == NULL) ||
3802 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
3803 if (ctxt->myDoc == NULL) {
3804 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3805 }
3806
3807 if (ctxt->myDoc->intSubset == NULL)
3808 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3809 BAD_CAST "fake", NULL, NULL);
3810 entityDecl(ctxt, name,
3811 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3812 literal, URI, NULL);
3813 }
Owen Taylor3473f882001-02-23 17:55:21 +00003814 }
3815 }
3816 }
3817 SKIP_BLANKS;
3818 if (RAW != '>') {
3819 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3820 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3821 ctxt->sax->error(ctxt->userData,
3822 "xmlParseEntityDecl: entity %s not terminated\n", name);
3823 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003824 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003825 } else {
3826 if (input != ctxt->input) {
3827 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3828 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3829 ctxt->sax->error(ctxt->userData,
3830"Entity declaration doesn't start and stop in the same entity\n");
3831 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003832 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003833 }
3834 NEXT;
3835 }
3836 if (orig != NULL) {
3837 /*
3838 * Ugly mechanism to save the raw entity value.
3839 */
3840 xmlEntityPtr cur = NULL;
3841
3842 if (isParameter) {
3843 if ((ctxt->sax != NULL) &&
3844 (ctxt->sax->getParameterEntity != NULL))
3845 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3846 } else {
3847 if ((ctxt->sax != NULL) &&
3848 (ctxt->sax->getEntity != NULL))
3849 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003850 if ((cur == NULL) && (ctxt->userData==ctxt)) {
3851 cur = getEntity(ctxt, name);
3852 }
Owen Taylor3473f882001-02-23 17:55:21 +00003853 }
3854 if (cur != NULL) {
3855 if (cur->orig != NULL)
3856 xmlFree(orig);
3857 else
3858 cur->orig = orig;
3859 } else
3860 xmlFree(orig);
3861 }
3862 if (name != NULL) xmlFree(name);
3863 if (value != NULL) xmlFree(value);
3864 if (URI != NULL) xmlFree(URI);
3865 if (literal != NULL) xmlFree(literal);
3866 if (ndata != NULL) xmlFree(ndata);
3867 }
3868}
3869
3870/**
3871 * xmlParseDefaultDecl:
3872 * @ctxt: an XML parser context
3873 * @value: Receive a possible fixed default value for the attribute
3874 *
3875 * Parse an attribute default declaration
3876 *
3877 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3878 *
3879 * [ VC: Required Attribute ]
3880 * if the default declaration is the keyword #REQUIRED, then the
3881 * attribute must be specified for all elements of the type in the
3882 * attribute-list declaration.
3883 *
3884 * [ VC: Attribute Default Legal ]
3885 * The declared default value must meet the lexical constraints of
3886 * the declared attribute type c.f. xmlValidateAttributeDecl()
3887 *
3888 * [ VC: Fixed Attribute Default ]
3889 * if an attribute has a default value declared with the #FIXED
3890 * keyword, instances of that attribute must match the default value.
3891 *
3892 * [ WFC: No < in Attribute Values ]
3893 * handled in xmlParseAttValue()
3894 *
3895 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3896 * or XML_ATTRIBUTE_FIXED.
3897 */
3898
3899int
3900xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3901 int val;
3902 xmlChar *ret;
3903
3904 *value = NULL;
3905 if ((RAW == '#') && (NXT(1) == 'R') &&
3906 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3907 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3908 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3909 (NXT(8) == 'D')) {
3910 SKIP(9);
3911 return(XML_ATTRIBUTE_REQUIRED);
3912 }
3913 if ((RAW == '#') && (NXT(1) == 'I') &&
3914 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3915 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3916 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3917 SKIP(8);
3918 return(XML_ATTRIBUTE_IMPLIED);
3919 }
3920 val = XML_ATTRIBUTE_NONE;
3921 if ((RAW == '#') && (NXT(1) == 'F') &&
3922 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3923 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3924 SKIP(6);
3925 val = XML_ATTRIBUTE_FIXED;
3926 if (!IS_BLANK(CUR)) {
3927 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3928 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3929 ctxt->sax->error(ctxt->userData,
3930 "Space required after '#FIXED'\n");
3931 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003932 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003933 }
3934 SKIP_BLANKS;
3935 }
3936 ret = xmlParseAttValue(ctxt);
3937 ctxt->instate = XML_PARSER_DTD;
3938 if (ret == NULL) {
3939 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3940 ctxt->sax->error(ctxt->userData,
3941 "Attribute default value declaration error\n");
3942 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003943 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003944 } else
3945 *value = ret;
3946 return(val);
3947}
3948
3949/**
3950 * xmlParseNotationType:
3951 * @ctxt: an XML parser context
3952 *
3953 * parse an Notation attribute type.
3954 *
3955 * Note: the leading 'NOTATION' S part has already being parsed...
3956 *
3957 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3958 *
3959 * [ VC: Notation Attributes ]
3960 * Values of this type must match one of the notation names included
3961 * in the declaration; all notation names in the declaration must be declared.
3962 *
3963 * Returns: the notation attribute tree built while parsing
3964 */
3965
3966xmlEnumerationPtr
3967xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3968 xmlChar *name;
3969 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3970
3971 if (RAW != '(') {
3972 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3973 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3974 ctxt->sax->error(ctxt->userData,
3975 "'(' required to start 'NOTATION'\n");
3976 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003977 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003978 return(NULL);
3979 }
3980 SHRINK;
3981 do {
3982 NEXT;
3983 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003984 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003985 if (name == NULL) {
3986 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3987 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3988 ctxt->sax->error(ctxt->userData,
3989 "Name expected in NOTATION declaration\n");
3990 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003991 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003992 return(ret);
3993 }
3994 cur = xmlCreateEnumeration(name);
3995 xmlFree(name);
3996 if (cur == NULL) return(ret);
3997 if (last == NULL) ret = last = cur;
3998 else {
3999 last->next = cur;
4000 last = cur;
4001 }
4002 SKIP_BLANKS;
4003 } while (RAW == '|');
4004 if (RAW != ')') {
4005 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
4006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4007 ctxt->sax->error(ctxt->userData,
4008 "')' required to finish NOTATION declaration\n");
4009 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004010 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004011 if ((last != NULL) && (last != ret))
4012 xmlFreeEnumeration(last);
4013 return(ret);
4014 }
4015 NEXT;
4016 return(ret);
4017}
4018
4019/**
4020 * xmlParseEnumerationType:
4021 * @ctxt: an XML parser context
4022 *
4023 * parse an Enumeration attribute type.
4024 *
4025 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4026 *
4027 * [ VC: Enumeration ]
4028 * Values of this type must match one of the Nmtoken tokens in
4029 * the declaration
4030 *
4031 * Returns: the enumeration attribute tree built while parsing
4032 */
4033
4034xmlEnumerationPtr
4035xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4036 xmlChar *name;
4037 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4038
4039 if (RAW != '(') {
4040 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
4041 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4042 ctxt->sax->error(ctxt->userData,
4043 "'(' required to start ATTLIST enumeration\n");
4044 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004045 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004046 return(NULL);
4047 }
4048 SHRINK;
4049 do {
4050 NEXT;
4051 SKIP_BLANKS;
4052 name = xmlParseNmtoken(ctxt);
4053 if (name == NULL) {
4054 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
4055 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4056 ctxt->sax->error(ctxt->userData,
4057 "NmToken expected in ATTLIST enumeration\n");
4058 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004059 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004060 return(ret);
4061 }
4062 cur = xmlCreateEnumeration(name);
4063 xmlFree(name);
4064 if (cur == NULL) return(ret);
4065 if (last == NULL) ret = last = cur;
4066 else {
4067 last->next = cur;
4068 last = cur;
4069 }
4070 SKIP_BLANKS;
4071 } while (RAW == '|');
4072 if (RAW != ')') {
4073 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
4074 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4075 ctxt->sax->error(ctxt->userData,
4076 "')' required to finish ATTLIST enumeration\n");
4077 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004078 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004079 return(ret);
4080 }
4081 NEXT;
4082 return(ret);
4083}
4084
4085/**
4086 * xmlParseEnumeratedType:
4087 * @ctxt: an XML parser context
4088 * @tree: the enumeration tree built while parsing
4089 *
4090 * parse an Enumerated attribute type.
4091 *
4092 * [57] EnumeratedType ::= NotationType | Enumeration
4093 *
4094 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4095 *
4096 *
4097 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4098 */
4099
4100int
4101xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4102 if ((RAW == 'N') && (NXT(1) == 'O') &&
4103 (NXT(2) == 'T') && (NXT(3) == 'A') &&
4104 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4105 (NXT(6) == 'O') && (NXT(7) == 'N')) {
4106 SKIP(8);
4107 if (!IS_BLANK(CUR)) {
4108 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4109 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4110 ctxt->sax->error(ctxt->userData,
4111 "Space required after 'NOTATION'\n");
4112 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004113 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004114 return(0);
4115 }
4116 SKIP_BLANKS;
4117 *tree = xmlParseNotationType(ctxt);
4118 if (*tree == NULL) return(0);
4119 return(XML_ATTRIBUTE_NOTATION);
4120 }
4121 *tree = xmlParseEnumerationType(ctxt);
4122 if (*tree == NULL) return(0);
4123 return(XML_ATTRIBUTE_ENUMERATION);
4124}
4125
4126/**
4127 * xmlParseAttributeType:
4128 * @ctxt: an XML parser context
4129 * @tree: the enumeration tree built while parsing
4130 *
4131 * parse the Attribute list def for an element
4132 *
4133 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4134 *
4135 * [55] StringType ::= 'CDATA'
4136 *
4137 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4138 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4139 *
4140 * Validity constraints for attribute values syntax are checked in
4141 * xmlValidateAttributeValue()
4142 *
4143 * [ VC: ID ]
4144 * Values of type ID must match the Name production. A name must not
4145 * appear more than once in an XML document as a value of this type;
4146 * i.e., ID values must uniquely identify the elements which bear them.
4147 *
4148 * [ VC: One ID per Element Type ]
4149 * No element type may have more than one ID attribute specified.
4150 *
4151 * [ VC: ID Attribute Default ]
4152 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4153 *
4154 * [ VC: IDREF ]
4155 * Values of type IDREF must match the Name production, and values
4156 * of type IDREFS must match Names; each IDREF Name must match the value
4157 * of an ID attribute on some element in the XML document; i.e. IDREF
4158 * values must match the value of some ID attribute.
4159 *
4160 * [ VC: Entity Name ]
4161 * Values of type ENTITY must match the Name production, values
4162 * of type ENTITIES must match Names; each Entity Name must match the
4163 * name of an unparsed entity declared in the DTD.
4164 *
4165 * [ VC: Name Token ]
4166 * Values of type NMTOKEN must match the Nmtoken production; values
4167 * of type NMTOKENS must match Nmtokens.
4168 *
4169 * Returns the attribute type
4170 */
4171int
4172xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4173 SHRINK;
4174 if ((RAW == 'C') && (NXT(1) == 'D') &&
4175 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4176 (NXT(4) == 'A')) {
4177 SKIP(5);
4178 return(XML_ATTRIBUTE_CDATA);
4179 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4180 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4181 (NXT(4) == 'F') && (NXT(5) == 'S')) {
4182 SKIP(6);
4183 return(XML_ATTRIBUTE_IDREFS);
4184 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4185 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4186 (NXT(4) == 'F')) {
4187 SKIP(5);
4188 return(XML_ATTRIBUTE_IDREF);
4189 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4190 SKIP(2);
4191 return(XML_ATTRIBUTE_ID);
4192 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4193 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4194 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4195 SKIP(6);
4196 return(XML_ATTRIBUTE_ENTITY);
4197 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4198 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4199 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4200 (NXT(6) == 'E') && (NXT(7) == 'S')) {
4201 SKIP(8);
4202 return(XML_ATTRIBUTE_ENTITIES);
4203 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4204 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4205 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4206 (NXT(6) == 'N') && (NXT(7) == 'S')) {
4207 SKIP(8);
4208 return(XML_ATTRIBUTE_NMTOKENS);
4209 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4210 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4211 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4212 (NXT(6) == 'N')) {
4213 SKIP(7);
4214 return(XML_ATTRIBUTE_NMTOKEN);
4215 }
4216 return(xmlParseEnumeratedType(ctxt, tree));
4217}
4218
4219/**
4220 * xmlParseAttributeListDecl:
4221 * @ctxt: an XML parser context
4222 *
4223 * : parse the Attribute list def for an element
4224 *
4225 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4226 *
4227 * [53] AttDef ::= S Name S AttType S DefaultDecl
4228 *
4229 */
4230void
4231xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
4232 xmlChar *elemName;
4233 xmlChar *attrName;
4234 xmlEnumerationPtr tree;
4235
4236 if ((RAW == '<') && (NXT(1) == '!') &&
4237 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4238 (NXT(4) == 'T') && (NXT(5) == 'L') &&
4239 (NXT(6) == 'I') && (NXT(7) == 'S') &&
4240 (NXT(8) == 'T')) {
4241 xmlParserInputPtr input = ctxt->input;
4242
4243 SKIP(9);
4244 if (!IS_BLANK(CUR)) {
4245 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4246 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4247 ctxt->sax->error(ctxt->userData,
4248 "Space required after '<!ATTLIST'\n");
4249 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004250 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004251 }
4252 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004253 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004254 if (elemName == NULL) {
4255 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4256 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4257 ctxt->sax->error(ctxt->userData,
4258 "ATTLIST: no name for Element\n");
4259 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004260 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004261 return;
4262 }
4263 SKIP_BLANKS;
4264 GROW;
4265 while (RAW != '>') {
4266 const xmlChar *check = CUR_PTR;
4267 int type;
4268 int def;
4269 xmlChar *defaultValue = NULL;
4270
4271 GROW;
4272 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004273 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004274 if (attrName == NULL) {
4275 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4276 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4277 ctxt->sax->error(ctxt->userData,
4278 "ATTLIST: no name for Attribute\n");
4279 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004280 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004281 break;
4282 }
4283 GROW;
4284 if (!IS_BLANK(CUR)) {
4285 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4286 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4287 ctxt->sax->error(ctxt->userData,
4288 "Space required after the attribute name\n");
4289 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004290 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004291 if (attrName != NULL)
4292 xmlFree(attrName);
4293 if (defaultValue != NULL)
4294 xmlFree(defaultValue);
4295 break;
4296 }
4297 SKIP_BLANKS;
4298
4299 type = xmlParseAttributeType(ctxt, &tree);
4300 if (type <= 0) {
4301 if (attrName != NULL)
4302 xmlFree(attrName);
4303 if (defaultValue != NULL)
4304 xmlFree(defaultValue);
4305 break;
4306 }
4307
4308 GROW;
4309 if (!IS_BLANK(CUR)) {
4310 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4311 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4312 ctxt->sax->error(ctxt->userData,
4313 "Space required after the attribute type\n");
4314 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004315 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004316 if (attrName != NULL)
4317 xmlFree(attrName);
4318 if (defaultValue != NULL)
4319 xmlFree(defaultValue);
4320 if (tree != NULL)
4321 xmlFreeEnumeration(tree);
4322 break;
4323 }
4324 SKIP_BLANKS;
4325
4326 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4327 if (def <= 0) {
4328 if (attrName != NULL)
4329 xmlFree(attrName);
4330 if (defaultValue != NULL)
4331 xmlFree(defaultValue);
4332 if (tree != NULL)
4333 xmlFreeEnumeration(tree);
4334 break;
4335 }
4336
4337 GROW;
4338 if (RAW != '>') {
4339 if (!IS_BLANK(CUR)) {
4340 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4341 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4342 ctxt->sax->error(ctxt->userData,
4343 "Space required after the attribute default value\n");
4344 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004345 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004346 if (attrName != NULL)
4347 xmlFree(attrName);
4348 if (defaultValue != NULL)
4349 xmlFree(defaultValue);
4350 if (tree != NULL)
4351 xmlFreeEnumeration(tree);
4352 break;
4353 }
4354 SKIP_BLANKS;
4355 }
4356 if (check == CUR_PTR) {
4357 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4358 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4359 ctxt->sax->error(ctxt->userData,
4360 "xmlParseAttributeListDecl: detected internal error\n");
4361 if (attrName != NULL)
4362 xmlFree(attrName);
4363 if (defaultValue != NULL)
4364 xmlFree(defaultValue);
4365 if (tree != NULL)
4366 xmlFreeEnumeration(tree);
4367 break;
4368 }
4369 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4370 (ctxt->sax->attributeDecl != NULL))
4371 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4372 type, def, defaultValue, tree);
4373 if (attrName != NULL)
4374 xmlFree(attrName);
4375 if (defaultValue != NULL)
4376 xmlFree(defaultValue);
4377 GROW;
4378 }
4379 if (RAW == '>') {
4380 if (input != ctxt->input) {
4381 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4382 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4383 ctxt->sax->error(ctxt->userData,
4384"Attribute list declaration doesn't start and stop in the same entity\n");
4385 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004386 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004387 }
4388 NEXT;
4389 }
4390
4391 xmlFree(elemName);
4392 }
4393}
4394
4395/**
4396 * xmlParseElementMixedContentDecl:
4397 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004398 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004399 *
4400 * parse the declaration for a Mixed Element content
4401 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4402 *
4403 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4404 * '(' S? '#PCDATA' S? ')'
4405 *
4406 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4407 *
4408 * [ VC: No Duplicate Types ]
4409 * The same name must not appear more than once in a single
4410 * mixed-content declaration.
4411 *
4412 * returns: the list of the xmlElementContentPtr describing the element choices
4413 */
4414xmlElementContentPtr
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004415xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004416 xmlElementContentPtr ret = NULL, cur = NULL, n;
4417 xmlChar *elem = NULL;
4418
4419 GROW;
4420 if ((RAW == '#') && (NXT(1) == 'P') &&
4421 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4422 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4423 (NXT(6) == 'A')) {
4424 SKIP(7);
4425 SKIP_BLANKS;
4426 SHRINK;
4427 if (RAW == ')') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004428 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4429 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4430 if (ctxt->vctxt.error != NULL)
4431 ctxt->vctxt.error(ctxt->vctxt.userData,
4432"Element content declaration doesn't start and stop in the same entity\n");
4433 ctxt->valid = 0;
4434 }
Owen Taylor3473f882001-02-23 17:55:21 +00004435 NEXT;
4436 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4437 if (RAW == '*') {
4438 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4439 NEXT;
4440 }
4441 return(ret);
4442 }
4443 if ((RAW == '(') || (RAW == '|')) {
4444 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4445 if (ret == NULL) return(NULL);
4446 }
4447 while (RAW == '|') {
4448 NEXT;
4449 if (elem == NULL) {
4450 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4451 if (ret == NULL) return(NULL);
4452 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004453 if (cur != NULL)
4454 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004455 cur = ret;
4456 } else {
4457 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4458 if (n == NULL) return(NULL);
4459 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004460 if (n->c1 != NULL)
4461 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004462 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004463 if (n != NULL)
4464 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004465 cur = n;
4466 xmlFree(elem);
4467 }
4468 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004469 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004470 if (elem == NULL) {
4471 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4472 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4473 ctxt->sax->error(ctxt->userData,
4474 "xmlParseElementMixedContentDecl : Name expected\n");
4475 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004476 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004477 xmlFreeElementContent(cur);
4478 return(NULL);
4479 }
4480 SKIP_BLANKS;
4481 GROW;
4482 }
4483 if ((RAW == ')') && (NXT(1) == '*')) {
4484 if (elem != NULL) {
4485 cur->c2 = xmlNewElementContent(elem,
4486 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004487 if (cur->c2 != NULL)
4488 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004489 xmlFree(elem);
4490 }
4491 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004492 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4493 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4494 if (ctxt->vctxt.error != NULL)
4495 ctxt->vctxt.error(ctxt->vctxt.userData,
4496"Element content declaration doesn't start and stop in the same entity\n");
4497 ctxt->valid = 0;
4498 }
Owen Taylor3473f882001-02-23 17:55:21 +00004499 SKIP(2);
4500 } else {
4501 if (elem != NULL) xmlFree(elem);
4502 xmlFreeElementContent(ret);
4503 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4504 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4505 ctxt->sax->error(ctxt->userData,
4506 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4507 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004508 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004509 return(NULL);
4510 }
4511
4512 } else {
4513 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4514 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4515 ctxt->sax->error(ctxt->userData,
4516 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4517 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004518 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004519 }
4520 return(ret);
4521}
4522
4523/**
4524 * xmlParseElementChildrenContentDecl:
4525 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004526 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004527 *
 * parse the declaration for an Element children content model
4529 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4530 *
4531 *
4532 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4533 *
4534 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4535 *
4536 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4537 *
4538 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4539 *
4540 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4541 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004542 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004543 * opening or closing parentheses in a choice, seq, or Mixed
4544 * construct is contained in the replacement text for a parameter
4545 * entity, both must be contained in the same replacement text. For
4546 * interoperability, if a parameter-entity reference appears in a
4547 * choice, seq, or Mixed construct, its replacement text should not
4548 * be empty, and neither the first nor last non-blank character of
4549 * the replacement text should be a connector (| or ,).
4550 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004551 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004552 * hierarchy.
4553 */
4554xmlElementContentPtr
Owen Taylor3473f882001-02-23 17:55:21 +00004555xmlParseElementChildrenContentDecl
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004556(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004557 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4558 xmlChar *elem;
4559 xmlChar type = 0;
4560
4561 SKIP_BLANKS;
4562 GROW;
4563 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004564 xmlParserInputPtr input = ctxt->input;
4565
Owen Taylor3473f882001-02-23 17:55:21 +00004566 /* Recurse on first child */
4567 NEXT;
4568 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004569 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004570 SKIP_BLANKS;
4571 GROW;
4572 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004573 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004574 if (elem == NULL) {
4575 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4576 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4577 ctxt->sax->error(ctxt->userData,
4578 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4579 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004580 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004581 return(NULL);
4582 }
4583 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4584 GROW;
4585 if (RAW == '?') {
4586 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4587 NEXT;
4588 } else if (RAW == '*') {
4589 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4590 NEXT;
4591 } else if (RAW == '+') {
4592 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4593 NEXT;
4594 } else {
4595 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4596 }
4597 xmlFree(elem);
4598 GROW;
4599 }
4600 SKIP_BLANKS;
4601 SHRINK;
4602 while (RAW != ')') {
4603 /*
4604 * Each loop we parse one separator and one element.
4605 */
4606 if (RAW == ',') {
4607 if (type == 0) type = CUR;
4608
4609 /*
4610 * Detect "Name | Name , Name" error
4611 */
4612 else if (type != CUR) {
4613 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4614 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4615 ctxt->sax->error(ctxt->userData,
4616 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4617 type);
4618 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004619 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004620 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004621 xmlFreeElementContent(last);
4622 if (ret != NULL)
4623 xmlFreeElementContent(ret);
4624 return(NULL);
4625 }
4626 NEXT;
4627
4628 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4629 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004630 if ((last != NULL) && (last != ret))
4631 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004632 xmlFreeElementContent(ret);
4633 return(NULL);
4634 }
4635 if (last == NULL) {
4636 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004637 if (ret != NULL)
4638 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004639 ret = cur = op;
4640 } else {
4641 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004642 if (op != NULL)
4643 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004644 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004645 if (last != NULL)
4646 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004647 cur =op;
4648 last = NULL;
4649 }
4650 } else if (RAW == '|') {
4651 if (type == 0) type = CUR;
4652
4653 /*
4654 * Detect "Name , Name | Name" error
4655 */
4656 else if (type != CUR) {
4657 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4658 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4659 ctxt->sax->error(ctxt->userData,
4660 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4661 type);
4662 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004663 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004664 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004665 xmlFreeElementContent(last);
4666 if (ret != NULL)
4667 xmlFreeElementContent(ret);
4668 return(NULL);
4669 }
4670 NEXT;
4671
4672 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4673 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004674 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004675 xmlFreeElementContent(last);
4676 if (ret != NULL)
4677 xmlFreeElementContent(ret);
4678 return(NULL);
4679 }
4680 if (last == NULL) {
4681 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004682 if (ret != NULL)
4683 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004684 ret = cur = op;
4685 } else {
4686 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004687 if (op != NULL)
4688 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004689 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004690 if (last != NULL)
4691 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004692 cur =op;
4693 last = NULL;
4694 }
4695 } else {
4696 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4697 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4698 ctxt->sax->error(ctxt->userData,
4699 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4700 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004701 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004702 if (ret != NULL)
4703 xmlFreeElementContent(ret);
4704 return(NULL);
4705 }
4706 GROW;
4707 SKIP_BLANKS;
4708 GROW;
4709 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004710 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004711 /* Recurse on second child */
4712 NEXT;
4713 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004714 last = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004715 SKIP_BLANKS;
4716 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004717 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004718 if (elem == NULL) {
4719 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4720 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4721 ctxt->sax->error(ctxt->userData,
4722 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4723 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004724 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004725 if (ret != NULL)
4726 xmlFreeElementContent(ret);
4727 return(NULL);
4728 }
4729 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4730 xmlFree(elem);
4731 if (RAW == '?') {
4732 last->ocur = XML_ELEMENT_CONTENT_OPT;
4733 NEXT;
4734 } else if (RAW == '*') {
4735 last->ocur = XML_ELEMENT_CONTENT_MULT;
4736 NEXT;
4737 } else if (RAW == '+') {
4738 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4739 NEXT;
4740 } else {
4741 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4742 }
4743 }
4744 SKIP_BLANKS;
4745 GROW;
4746 }
4747 if ((cur != NULL) && (last != NULL)) {
4748 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004749 if (last != NULL)
4750 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004751 }
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004752 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4753 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4754 if (ctxt->vctxt.error != NULL)
4755 ctxt->vctxt.error(ctxt->vctxt.userData,
4756"Element content declaration doesn't start and stop in the same entity\n");
4757 ctxt->valid = 0;
4758 }
Owen Taylor3473f882001-02-23 17:55:21 +00004759 NEXT;
4760 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004761 if (ret != NULL)
4762 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004763 NEXT;
4764 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004765 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004766 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004767 cur = ret;
4768 /*
4769 * Some normalization:
4770 * (a | b* | c?)* == (a | b | c)*
4771 */
4772 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4773 if ((cur->c1 != NULL) &&
4774 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4775 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4776 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4777 if ((cur->c2 != NULL) &&
4778 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4779 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4780 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4781 cur = cur->c2;
4782 }
4783 }
Owen Taylor3473f882001-02-23 17:55:21 +00004784 NEXT;
4785 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004786 if (ret != NULL) {
4787 int found = 0;
4788
Daniel Veillarde470df72001-04-18 21:41:07 +00004789 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004790 /*
4791 * Some normalization:
4792 * (a | b*)+ == (a | b)*
4793 * (a | b?)+ == (a | b)*
4794 */
4795 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4796 if ((cur->c1 != NULL) &&
4797 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4798 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4799 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4800 found = 1;
4801 }
4802 if ((cur->c2 != NULL) &&
4803 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4804 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4805 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4806 found = 1;
4807 }
4808 cur = cur->c2;
4809 }
4810 if (found)
4811 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4812 }
Owen Taylor3473f882001-02-23 17:55:21 +00004813 NEXT;
4814 }
4815 return(ret);
4816}
4817
4818/**
4819 * xmlParseElementContentDecl:
4820 * @ctxt: an XML parser context
4821 * @name: the name of the element being defined.
4822 * @result: the Element Content pointer will be stored here if any
4823 *
4824 * parse the declaration for an Element content either Mixed or Children,
4825 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4826 *
4827 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4828 *
4829 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4830 */
4831
4832int
4833xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4834 xmlElementContentPtr *result) {
4835
4836 xmlElementContentPtr tree = NULL;
4837 xmlParserInputPtr input = ctxt->input;
4838 int res;
4839
4840 *result = NULL;
4841
4842 if (RAW != '(') {
4843 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4844 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4845 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004846 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004847 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004848 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004849 return(-1);
4850 }
4851 NEXT;
4852 GROW;
4853 SKIP_BLANKS;
4854 if ((RAW == '#') && (NXT(1) == 'P') &&
4855 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4856 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4857 (NXT(6) == 'A')) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004858 tree = xmlParseElementMixedContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004859 res = XML_ELEMENT_TYPE_MIXED;
4860 } else {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004861 tree = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004862 res = XML_ELEMENT_TYPE_ELEMENT;
4863 }
Owen Taylor3473f882001-02-23 17:55:21 +00004864 SKIP_BLANKS;
4865 *result = tree;
4866 return(res);
4867}
4868
4869/**
4870 * xmlParseElementDecl:
4871 * @ctxt: an XML parser context
4872 *
4873 * parse an Element declaration.
4874 *
4875 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4876 *
4877 * [ VC: Unique Element Type Declaration ]
4878 * No element type may be declared more than once
4879 *
4880 * Returns the type of the element, or -1 in case of error
4881 */
4882int
4883xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4884 xmlChar *name;
4885 int ret = -1;
4886 xmlElementContentPtr content = NULL;
4887
4888 GROW;
4889 if ((RAW == '<') && (NXT(1) == '!') &&
4890 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4891 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4892 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4893 (NXT(8) == 'T')) {
4894 xmlParserInputPtr input = ctxt->input;
4895
4896 SKIP(9);
4897 if (!IS_BLANK(CUR)) {
4898 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4899 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4900 ctxt->sax->error(ctxt->userData,
4901 "Space required after 'ELEMENT'\n");
4902 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004903 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004904 }
4905 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004906 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004907 if (name == NULL) {
4908 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4909 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4910 ctxt->sax->error(ctxt->userData,
4911 "xmlParseElementDecl: no name for Element\n");
4912 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004913 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004914 return(-1);
4915 }
4916 while ((RAW == 0) && (ctxt->inputNr > 1))
4917 xmlPopInput(ctxt);
4918 if (!IS_BLANK(CUR)) {
4919 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4920 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4921 ctxt->sax->error(ctxt->userData,
4922 "Space required after the element name\n");
4923 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004924 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004925 }
4926 SKIP_BLANKS;
4927 if ((RAW == 'E') && (NXT(1) == 'M') &&
4928 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4929 (NXT(4) == 'Y')) {
4930 SKIP(5);
4931 /*
4932 * Element must always be empty.
4933 */
4934 ret = XML_ELEMENT_TYPE_EMPTY;
4935 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4936 (NXT(2) == 'Y')) {
4937 SKIP(3);
4938 /*
4939 * Element is a generic container.
4940 */
4941 ret = XML_ELEMENT_TYPE_ANY;
4942 } else if (RAW == '(') {
4943 ret = xmlParseElementContentDecl(ctxt, name, &content);
4944 } else {
4945 /*
4946 * [ WFC: PEs in Internal Subset ] error handling.
4947 */
4948 if ((RAW == '%') && (ctxt->external == 0) &&
4949 (ctxt->inputNr == 1)) {
4950 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4951 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4952 ctxt->sax->error(ctxt->userData,
4953 "PEReference: forbidden within markup decl in internal subset\n");
4954 } else {
4955 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4956 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4957 ctxt->sax->error(ctxt->userData,
4958 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4959 }
4960 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004961 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004962 if (name != NULL) xmlFree(name);
4963 return(-1);
4964 }
4965
4966 SKIP_BLANKS;
4967 /*
4968 * Pop-up of finished entities.
4969 */
4970 while ((RAW == 0) && (ctxt->inputNr > 1))
4971 xmlPopInput(ctxt);
4972 SKIP_BLANKS;
4973
4974 if (RAW != '>') {
4975 ctxt->errNo = XML_ERR_GT_REQUIRED;
4976 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4977 ctxt->sax->error(ctxt->userData,
4978 "xmlParseElementDecl: expected '>' at the end\n");
4979 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004980 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004981 } else {
4982 if (input != ctxt->input) {
4983 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4984 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4985 ctxt->sax->error(ctxt->userData,
4986"Element declaration doesn't start and stop in the same entity\n");
4987 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004988 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004989 }
4990
4991 NEXT;
4992 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4993 (ctxt->sax->elementDecl != NULL))
4994 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4995 content);
4996 }
4997 if (content != NULL) {
4998 xmlFreeElementContent(content);
4999 }
5000 if (name != NULL) {
5001 xmlFree(name);
5002 }
5003 }
5004 return(ret);
5005}
5006
5007/**
Owen Taylor3473f882001-02-23 17:55:21 +00005008 * xmlParseConditionalSections
5009 * @ctxt: an XML parser context
5010 *
5011 * [61] conditionalSect ::= includeSect | ignoreSect
5012 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5013 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5014 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5015 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5016 */
5017
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005018static void
Owen Taylor3473f882001-02-23 17:55:21 +00005019xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5020 SKIP(3);
5021 SKIP_BLANKS;
5022 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
5023 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
5024 (NXT(6) == 'E')) {
5025 SKIP(7);
5026 SKIP_BLANKS;
5027 if (RAW != '[') {
5028 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5029 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5030 ctxt->sax->error(ctxt->userData,
5031 "XML conditional section '[' expected\n");
5032 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005033 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005034 } else {
5035 NEXT;
5036 }
5037 if (xmlParserDebugEntities) {
5038 if ((ctxt->input != NULL) && (ctxt->input->filename))
5039 xmlGenericError(xmlGenericErrorContext,
5040 "%s(%d): ", ctxt->input->filename,
5041 ctxt->input->line);
5042 xmlGenericError(xmlGenericErrorContext,
5043 "Entering INCLUDE Conditional Section\n");
5044 }
5045
5046 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5047 (NXT(2) != '>'))) {
5048 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005049 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005050
5051 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5052 xmlParseConditionalSections(ctxt);
5053 } else if (IS_BLANK(CUR)) {
5054 NEXT;
5055 } else if (RAW == '%') {
5056 xmlParsePEReference(ctxt);
5057 } else
5058 xmlParseMarkupDecl(ctxt);
5059
5060 /*
5061 * Pop-up of finished entities.
5062 */
5063 while ((RAW == 0) && (ctxt->inputNr > 1))
5064 xmlPopInput(ctxt);
5065
Daniel Veillardfdc91562002-07-01 21:52:03 +00005066 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005067 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5068 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5069 ctxt->sax->error(ctxt->userData,
5070 "Content error in the external subset\n");
5071 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005072 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005073 break;
5074 }
5075 }
5076 if (xmlParserDebugEntities) {
5077 if ((ctxt->input != NULL) && (ctxt->input->filename))
5078 xmlGenericError(xmlGenericErrorContext,
5079 "%s(%d): ", ctxt->input->filename,
5080 ctxt->input->line);
5081 xmlGenericError(xmlGenericErrorContext,
5082 "Leaving INCLUDE Conditional Section\n");
5083 }
5084
5085 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
5086 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
5087 int state;
5088 int instate;
5089 int depth = 0;
5090
5091 SKIP(6);
5092 SKIP_BLANKS;
5093 if (RAW != '[') {
5094 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5095 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5096 ctxt->sax->error(ctxt->userData,
5097 "XML conditional section '[' expected\n");
5098 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005099 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005100 } else {
5101 NEXT;
5102 }
5103 if (xmlParserDebugEntities) {
5104 if ((ctxt->input != NULL) && (ctxt->input->filename))
5105 xmlGenericError(xmlGenericErrorContext,
5106 "%s(%d): ", ctxt->input->filename,
5107 ctxt->input->line);
5108 xmlGenericError(xmlGenericErrorContext,
5109 "Entering IGNORE Conditional Section\n");
5110 }
5111
5112 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005113 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005114 * But disable SAX event generating DTD building in the meantime
5115 */
5116 state = ctxt->disableSAX;
5117 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005118 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005119 ctxt->instate = XML_PARSER_IGNORE;
5120
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005121 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005122 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5123 depth++;
5124 SKIP(3);
5125 continue;
5126 }
5127 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5128 if (--depth >= 0) SKIP(3);
5129 continue;
5130 }
5131 NEXT;
5132 continue;
5133 }
5134
5135 ctxt->disableSAX = state;
5136 ctxt->instate = instate;
5137
5138 if (xmlParserDebugEntities) {
5139 if ((ctxt->input != NULL) && (ctxt->input->filename))
5140 xmlGenericError(xmlGenericErrorContext,
5141 "%s(%d): ", ctxt->input->filename,
5142 ctxt->input->line);
5143 xmlGenericError(xmlGenericErrorContext,
5144 "Leaving IGNORE Conditional Section\n");
5145 }
5146
5147 } else {
5148 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5149 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5150 ctxt->sax->error(ctxt->userData,
5151 "XML conditional section INCLUDE or IGNORE keyword expected\n");
5152 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005153 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005154 }
5155
5156 if (RAW == 0)
5157 SHRINK;
5158
5159 if (RAW == 0) {
5160 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
5161 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5162 ctxt->sax->error(ctxt->userData,
5163 "XML conditional section not closed\n");
5164 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005165 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005166 } else {
5167 SKIP(3);
5168 }
5169}
5170
5171/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005172 * xmlParseMarkupDecl:
5173 * @ctxt: an XML parser context
5174 *
5175 * parse Markup declarations
5176 *
5177 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5178 * NotationDecl | PI | Comment
5179 *
5180 * [ VC: Proper Declaration/PE Nesting ]
5181 * Parameter-entity replacement text must be properly nested with
5182 * markup declarations. That is to say, if either the first character
5183 * or the last character of a markup declaration (markupdecl above) is
5184 * contained in the replacement text for a parameter-entity reference,
5185 * both must be contained in the same replacement text.
5186 *
5187 * [ WFC: PEs in Internal Subset ]
5188 * In the internal DTD subset, parameter-entity references can occur
5189 * only where markup declarations can occur, not within markup declarations.
5190 * (This does not apply to references that occur in external parameter
5191 * entities or to the external subset.)
5192 */
5193void
5194xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5195 GROW;
5196 xmlParseElementDecl(ctxt);
5197 xmlParseAttributeListDecl(ctxt);
5198 xmlParseEntityDecl(ctxt);
5199 xmlParseNotationDecl(ctxt);
5200 xmlParsePI(ctxt);
5201 xmlParseComment(ctxt);
5202 /*
5203 * This is only for internal subset. On external entities,
5204 * the replacement is done before parsing stage
5205 */
5206 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5207 xmlParsePEReference(ctxt);
5208
5209 /*
5210 * Conditional sections are allowed from entities included
5211 * by PE References in the internal subset.
5212 */
5213 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5214 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5215 xmlParseConditionalSections(ctxt);
5216 }
5217 }
5218
5219 ctxt->instate = XML_PARSER_DTD;
5220}
5221
5222/**
5223 * xmlParseTextDecl:
5224 * @ctxt: an XML parser context
5225 *
5226 * parse an XML declaration header for external entities
5227 *
5228 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5229 *
5230 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5231 */
5232
5233void
5234xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5235 xmlChar *version;
5236
5237 /*
5238 * We know that '<?xml' is here.
5239 */
5240 if ((RAW == '<') && (NXT(1) == '?') &&
5241 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5242 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5243 SKIP(5);
5244 } else {
5245 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
5246 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5247 ctxt->sax->error(ctxt->userData,
5248 "Text declaration '<?xml' required\n");
5249 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005250 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005251
5252 return;
5253 }
5254
5255 if (!IS_BLANK(CUR)) {
5256 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5257 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5258 ctxt->sax->error(ctxt->userData,
5259 "Space needed after '<?xml'\n");
5260 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005261 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005262 }
5263 SKIP_BLANKS;
5264
5265 /*
5266 * We may have the VersionInfo here.
5267 */
5268 version = xmlParseVersionInfo(ctxt);
5269 if (version == NULL)
5270 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005271 else {
5272 if (!IS_BLANK(CUR)) {
5273 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5274 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5275 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5276 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005277 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard401c2112002-01-07 16:54:10 +00005278 }
5279 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005280 ctxt->input->version = version;
5281
5282 /*
5283 * We must have the encoding declaration
5284 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005285 xmlParseEncodingDecl(ctxt);
5286 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5287 /*
5288 * The XML REC instructs us to stop parsing right here
5289 */
5290 return;
5291 }
5292
5293 SKIP_BLANKS;
5294 if ((RAW == '?') && (NXT(1) == '>')) {
5295 SKIP(2);
5296 } else if (RAW == '>') {
5297 /* Deprecated old WD ... */
5298 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5299 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5300 ctxt->sax->error(ctxt->userData,
5301 "XML declaration must end-up with '?>'\n");
5302 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005303 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005304 NEXT;
5305 } else {
5306 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5307 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5308 ctxt->sax->error(ctxt->userData,
5309 "parsing XML declaration: '?>' expected\n");
5310 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005311 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005312 MOVETO_ENDTAG(CUR_PTR);
5313 NEXT;
5314 }
5315}
5316
5317/**
Owen Taylor3473f882001-02-23 17:55:21 +00005318 * xmlParseExternalSubset:
5319 * @ctxt: an XML parser context
5320 * @ExternalID: the external identifier
5321 * @SystemID: the system identifier (or URL)
5322 *
5323 * parse Markup declarations from an external subset
5324 *
5325 * [30] extSubset ::= textDecl? extSubsetDecl
5326 *
5327 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5328 */
5329void
5330xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5331 const xmlChar *SystemID) {
5332 GROW;
5333 if ((RAW == '<') && (NXT(1) == '?') &&
5334 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5335 (NXT(4) == 'l')) {
5336 xmlParseTextDecl(ctxt);
5337 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5338 /*
5339 * The XML REC instructs us to stop parsing right here
5340 */
5341 ctxt->instate = XML_PARSER_EOF;
5342 return;
5343 }
5344 }
5345 if (ctxt->myDoc == NULL) {
5346 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5347 }
5348 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5349 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5350
5351 ctxt->instate = XML_PARSER_DTD;
5352 ctxt->external = 1;
5353 while (((RAW == '<') && (NXT(1) == '?')) ||
5354 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005355 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005356 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005357 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005358
5359 GROW;
5360 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5361 xmlParseConditionalSections(ctxt);
5362 } else if (IS_BLANK(CUR)) {
5363 NEXT;
5364 } else if (RAW == '%') {
5365 xmlParsePEReference(ctxt);
5366 } else
5367 xmlParseMarkupDecl(ctxt);
5368
5369 /*
5370 * Pop-up of finished entities.
5371 */
5372 while ((RAW == 0) && (ctxt->inputNr > 1))
5373 xmlPopInput(ctxt);
5374
Daniel Veillardfdc91562002-07-01 21:52:03 +00005375 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005376 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5377 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5378 ctxt->sax->error(ctxt->userData,
5379 "Content error in the external subset\n");
5380 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005381 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005382 break;
5383 }
5384 }
5385
5386 if (RAW != 0) {
5387 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5388 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5389 ctxt->sax->error(ctxt->userData,
5390 "Extra content at the end of the document\n");
5391 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005392 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005393 }
5394
5395}
5396
5397/**
5398 * xmlParseReference:
5399 * @ctxt: an XML parser context
5400 *
5401 * parse and handle entity references in content, depending on the SAX
5402 * interface, this may end-up in a call to character() if this is a
5403 * CharRef, a predefined entity, if there is no reference() callback.
5404 * or if the parser was asked to switch to that mode.
5405 *
5406 * [67] Reference ::= EntityRef | CharRef
5407 */
5408void
5409xmlParseReference(xmlParserCtxtPtr ctxt) {
5410 xmlEntityPtr ent;
5411 xmlChar *val;
5412 if (RAW != '&') return;
5413
5414 if (NXT(1) == '#') {
5415 int i = 0;
5416 xmlChar out[10];
5417 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005418 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005419
5420 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5421 /*
5422 * So we are using non-UTF-8 buffers
5423 * Check that the char fit on 8bits, if not
5424 * generate a CharRef.
5425 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005426 if (value <= 0xFF) {
5427 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005428 out[1] = 0;
5429 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5430 (!ctxt->disableSAX))
5431 ctxt->sax->characters(ctxt->userData, out, 1);
5432 } else {
5433 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005434 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005435 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005436 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005437 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5438 (!ctxt->disableSAX))
5439 ctxt->sax->reference(ctxt->userData, out);
5440 }
5441 } else {
5442 /*
5443 * Just encode the value in UTF-8
5444 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005445 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005446 out[i] = 0;
5447 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5448 (!ctxt->disableSAX))
5449 ctxt->sax->characters(ctxt->userData, out, i);
5450 }
5451 } else {
5452 ent = xmlParseEntityRef(ctxt);
5453 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005454 if (!ctxt->wellFormed)
5455 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005456 if ((ent->name != NULL) &&
5457 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5458 xmlNodePtr list = NULL;
5459 int ret;
5460
5461
5462 /*
5463 * The first reference to the entity trigger a parsing phase
5464 * where the ent->children is filled with the result from
5465 * the parsing.
5466 */
5467 if (ent->children == NULL) {
5468 xmlChar *value;
5469 value = ent->content;
5470
5471 /*
5472 * Check that this entity is well formed
5473 */
5474 if ((value != NULL) &&
5475 (value[1] == 0) && (value[0] == '<') &&
5476 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5477 /*
5478 * DONE: get definite answer on this !!!
5479 * Lots of entity decls are used to declare a single
5480 * char
5481 * <!ENTITY lt "<">
5482 * Which seems to be valid since
5483 * 2.4: The ampersand character (&) and the left angle
5484 * bracket (<) may appear in their literal form only
5485 * when used ... They are also legal within the literal
5486 * entity value of an internal entity declaration;i
5487 * see "4.3.2 Well-Formed Parsed Entities".
5488 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5489 * Looking at the OASIS test suite and James Clark
5490 * tests, this is broken. However the XML REC uses
5491 * it. Is the XML REC not well-formed ????
5492 * This is a hack to avoid this problem
5493 *
5494 * ANSWER: since lt gt amp .. are already defined,
5495 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005496 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005497 * is lousy but acceptable.
5498 */
5499 list = xmlNewDocText(ctxt->myDoc, value);
5500 if (list != NULL) {
5501 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5502 (ent->children == NULL)) {
5503 ent->children = list;
5504 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005505 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005506 list->parent = (xmlNodePtr) ent;
5507 } else {
5508 xmlFreeNodeList(list);
5509 }
5510 } else if (list != NULL) {
5511 xmlFreeNodeList(list);
5512 }
5513 } else {
5514 /*
5515 * 4.3.2: An internal general parsed entity is well-formed
5516 * if its replacement text matches the production labeled
5517 * content.
5518 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005519
5520 void *user_data;
5521 /*
5522 * This is a bit hackish but this seems the best
5523 * way to make sure both SAX and DOM entity support
5524 * behaves okay.
5525 */
5526 if (ctxt->userData == ctxt)
5527 user_data = NULL;
5528 else
5529 user_data = ctxt->userData;
5530
Owen Taylor3473f882001-02-23 17:55:21 +00005531 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5532 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005533 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5534 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005535 ctxt->depth--;
5536 } else if (ent->etype ==
5537 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5538 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005539 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005540 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005541 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005542 ctxt->depth--;
5543 } else {
5544 ret = -1;
5545 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5546 ctxt->sax->error(ctxt->userData,
5547 "Internal: invalid entity type\n");
5548 }
5549 if (ret == XML_ERR_ENTITY_LOOP) {
5550 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5551 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5552 ctxt->sax->error(ctxt->userData,
5553 "Detected entity reference loop\n");
5554 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005555 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005556 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005557 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005558 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5559 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005560 (ent->children == NULL)) {
5561 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005562 if (ctxt->replaceEntities) {
5563 /*
5564 * Prune it directly in the generated document
5565 * except for single text nodes.
5566 */
5567 if ((list->type == XML_TEXT_NODE) &&
5568 (list->next == NULL)) {
5569 list->parent = (xmlNodePtr) ent;
5570 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005571 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005572 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005573 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005574 while (list != NULL) {
5575 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005576 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005577 if (list->next == NULL)
5578 ent->last = list;
5579 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005580 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005581 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005582 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5583 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005584 }
5585 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005586 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005587 while (list != NULL) {
5588 list->parent = (xmlNodePtr) ent;
5589 if (list->next == NULL)
5590 ent->last = list;
5591 list = list->next;
5592 }
Owen Taylor3473f882001-02-23 17:55:21 +00005593 }
5594 } else {
5595 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005596 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005597 }
5598 } else if (ret > 0) {
5599 ctxt->errNo = ret;
5600 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5601 ctxt->sax->error(ctxt->userData,
5602 "Entity value required\n");
5603 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005604 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005605 } else if (list != NULL) {
5606 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005607 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005608 }
5609 }
5610 }
5611 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5612 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5613 /*
5614 * Create a node.
5615 */
5616 ctxt->sax->reference(ctxt->userData, ent->name);
5617 return;
5618 } else if (ctxt->replaceEntities) {
5619 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5620 /*
5621 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005622 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005623 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005624 */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005625 if ((list == NULL) && (ent->owner == 0)) {
5626 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005627 cur = ent->children;
5628 while (cur != NULL) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005629 nw = xmlCopyNode(cur, 1);
5630 if (nw != NULL) {
5631 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005632 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005633 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005634 }
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005635 xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005636 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005637 if (cur == ent->last)
5638 break;
5639 cur = cur->next;
5640 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005641 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005642 xmlAddEntityReference(ent, firstChild, nw);
5643 } else if (list == NULL) {
5644 xmlNodePtr nw = NULL, cur, next, last,
5645 firstChild = NULL;
5646 /*
5647 * Copy the entity child list and make it the new
5648 * entity child list. The goal is to make sure any
5649 * ID or REF referenced will be the one from the
5650 * document content and not the entity copy.
5651 */
5652 cur = ent->children;
5653 ent->children = NULL;
5654 last = ent->last;
5655 ent->last = NULL;
5656 while (cur != NULL) {
5657 next = cur->next;
5658 cur->next = NULL;
5659 cur->parent = NULL;
5660 nw = xmlCopyNode(cur, 1);
5661 if (nw != NULL) {
5662 nw->_private = cur->_private;
5663 if (firstChild == NULL){
5664 firstChild = cur;
5665 }
5666 xmlAddChild((xmlNodePtr) ent, nw);
5667 xmlAddChild(ctxt->node, cur);
5668 }
5669 if (cur == last)
5670 break;
5671 cur = next;
5672 }
5673 ent->owner = 1;
5674 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5675 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005676 } else {
5677 /*
5678 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005679 * node with a possible previous text one which
5680 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005681 */
5682 if (ent->children->type == XML_TEXT_NODE)
5683 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5684 if ((ent->last != ent->children) &&
5685 (ent->last->type == XML_TEXT_NODE))
5686 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5687 xmlAddChildList(ctxt->node, ent->children);
5688 }
5689
Owen Taylor3473f882001-02-23 17:55:21 +00005690 /*
5691 * This is to avoid a nasty side effect, see
5692 * characters() in SAX.c
5693 */
5694 ctxt->nodemem = 0;
5695 ctxt->nodelen = 0;
5696 return;
5697 } else {
5698 /*
5699 * Probably running in SAX mode
5700 */
5701 xmlParserInputPtr input;
5702
5703 input = xmlNewEntityInputStream(ctxt, ent);
5704 xmlPushInput(ctxt, input);
5705 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5706 (RAW == '<') && (NXT(1) == '?') &&
5707 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5708 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5709 xmlParseTextDecl(ctxt);
5710 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5711 /*
5712 * The XML REC instructs us to stop parsing right here
5713 */
5714 ctxt->instate = XML_PARSER_EOF;
5715 return;
5716 }
5717 if (input->standalone == 1) {
5718 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5719 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5720 ctxt->sax->error(ctxt->userData,
5721 "external parsed entities cannot be standalone\n");
5722 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005723 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005724 }
5725 }
5726 return;
5727 }
5728 }
5729 } else {
5730 val = ent->content;
5731 if (val == NULL) return;
5732 /*
5733 * inline the entity.
5734 */
5735 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5736 (!ctxt->disableSAX))
5737 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5738 }
5739 }
5740}
5741
5742/**
5743 * xmlParseEntityRef:
5744 * @ctxt: an XML parser context
5745 *
5746 * parse ENTITY references declarations
5747 *
5748 * [68] EntityRef ::= '&' Name ';'
5749 *
5750 * [ WFC: Entity Declared ]
5751 * In a document without any DTD, a document with only an internal DTD
5752 * subset which contains no parameter entity references, or a document
5753 * with "standalone='yes'", the Name given in the entity reference
5754 * must match that in an entity declaration, except that well-formed
5755 * documents need not declare any of the following entities: amp, lt,
5756 * gt, apos, quot. The declaration of a parameter entity must precede
5757 * any reference to it. Similarly, the declaration of a general entity
5758 * must precede any reference to it which appears in a default value in an
5759 * attribute-list declaration. Note that if entities are declared in the
5760 * external subset or in external parameter entities, a non-validating
5761 * processor is not obligated to read and process their declarations;
5762 * for such documents, the rule that an entity must be declared is a
5763 * well-formedness constraint only if standalone='yes'.
5764 *
5765 * [ WFC: Parsed Entity ]
5766 * An entity reference must not contain the name of an unparsed entity
5767 *
5768 * Returns the xmlEntityPtr if found, or NULL otherwise.
5769 */
5770xmlEntityPtr
5771xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5772 xmlChar *name;
5773 xmlEntityPtr ent = NULL;
5774
5775 GROW;
5776
5777 if (RAW == '&') {
5778 NEXT;
5779 name = xmlParseName(ctxt);
5780 if (name == NULL) {
5781 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5782 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5783 ctxt->sax->error(ctxt->userData,
5784 "xmlParseEntityRef: no name\n");
5785 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005786 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005787 } else {
5788 if (RAW == ';') {
5789 NEXT;
5790 /*
5791 * Ask first SAX for entity resolution, otherwise try the
5792 * predefined set.
5793 */
5794 if (ctxt->sax != NULL) {
5795 if (ctxt->sax->getEntity != NULL)
5796 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005797 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00005798 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005799 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
5800 (ctxt->userData==ctxt)) {
Daniel Veillard5997aca2002-03-18 18:36:20 +00005801 ent = getEntity(ctxt, name);
5802 }
Owen Taylor3473f882001-02-23 17:55:21 +00005803 }
5804 /*
5805 * [ WFC: Entity Declared ]
5806 * In a document without any DTD, a document with only an
5807 * internal DTD subset which contains no parameter entity
5808 * references, or a document with "standalone='yes'", the
5809 * Name given in the entity reference must match that in an
5810 * entity declaration, except that well-formed documents
5811 * need not declare any of the following entities: amp, lt,
5812 * gt, apos, quot.
5813 * The declaration of a parameter entity must precede any
5814 * reference to it.
5815 * Similarly, the declaration of a general entity must
5816 * precede any reference to it which appears in a default
5817 * value in an attribute-list declaration. Note that if
5818 * entities are declared in the external subset or in
5819 * external parameter entities, a non-validating processor
5820 * is not obligated to read and process their declarations;
5821 * for such documents, the rule that an entity must be
5822 * declared is a well-formedness constraint only if
5823 * standalone='yes'.
5824 */
5825 if (ent == NULL) {
5826 if ((ctxt->standalone == 1) ||
5827 ((ctxt->hasExternalSubset == 0) &&
5828 (ctxt->hasPErefs == 0))) {
5829 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5830 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5831 ctxt->sax->error(ctxt->userData,
5832 "Entity '%s' not defined\n", name);
5833 ctxt->wellFormed = 0;
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005834 ctxt->valid = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005835 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005836 } else {
5837 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005838 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005839 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005840 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005841 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005842 }
5843 }
5844
5845 /*
5846 * [ WFC: Parsed Entity ]
5847 * An entity reference must not contain the name of an
5848 * unparsed entity
5849 */
5850 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5851 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5852 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5853 ctxt->sax->error(ctxt->userData,
5854 "Entity reference to unparsed entity %s\n", name);
5855 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005856 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005857 }
5858
5859 /*
5860 * [ WFC: No External Entity References ]
5861 * Attribute values cannot contain direct or indirect
5862 * entity references to external entities.
5863 */
5864 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5865 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5866 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5867 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5868 ctxt->sax->error(ctxt->userData,
5869 "Attribute references external entity '%s'\n", name);
5870 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005871 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005872 }
5873 /*
5874 * [ WFC: No < in Attribute Values ]
5875 * The replacement text of any entity referred to directly or
5876 * indirectly in an attribute value (other than "&lt;") must
5877 * not contain a <.
5878 */
5879 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5880 (ent != NULL) &&
5881 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5882 (ent->content != NULL) &&
5883 (xmlStrchr(ent->content, '<'))) {
5884 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5885 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5886 ctxt->sax->error(ctxt->userData,
5887 "'<' in entity '%s' is not allowed in attributes values\n", name);
5888 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005889 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005890 }
5891
5892 /*
5893 * Internal check, no parameter entities here ...
5894 */
5895 else {
5896 switch (ent->etype) {
5897 case XML_INTERNAL_PARAMETER_ENTITY:
5898 case XML_EXTERNAL_PARAMETER_ENTITY:
5899 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5900 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5901 ctxt->sax->error(ctxt->userData,
5902 "Attempt to reference the parameter entity '%s'\n", name);
5903 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005904 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005905 break;
5906 default:
5907 break;
5908 }
5909 }
5910
5911 /*
5912 * [ WFC: No Recursion ]
5913 * A parsed entity must not contain a recursive reference
5914 * to itself, either directly or indirectly.
5915 * Done somewhere else
5916 */
5917
5918 } else {
5919 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5920 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5921 ctxt->sax->error(ctxt->userData,
5922 "xmlParseEntityRef: expecting ';'\n");
5923 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005924 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005925 }
5926 xmlFree(name);
5927 }
5928 }
5929 return(ent);
5930}
5931
5932/**
5933 * xmlParseStringEntityRef:
5934 * @ctxt: an XML parser context
5935 * @str: a pointer to an index in the string
5936 *
5937 * parse ENTITY references declarations, but this version parses it from
5938 * a string value.
5939 *
5940 * [68] EntityRef ::= '&' Name ';'
5941 *
5942 * [ WFC: Entity Declared ]
5943 * In a document without any DTD, a document with only an internal DTD
5944 * subset which contains no parameter entity references, or a document
5945 * with "standalone='yes'", the Name given in the entity reference
5946 * must match that in an entity declaration, except that well-formed
5947 * documents need not declare any of the following entities: amp, lt,
5948 * gt, apos, quot. The declaration of a parameter entity must precede
5949 * any reference to it. Similarly, the declaration of a general entity
5950 * must precede any reference to it which appears in a default value in an
5951 * attribute-list declaration. Note that if entities are declared in the
5952 * external subset or in external parameter entities, a non-validating
5953 * processor is not obligated to read and process their declarations;
5954 * for such documents, the rule that an entity must be declared is a
5955 * well-formedness constraint only if standalone='yes'.
5956 *
5957 * [ WFC: Parsed Entity ]
5958 * An entity reference must not contain the name of an unparsed entity
5959 *
5960 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5961 * is updated to the current location in the string.
5962 */
5963xmlEntityPtr
5964xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5965 xmlChar *name;
5966 const xmlChar *ptr;
5967 xmlChar cur;
5968 xmlEntityPtr ent = NULL;
5969
5970 if ((str == NULL) || (*str == NULL))
5971 return(NULL);
5972 ptr = *str;
5973 cur = *ptr;
5974 if (cur == '&') {
5975 ptr++;
5976 cur = *ptr;
5977 name = xmlParseStringName(ctxt, &ptr);
5978 if (name == NULL) {
5979 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5980 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5981 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005982 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005983 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005984 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005985 } else {
5986 if (*ptr == ';') {
5987 ptr++;
5988 /*
5989 * Ask first SAX for entity resolution, otherwise try the
5990 * predefined set.
5991 */
5992 if (ctxt->sax != NULL) {
5993 if (ctxt->sax->getEntity != NULL)
5994 ent = ctxt->sax->getEntity(ctxt->userData, name);
5995 if (ent == NULL)
5996 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005997 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5998 ent = getEntity(ctxt, name);
5999 }
Owen Taylor3473f882001-02-23 17:55:21 +00006000 }
6001 /*
6002 * [ WFC: Entity Declared ]
6003 * In a document without any DTD, a document with only an
6004 * internal DTD subset which contains no parameter entity
6005 * references, or a document with "standalone='yes'", the
6006 * Name given in the entity reference must match that in an
6007 * entity declaration, except that well-formed documents
6008 * need not declare any of the following entities: amp, lt,
6009 * gt, apos, quot.
6010 * The declaration of a parameter entity must precede any
6011 * reference to it.
6012 * Similarly, the declaration of a general entity must
6013 * precede any reference to it which appears in a default
6014 * value in an attribute-list declaration. Note that if
6015 * entities are declared in the external subset or in
6016 * external parameter entities, a non-validating processor
6017 * is not obligated to read and process their declarations;
6018 * for such documents, the rule that an entity must be
6019 * declared is a well-formedness constraint only if
6020 * standalone='yes'.
6021 */
6022 if (ent == NULL) {
6023 if ((ctxt->standalone == 1) ||
6024 ((ctxt->hasExternalSubset == 0) &&
6025 (ctxt->hasPErefs == 0))) {
6026 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6027 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6028 ctxt->sax->error(ctxt->userData,
6029 "Entity '%s' not defined\n", name);
6030 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006031 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006032 } else {
6033 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
6034 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6035 ctxt->sax->warning(ctxt->userData,
6036 "Entity '%s' not defined\n", name);
6037 }
6038 }
6039
6040 /*
6041 * [ WFC: Parsed Entity ]
6042 * An entity reference must not contain the name of an
6043 * unparsed entity
6044 */
6045 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
6046 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
6047 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6048 ctxt->sax->error(ctxt->userData,
6049 "Entity reference to unparsed entity %s\n", name);
6050 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006051 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006052 }
6053
6054 /*
6055 * [ WFC: No External Entity References ]
6056 * Attribute values cannot contain direct or indirect
6057 * entity references to external entities.
6058 */
6059 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6060 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
6061 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
6062 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6063 ctxt->sax->error(ctxt->userData,
6064 "Attribute references external entity '%s'\n", name);
6065 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006066 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006067 }
6068 /*
6069 * [ WFC: No < in Attribute Values ]
6070 * The replacement text of any entity referred to directly or
6071 * indirectly in an attribute value (other than "&lt;") must
6072 * not contain a <.
6073 */
6074 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6075 (ent != NULL) &&
6076 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6077 (ent->content != NULL) &&
6078 (xmlStrchr(ent->content, '<'))) {
6079 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6080 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6081 ctxt->sax->error(ctxt->userData,
6082 "'<' in entity '%s' is not allowed in attributes values\n", name);
6083 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006084 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006085 }
6086
6087 /*
6088 * Internal check, no parameter entities here ...
6089 */
6090 else {
6091 switch (ent->etype) {
6092 case XML_INTERNAL_PARAMETER_ENTITY:
6093 case XML_EXTERNAL_PARAMETER_ENTITY:
6094 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6095 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6096 ctxt->sax->error(ctxt->userData,
6097 "Attempt to reference the parameter entity '%s'\n", name);
6098 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006099 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006100 break;
6101 default:
6102 break;
6103 }
6104 }
6105
6106 /*
6107 * [ WFC: No Recursion ]
6108 * A parsed entity must not contain a recursive reference
6109 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006110 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006111 */
6112
6113 } else {
6114 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6115 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6116 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00006117 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006118 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006119 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006120 }
6121 xmlFree(name);
6122 }
6123 }
6124 *str = ptr;
6125 return(ent);
6126}
6127
6128/**
6129 * xmlParsePEReference:
6130 * @ctxt: an XML parser context
6131 *
6132 * parse PEReference declarations
6133 * The entity content is handled directly by pushing it's content as
6134 * a new input stream.
6135 *
6136 * [69] PEReference ::= '%' Name ';'
6137 *
6138 * [ WFC: No Recursion ]
6139 * A parsed entity must not contain a recursive
6140 * reference to itself, either directly or indirectly.
6141 *
6142 * [ WFC: Entity Declared ]
6143 * In a document without any DTD, a document with only an internal DTD
6144 * subset which contains no parameter entity references, or a document
6145 * with "standalone='yes'", ... ... The declaration of a parameter
6146 * entity must precede any reference to it...
6147 *
6148 * [ VC: Entity Declared ]
6149 * In a document with an external subset or external parameter entities
6150 * with "standalone='no'", ... ... The declaration of a parameter entity
6151 * must precede any reference to it...
6152 *
6153 * [ WFC: In DTD ]
6154 * Parameter-entity references may only appear in the DTD.
6155 * NOTE: misleading but this is handled.
6156 */
6157void
6158xmlParsePEReference(xmlParserCtxtPtr ctxt) {
6159 xmlChar *name;
6160 xmlEntityPtr entity = NULL;
6161 xmlParserInputPtr input;
6162
6163 if (RAW == '%') {
6164 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006165 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006166 if (name == NULL) {
6167 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6168 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6169 ctxt->sax->error(ctxt->userData,
6170 "xmlParsePEReference: no name\n");
6171 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006172 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006173 } else {
6174 if (RAW == ';') {
6175 NEXT;
6176 if ((ctxt->sax != NULL) &&
6177 (ctxt->sax->getParameterEntity != NULL))
6178 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6179 name);
6180 if (entity == NULL) {
6181 /*
6182 * [ WFC: Entity Declared ]
6183 * In a document without any DTD, a document with only an
6184 * internal DTD subset which contains no parameter entity
6185 * references, or a document with "standalone='yes'", ...
6186 * ... The declaration of a parameter entity must precede
6187 * any reference to it...
6188 */
6189 if ((ctxt->standalone == 1) ||
6190 ((ctxt->hasExternalSubset == 0) &&
6191 (ctxt->hasPErefs == 0))) {
6192 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6193 if ((!ctxt->disableSAX) &&
6194 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6195 ctxt->sax->error(ctxt->userData,
6196 "PEReference: %%%s; not found\n", name);
6197 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006198 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006199 } else {
6200 /*
6201 * [ VC: Entity Declared ]
6202 * In a document with an external subset or external
6203 * parameter entities with "standalone='no'", ...
6204 * ... The declaration of a parameter entity must precede
6205 * any reference to it...
6206 */
6207 if ((!ctxt->disableSAX) &&
6208 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6209 ctxt->sax->warning(ctxt->userData,
6210 "PEReference: %%%s; not found\n", name);
6211 ctxt->valid = 0;
6212 }
6213 } else {
6214 /*
6215 * Internal checking in case the entity quest barfed
6216 */
6217 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6218 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6219 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6220 ctxt->sax->warning(ctxt->userData,
6221 "Internal: %%%s; is not a parameter entity\n", name);
Daniel Veillardf5582f12002-06-11 10:08:16 +00006222 } else if (ctxt->input->free != deallocblankswrapper) {
6223 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
6224 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00006225 } else {
6226 /*
6227 * TODO !!!
6228 * handle the extra spaces added before and after
6229 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6230 */
6231 input = xmlNewEntityInputStream(ctxt, entity);
6232 xmlPushInput(ctxt, input);
6233 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6234 (RAW == '<') && (NXT(1) == '?') &&
6235 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6236 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6237 xmlParseTextDecl(ctxt);
6238 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6239 /*
6240 * The XML REC instructs us to stop parsing
6241 * right here
6242 */
6243 ctxt->instate = XML_PARSER_EOF;
6244 xmlFree(name);
6245 return;
6246 }
6247 }
Owen Taylor3473f882001-02-23 17:55:21 +00006248 }
6249 }
6250 ctxt->hasPErefs = 1;
6251 } else {
6252 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6253 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6254 ctxt->sax->error(ctxt->userData,
6255 "xmlParsePEReference: expecting ';'\n");
6256 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006257 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006258 }
6259 xmlFree(name);
6260 }
6261 }
6262}
6263
6264/**
6265 * xmlParseStringPEReference:
6266 * @ctxt: an XML parser context
6267 * @str: a pointer to an index in the string
6268 *
6269 * parse PEReference declarations
6270 *
6271 * [69] PEReference ::= '%' Name ';'
6272 *
6273 * [ WFC: No Recursion ]
6274 * A parsed entity must not contain a recursive
6275 * reference to itself, either directly or indirectly.
6276 *
6277 * [ WFC: Entity Declared ]
6278 * In a document without any DTD, a document with only an internal DTD
6279 * subset which contains no parameter entity references, or a document
6280 * with "standalone='yes'", ... ... The declaration of a parameter
6281 * entity must precede any reference to it...
6282 *
6283 * [ VC: Entity Declared ]
6284 * In a document with an external subset or external parameter entities
6285 * with "standalone='no'", ... ... The declaration of a parameter entity
6286 * must precede any reference to it...
6287 *
6288 * [ WFC: In DTD ]
6289 * Parameter-entity references may only appear in the DTD.
6290 * NOTE: misleading but this is handled.
6291 *
6292 * Returns the string of the entity content.
6293 * str is updated to the current value of the index
6294 */
6295xmlEntityPtr
6296xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6297 const xmlChar *ptr;
6298 xmlChar cur;
6299 xmlChar *name;
6300 xmlEntityPtr entity = NULL;
6301
6302 if ((str == NULL) || (*str == NULL)) return(NULL);
6303 ptr = *str;
6304 cur = *ptr;
6305 if (cur == '%') {
6306 ptr++;
6307 cur = *ptr;
6308 name = xmlParseStringName(ctxt, &ptr);
6309 if (name == NULL) {
6310 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6311 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6312 ctxt->sax->error(ctxt->userData,
6313 "xmlParseStringPEReference: no name\n");
6314 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006315 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006316 } else {
6317 cur = *ptr;
6318 if (cur == ';') {
6319 ptr++;
6320 cur = *ptr;
6321 if ((ctxt->sax != NULL) &&
6322 (ctxt->sax->getParameterEntity != NULL))
6323 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6324 name);
6325 if (entity == NULL) {
6326 /*
6327 * [ WFC: Entity Declared ]
6328 * In a document without any DTD, a document with only an
6329 * internal DTD subset which contains no parameter entity
6330 * references, or a document with "standalone='yes'", ...
6331 * ... The declaration of a parameter entity must precede
6332 * any reference to it...
6333 */
6334 if ((ctxt->standalone == 1) ||
6335 ((ctxt->hasExternalSubset == 0) &&
6336 (ctxt->hasPErefs == 0))) {
6337 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6338 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6339 ctxt->sax->error(ctxt->userData,
6340 "PEReference: %%%s; not found\n", name);
6341 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006342 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006343 } else {
6344 /*
6345 * [ VC: Entity Declared ]
6346 * In a document with an external subset or external
6347 * parameter entities with "standalone='no'", ...
6348 * ... The declaration of a parameter entity must
6349 * precede any reference to it...
6350 */
6351 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6352 ctxt->sax->warning(ctxt->userData,
6353 "PEReference: %%%s; not found\n", name);
6354 ctxt->valid = 0;
6355 }
6356 } else {
6357 /*
6358 * Internal checking in case the entity quest barfed
6359 */
6360 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6361 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6362 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6363 ctxt->sax->warning(ctxt->userData,
6364 "Internal: %%%s; is not a parameter entity\n", name);
6365 }
6366 }
6367 ctxt->hasPErefs = 1;
6368 } else {
6369 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6370 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6371 ctxt->sax->error(ctxt->userData,
6372 "xmlParseStringPEReference: expecting ';'\n");
6373 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006374 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006375 }
6376 xmlFree(name);
6377 }
6378 }
6379 *str = ptr;
6380 return(entity);
6381}
6382
6383/**
6384 * xmlParseDocTypeDecl:
6385 * @ctxt: an XML parser context
6386 *
6387 * parse a DOCTYPE declaration
6388 *
6389 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6390 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6391 *
6392 * [ VC: Root Element Type ]
6393 * The Name in the document type declaration must match the element
6394 * type of the root element.
6395 */
6396
6397void
6398xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6399 xmlChar *name = NULL;
6400 xmlChar *ExternalID = NULL;
6401 xmlChar *URI = NULL;
6402
6403 /*
6404 * We know that '<!DOCTYPE' has been detected.
6405 */
6406 SKIP(9);
6407
6408 SKIP_BLANKS;
6409
6410 /*
6411 * Parse the DOCTYPE name.
6412 */
6413 name = xmlParseName(ctxt);
6414 if (name == NULL) {
6415 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6416 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6417 ctxt->sax->error(ctxt->userData,
6418 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6419 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006420 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006421 }
6422 ctxt->intSubName = name;
6423
6424 SKIP_BLANKS;
6425
6426 /*
6427 * Check for SystemID and ExternalID
6428 */
6429 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6430
6431 if ((URI != NULL) || (ExternalID != NULL)) {
6432 ctxt->hasExternalSubset = 1;
6433 }
6434 ctxt->extSubURI = URI;
6435 ctxt->extSubSystem = ExternalID;
6436
6437 SKIP_BLANKS;
6438
6439 /*
6440 * Create and update the internal subset.
6441 */
6442 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6443 (!ctxt->disableSAX))
6444 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6445
6446 /*
6447 * Is there any internal subset declarations ?
6448 * they are handled separately in xmlParseInternalSubset()
6449 */
6450 if (RAW == '[')
6451 return;
6452
6453 /*
6454 * We should be at the end of the DOCTYPE declaration.
6455 */
6456 if (RAW != '>') {
6457 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6458 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006459 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006460 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006461 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006462 }
6463 NEXT;
6464}
6465
6466/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006467 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006468 * @ctxt: an XML parser context
6469 *
6470 * parse the internal subset declaration
6471 *
6472 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6473 */
6474
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006475static void
Owen Taylor3473f882001-02-23 17:55:21 +00006476xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6477 /*
6478 * Is there any DTD definition ?
6479 */
6480 if (RAW == '[') {
6481 ctxt->instate = XML_PARSER_DTD;
6482 NEXT;
6483 /*
6484 * Parse the succession of Markup declarations and
6485 * PEReferences.
6486 * Subsequence (markupdecl | PEReference | S)*
6487 */
6488 while (RAW != ']') {
6489 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006490 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006491
6492 SKIP_BLANKS;
6493 xmlParseMarkupDecl(ctxt);
6494 xmlParsePEReference(ctxt);
6495
6496 /*
6497 * Pop-up of finished entities.
6498 */
6499 while ((RAW == 0) && (ctxt->inputNr > 1))
6500 xmlPopInput(ctxt);
6501
6502 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6503 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6504 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6505 ctxt->sax->error(ctxt->userData,
6506 "xmlParseInternalSubset: error detected in Markup declaration\n");
6507 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006508 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006509 break;
6510 }
6511 }
6512 if (RAW == ']') {
6513 NEXT;
6514 SKIP_BLANKS;
6515 }
6516 }
6517
6518 /*
6519 * We should be at the end of the DOCTYPE declaration.
6520 */
6521 if (RAW != '>') {
6522 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6523 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006524 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006525 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006526 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006527 }
6528 NEXT;
6529}
6530
6531/**
6532 * xmlParseAttribute:
6533 * @ctxt: an XML parser context
6534 * @value: a xmlChar ** used to store the value of the attribute
6535 *
6536 * parse an attribute
6537 *
6538 * [41] Attribute ::= Name Eq AttValue
6539 *
6540 * [ WFC: No External Entity References ]
6541 * Attribute values cannot contain direct or indirect entity references
6542 * to external entities.
6543 *
6544 * [ WFC: No < in Attribute Values ]
6545 * The replacement text of any entity referred to directly or indirectly in
6546 * an attribute value (other than "&lt;") must not contain a <.
6547 *
6548 * [ VC: Attribute Value Type ]
6549 * The attribute must have been declared; the value must be of the type
6550 * declared for it.
6551 *
6552 * [25] Eq ::= S? '=' S?
6553 *
6554 * With namespace:
6555 *
6556 * [NS 11] Attribute ::= QName Eq AttValue
6557 *
6558 * Also the case QName == xmlns:??? is handled independently as a namespace
6559 * definition.
6560 *
6561 * Returns the attribute name, and the value in *value.
6562 */
6563
6564xmlChar *
6565xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6566 xmlChar *name, *val;
6567
6568 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006569 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006570 name = xmlParseName(ctxt);
6571 if (name == NULL) {
6572 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6573 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6574 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6575 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006576 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006577 return(NULL);
6578 }
6579
6580 /*
6581 * read the value
6582 */
6583 SKIP_BLANKS;
6584 if (RAW == '=') {
6585 NEXT;
6586 SKIP_BLANKS;
6587 val = xmlParseAttValue(ctxt);
6588 ctxt->instate = XML_PARSER_CONTENT;
6589 } else {
6590 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6591 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6592 ctxt->sax->error(ctxt->userData,
6593 "Specification mandate value for attribute %s\n", name);
6594 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006595 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006596 xmlFree(name);
6597 return(NULL);
6598 }
6599
6600 /*
6601 * Check that xml:lang conforms to the specification
6602 * No more registered as an error, just generate a warning now
6603 * since this was deprecated in XML second edition
6604 */
6605 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6606 if (!xmlCheckLanguageID(val)) {
6607 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6608 ctxt->sax->warning(ctxt->userData,
6609 "Malformed value for xml:lang : %s\n", val);
6610 }
6611 }
6612
6613 /*
6614 * Check that xml:space conforms to the specification
6615 */
6616 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6617 if (xmlStrEqual(val, BAD_CAST "default"))
6618 *(ctxt->space) = 0;
6619 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6620 *(ctxt->space) = 1;
6621 else {
6622 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6623 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6624 ctxt->sax->error(ctxt->userData,
Daniel Veillard642104e2003-03-26 16:32:05 +00006625"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Owen Taylor3473f882001-02-23 17:55:21 +00006626 val);
6627 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006628 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006629 }
6630 }
6631
6632 *value = val;
6633 return(name);
6634}
6635
6636/**
6637 * xmlParseStartTag:
6638 * @ctxt: an XML parser context
6639 *
6640 * parse a start of tag either for rule element or
6641 * EmptyElement. In both case we don't parse the tag closing chars.
6642 *
6643 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6644 *
6645 * [ WFC: Unique Att Spec ]
6646 * No attribute name may appear more than once in the same start-tag or
6647 * empty-element tag.
6648 *
6649 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6650 *
6651 * [ WFC: Unique Att Spec ]
6652 * No attribute name may appear more than once in the same start-tag or
6653 * empty-element tag.
6654 *
6655 * With namespace:
6656 *
6657 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6658 *
6659 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6660 *
6661 * Returns the element name parsed
6662 */
6663
6664xmlChar *
6665xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6666 xmlChar *name;
6667 xmlChar *attname;
6668 xmlChar *attvalue;
6669 const xmlChar **atts = NULL;
6670 int nbatts = 0;
6671 int maxatts = 0;
6672 int i;
6673
6674 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006675 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006676
6677 name = xmlParseName(ctxt);
6678 if (name == NULL) {
6679 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6680 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6681 ctxt->sax->error(ctxt->userData,
6682 "xmlParseStartTag: invalid element name\n");
6683 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006684 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006685 return(NULL);
6686 }
6687
6688 /*
6689 * Now parse the attributes, it ends up with the ending
6690 *
6691 * (S Attribute)* S?
6692 */
6693 SKIP_BLANKS;
6694 GROW;
6695
Daniel Veillard21a0f912001-02-25 19:54:14 +00006696 while ((RAW != '>') &&
6697 ((RAW != '/') || (NXT(1) != '>')) &&
6698 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006699 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006700 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006701
6702 attname = xmlParseAttribute(ctxt, &attvalue);
6703 if ((attname != NULL) && (attvalue != NULL)) {
6704 /*
6705 * [ WFC: Unique Att Spec ]
6706 * No attribute name may appear more than once in the same
6707 * start-tag or empty-element tag.
6708 */
6709 for (i = 0; i < nbatts;i += 2) {
6710 if (xmlStrEqual(atts[i], attname)) {
6711 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6712 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6713 ctxt->sax->error(ctxt->userData,
6714 "Attribute %s redefined\n",
6715 attname);
6716 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006717 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006718 xmlFree(attname);
6719 xmlFree(attvalue);
6720 goto failed;
6721 }
6722 }
6723
6724 /*
6725 * Add the pair to atts
6726 */
6727 if (atts == NULL) {
6728 maxatts = 10;
6729 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6730 if (atts == NULL) {
6731 xmlGenericError(xmlGenericErrorContext,
6732 "malloc of %ld byte failed\n",
6733 maxatts * (long)sizeof(xmlChar *));
6734 return(NULL);
6735 }
6736 } else if (nbatts + 4 > maxatts) {
6737 maxatts *= 2;
6738 atts = (const xmlChar **) xmlRealloc((void *) atts,
6739 maxatts * sizeof(xmlChar *));
6740 if (atts == NULL) {
6741 xmlGenericError(xmlGenericErrorContext,
6742 "realloc of %ld byte failed\n",
6743 maxatts * (long)sizeof(xmlChar *));
6744 return(NULL);
6745 }
6746 }
6747 atts[nbatts++] = attname;
6748 atts[nbatts++] = attvalue;
6749 atts[nbatts] = NULL;
6750 atts[nbatts + 1] = NULL;
6751 } else {
6752 if (attname != NULL)
6753 xmlFree(attname);
6754 if (attvalue != NULL)
6755 xmlFree(attvalue);
6756 }
6757
6758failed:
6759
Daniel Veillard3772de32002-12-17 10:31:45 +00006760 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006761 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6762 break;
6763 if (!IS_BLANK(RAW)) {
6764 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6765 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6766 ctxt->sax->error(ctxt->userData,
6767 "attributes construct error\n");
6768 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006769 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006770 }
6771 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006772 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6773 (attname == NULL) && (attvalue == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006774 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6775 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6776 ctxt->sax->error(ctxt->userData,
6777 "xmlParseStartTag: problem parsing attributes\n");
6778 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006779 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006780 break;
6781 }
6782 GROW;
6783 }
6784
6785 /*
6786 * SAX: Start of Element !
6787 */
6788 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6789 (!ctxt->disableSAX))
6790 ctxt->sax->startElement(ctxt->userData, name, atts);
6791
6792 if (atts != NULL) {
6793 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6794 xmlFree((void *) atts);
6795 }
6796 return(name);
6797}
6798
6799/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006800 * xmlParseEndTagInternal:
Owen Taylor3473f882001-02-23 17:55:21 +00006801 * @ctxt: an XML parser context
6802 *
6803 * parse an end of tag
6804 *
6805 * [42] ETag ::= '</' Name S? '>'
6806 *
6807 * With namespace
6808 *
6809 * [NS 9] ETag ::= '</' QName S? '>'
6810 */
6811
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006812static void
6813xmlParseEndTagInternal(xmlParserCtxtPtr ctxt, int line) {
Owen Taylor3473f882001-02-23 17:55:21 +00006814 xmlChar *name;
6815 xmlChar *oldname;
6816
6817 GROW;
6818 if ((RAW != '<') || (NXT(1) != '/')) {
6819 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6820 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6821 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6822 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006823 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006824 return;
6825 }
6826 SKIP(2);
6827
Daniel Veillard46de64e2002-05-29 08:21:33 +00006828 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006829
6830 /*
6831 * We should definitely be at the ending "S? '>'" part
6832 */
6833 GROW;
6834 SKIP_BLANKS;
6835 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6836 ctxt->errNo = XML_ERR_GT_REQUIRED;
6837 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6838 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6839 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006840 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006841 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006842 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006843
6844 /*
6845 * [ WFC: Element Type Match ]
6846 * The Name in an element's end-tag must match the element type in the
6847 * start-tag.
6848 *
6849 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006850 if (name != (xmlChar*)1) {
Owen Taylor3473f882001-02-23 17:55:21 +00006851 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6852 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00006853 if (name != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00006854 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006855 "Opening and ending tag mismatch: %s line %d and %s\n",
6856 ctxt->name, line, name);
Daniel Veillard46de64e2002-05-29 08:21:33 +00006857 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006858 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006859 "Ending tag error for: %s line %d\n", ctxt->name, line);
Owen Taylor3473f882001-02-23 17:55:21 +00006860 }
6861
6862 }
6863 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006864 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006865 if (name != NULL)
6866 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00006867 }
6868
6869 /*
6870 * SAX: End of Tag
6871 */
6872 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6873 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006874 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006875
Owen Taylor3473f882001-02-23 17:55:21 +00006876 oldname = namePop(ctxt);
6877 spacePop(ctxt);
6878 if (oldname != NULL) {
6879#ifdef DEBUG_STACK
6880 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6881#endif
6882 xmlFree(oldname);
6883 }
6884 return;
6885}
6886
6887/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006888 * xmlParseEndTag:
6889 * @ctxt: an XML parser context
6890 *
6891 * parse an end of tag
6892 *
6893 * [42] ETag ::= '</' Name S? '>'
6894 *
6895 * With namespace
6896 *
6897 * [NS 9] ETag ::= '</' QName S? '>'
6898 */
6899
6900void
6901xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6902 xmlParseEndTagInternal(ctxt, 0);
6903}
6904
6905/**
Owen Taylor3473f882001-02-23 17:55:21 +00006906 * xmlParseCDSect:
6907 * @ctxt: an XML parser context
6908 *
6909 * Parse escaped pure raw content.
6910 *
6911 * [18] CDSect ::= CDStart CData CDEnd
6912 *
6913 * [19] CDStart ::= '<![CDATA['
6914 *
6915 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6916 *
6917 * [21] CDEnd ::= ']]>'
6918 */
6919void
6920xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6921 xmlChar *buf = NULL;
6922 int len = 0;
6923 int size = XML_PARSER_BUFFER_SIZE;
6924 int r, rl;
6925 int s, sl;
6926 int cur, l;
6927 int count = 0;
6928
6929 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6930 (NXT(2) == '[') && (NXT(3) == 'C') &&
6931 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6932 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6933 (NXT(8) == '[')) {
6934 SKIP(9);
6935 } else
6936 return;
6937
6938 ctxt->instate = XML_PARSER_CDATA_SECTION;
6939 r = CUR_CHAR(rl);
6940 if (!IS_CHAR(r)) {
6941 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6942 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6943 ctxt->sax->error(ctxt->userData,
6944 "CData section not finished\n");
6945 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006946 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006947 ctxt->instate = XML_PARSER_CONTENT;
6948 return;
6949 }
6950 NEXTL(rl);
6951 s = CUR_CHAR(sl);
6952 if (!IS_CHAR(s)) {
6953 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6954 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6955 ctxt->sax->error(ctxt->userData,
6956 "CData section not finished\n");
6957 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006958 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006959 ctxt->instate = XML_PARSER_CONTENT;
6960 return;
6961 }
6962 NEXTL(sl);
6963 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00006964 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00006965 if (buf == NULL) {
6966 xmlGenericError(xmlGenericErrorContext,
6967 "malloc of %d byte failed\n", size);
6968 return;
6969 }
6970 while (IS_CHAR(cur) &&
6971 ((r != ']') || (s != ']') || (cur != '>'))) {
6972 if (len + 5 >= size) {
6973 size *= 2;
6974 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6975 if (buf == NULL) {
6976 xmlGenericError(xmlGenericErrorContext,
6977 "realloc of %d byte failed\n", size);
6978 return;
6979 }
6980 }
6981 COPY_BUF(rl,buf,len,r);
6982 r = s;
6983 rl = sl;
6984 s = cur;
6985 sl = l;
6986 count++;
6987 if (count > 50) {
6988 GROW;
6989 count = 0;
6990 }
6991 NEXTL(l);
6992 cur = CUR_CHAR(l);
6993 }
6994 buf[len] = 0;
6995 ctxt->instate = XML_PARSER_CONTENT;
6996 if (cur != '>') {
6997 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6998 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6999 ctxt->sax->error(ctxt->userData,
7000 "CData section not finished\n%.50s\n", buf);
7001 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007002 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007003 xmlFree(buf);
7004 return;
7005 }
7006 NEXTL(l);
7007
7008 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007009 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00007010 */
7011 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
7012 if (ctxt->sax->cdataBlock != NULL)
7013 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00007014 else if (ctxt->sax->characters != NULL)
7015 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00007016 }
7017 xmlFree(buf);
7018}
7019
7020/**
7021 * xmlParseContent:
7022 * @ctxt: an XML parser context
7023 *
7024 * Parse a content:
7025 *
7026 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
7027 */
7028
7029void
7030xmlParseContent(xmlParserCtxtPtr ctxt) {
7031 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00007032 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00007033 ((RAW != '<') || (NXT(1) != '/'))) {
7034 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007035 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00007036 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00007037
7038 /*
Owen Taylor3473f882001-02-23 17:55:21 +00007039 * First case : a Processing Instruction.
7040 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00007041 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007042 xmlParsePI(ctxt);
7043 }
7044
7045 /*
7046 * Second case : a CDSection
7047 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007048 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00007049 (NXT(2) == '[') && (NXT(3) == 'C') &&
7050 (NXT(4) == 'D') && (NXT(5) == 'A') &&
7051 (NXT(6) == 'T') && (NXT(7) == 'A') &&
7052 (NXT(8) == '[')) {
7053 xmlParseCDSect(ctxt);
7054 }
7055
7056 /*
7057 * Third case : a comment
7058 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007059 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00007060 (NXT(2) == '-') && (NXT(3) == '-')) {
7061 xmlParseComment(ctxt);
7062 ctxt->instate = XML_PARSER_CONTENT;
7063 }
7064
7065 /*
7066 * Fourth case : a sub-element.
7067 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007068 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00007069 xmlParseElement(ctxt);
7070 }
7071
7072 /*
7073 * Fifth case : a reference. If if has not been resolved,
7074 * parsing returns it's Name, create the node
7075 */
7076
Daniel Veillard21a0f912001-02-25 19:54:14 +00007077 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00007078 xmlParseReference(ctxt);
7079 }
7080
7081 /*
7082 * Last case, text. Note that References are handled directly.
7083 */
7084 else {
7085 xmlParseCharData(ctxt, 0);
7086 }
7087
7088 GROW;
7089 /*
7090 * Pop-up of finished entities.
7091 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00007092 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00007093 xmlPopInput(ctxt);
7094 SHRINK;
7095
Daniel Veillardfdc91562002-07-01 21:52:03 +00007096 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007097 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
7098 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7099 ctxt->sax->error(ctxt->userData,
7100 "detected an error in element content\n");
7101 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007102 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007103 ctxt->instate = XML_PARSER_EOF;
7104 break;
7105 }
7106 }
7107}
7108
7109/**
7110 * xmlParseElement:
7111 * @ctxt: an XML parser context
7112 *
7113 * parse an XML element, this is highly recursive
7114 *
7115 * [39] element ::= EmptyElemTag | STag content ETag
7116 *
7117 * [ WFC: Element Type Match ]
7118 * The Name in an element's end-tag must match the element type in the
7119 * start-tag.
7120 *
7121 * [ VC: Element Valid ]
7122 * An element is valid if there is a declaration matching elementdecl
7123 * where the Name matches the element type and one of the following holds:
7124 * - The declaration matches EMPTY and the element has no content.
7125 * - The declaration matches children and the sequence of child elements
7126 * belongs to the language generated by the regular expression in the
7127 * content model, with optional white space (characters matching the
7128 * nonterminal S) between each pair of child elements.
7129 * - The declaration matches Mixed and the content consists of character
7130 * data and child elements whose types match names in the content model.
7131 * - The declaration matches ANY, and the types of any child elements have
7132 * been declared.
7133 */
7134
7135void
7136xmlParseElement(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00007137 xmlChar *name;
7138 xmlChar *oldname;
7139 xmlParserNodeInfo node_info;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007140 int line;
Owen Taylor3473f882001-02-23 17:55:21 +00007141 xmlNodePtr ret;
7142
7143 /* Capture start position */
7144 if (ctxt->record_info) {
7145 node_info.begin_pos = ctxt->input->consumed +
7146 (CUR_PTR - ctxt->input->base);
7147 node_info.begin_line = ctxt->input->line;
7148 }
7149
7150 if (ctxt->spaceNr == 0)
7151 spacePush(ctxt, -1);
7152 else
7153 spacePush(ctxt, *ctxt->space);
7154
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007155 line = ctxt->input->line;
Owen Taylor3473f882001-02-23 17:55:21 +00007156 name = xmlParseStartTag(ctxt);
7157 if (name == NULL) {
7158 spacePop(ctxt);
7159 return;
7160 }
7161 namePush(ctxt, name);
7162 ret = ctxt->node;
7163
7164 /*
7165 * [ VC: Root Element Type ]
7166 * The Name in the document type declaration must match the element
7167 * type of the root element.
7168 */
7169 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7170 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7171 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7172
7173 /*
7174 * Check for an Empty Element.
7175 */
7176 if ((RAW == '/') && (NXT(1) == '>')) {
7177 SKIP(2);
7178 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7179 (!ctxt->disableSAX))
7180 ctxt->sax->endElement(ctxt->userData, name);
7181 oldname = namePop(ctxt);
7182 spacePop(ctxt);
7183 if (oldname != NULL) {
7184#ifdef DEBUG_STACK
7185 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7186#endif
7187 xmlFree(oldname);
7188 }
7189 if ( ret != NULL && ctxt->record_info ) {
7190 node_info.end_pos = ctxt->input->consumed +
7191 (CUR_PTR - ctxt->input->base);
7192 node_info.end_line = ctxt->input->line;
7193 node_info.node = ret;
7194 xmlParserAddNodeInfo(ctxt, &node_info);
7195 }
7196 return;
7197 }
7198 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00007199 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007200 } else {
7201 ctxt->errNo = XML_ERR_GT_REQUIRED;
7202 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7203 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007204 "Couldn't find end of Start Tag %s line %d\n",
7205 name, line);
Owen Taylor3473f882001-02-23 17:55:21 +00007206 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007207 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007208
7209 /*
7210 * end of parsing of this node.
7211 */
7212 nodePop(ctxt);
7213 oldname = namePop(ctxt);
7214 spacePop(ctxt);
7215 if (oldname != NULL) {
7216#ifdef DEBUG_STACK
7217 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7218#endif
7219 xmlFree(oldname);
7220 }
7221
7222 /*
7223 * Capture end position and add node
7224 */
7225 if ( ret != NULL && ctxt->record_info ) {
7226 node_info.end_pos = ctxt->input->consumed +
7227 (CUR_PTR - ctxt->input->base);
7228 node_info.end_line = ctxt->input->line;
7229 node_info.node = ret;
7230 xmlParserAddNodeInfo(ctxt, &node_info);
7231 }
7232 return;
7233 }
7234
7235 /*
7236 * Parse the content of the element:
7237 */
7238 xmlParseContent(ctxt);
7239 if (!IS_CHAR(RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00007240 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00007241 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7242 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007243 "Premature end of data in tag %s line %d\n", name, line);
Owen Taylor3473f882001-02-23 17:55:21 +00007244 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007245 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007246
7247 /*
7248 * end of parsing of this node.
7249 */
7250 nodePop(ctxt);
7251 oldname = namePop(ctxt);
7252 spacePop(ctxt);
7253 if (oldname != NULL) {
7254#ifdef DEBUG_STACK
7255 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7256#endif
7257 xmlFree(oldname);
7258 }
7259 return;
7260 }
7261
7262 /*
7263 * parse the end of tag: '</' should be here.
7264 */
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007265 xmlParseEndTagInternal(ctxt, line);
Owen Taylor3473f882001-02-23 17:55:21 +00007266
7267 /*
7268 * Capture end position and add node
7269 */
7270 if ( ret != NULL && ctxt->record_info ) {
7271 node_info.end_pos = ctxt->input->consumed +
7272 (CUR_PTR - ctxt->input->base);
7273 node_info.end_line = ctxt->input->line;
7274 node_info.node = ret;
7275 xmlParserAddNodeInfo(ctxt, &node_info);
7276 }
7277}
7278
7279/**
7280 * xmlParseVersionNum:
7281 * @ctxt: an XML parser context
7282 *
7283 * parse the XML version value.
7284 *
7285 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7286 *
7287 * Returns the string giving the XML version number, or NULL
7288 */
7289xmlChar *
7290xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7291 xmlChar *buf = NULL;
7292 int len = 0;
7293 int size = 10;
7294 xmlChar cur;
7295
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007296 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007297 if (buf == NULL) {
7298 xmlGenericError(xmlGenericErrorContext,
7299 "malloc of %d byte failed\n", size);
7300 return(NULL);
7301 }
7302 cur = CUR;
7303 while (((cur >= 'a') && (cur <= 'z')) ||
7304 ((cur >= 'A') && (cur <= 'Z')) ||
7305 ((cur >= '0') && (cur <= '9')) ||
7306 (cur == '_') || (cur == '.') ||
7307 (cur == ':') || (cur == '-')) {
7308 if (len + 1 >= size) {
7309 size *= 2;
7310 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7311 if (buf == NULL) {
7312 xmlGenericError(xmlGenericErrorContext,
7313 "realloc of %d byte failed\n", size);
7314 return(NULL);
7315 }
7316 }
7317 buf[len++] = cur;
7318 NEXT;
7319 cur=CUR;
7320 }
7321 buf[len] = 0;
7322 return(buf);
7323}
7324
7325/**
7326 * xmlParseVersionInfo:
7327 * @ctxt: an XML parser context
7328 *
7329 * parse the XML version.
7330 *
7331 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7332 *
7333 * [25] Eq ::= S? '=' S?
7334 *
7335 * Returns the version string, e.g. "1.0"
7336 */
7337
7338xmlChar *
7339xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7340 xmlChar *version = NULL;
7341 const xmlChar *q;
7342
7343 if ((RAW == 'v') && (NXT(1) == 'e') &&
7344 (NXT(2) == 'r') && (NXT(3) == 's') &&
7345 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7346 (NXT(6) == 'n')) {
7347 SKIP(7);
7348 SKIP_BLANKS;
7349 if (RAW != '=') {
7350 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7351 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7352 ctxt->sax->error(ctxt->userData,
7353 "xmlParseVersionInfo : expected '='\n");
7354 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007355 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007356 return(NULL);
7357 }
7358 NEXT;
7359 SKIP_BLANKS;
7360 if (RAW == '"') {
7361 NEXT;
7362 q = CUR_PTR;
7363 version = xmlParseVersionNum(ctxt);
7364 if (RAW != '"') {
7365 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7366 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7367 ctxt->sax->error(ctxt->userData,
7368 "String not closed\n%.50s\n", q);
7369 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007370 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007371 } else
7372 NEXT;
7373 } else if (RAW == '\''){
7374 NEXT;
7375 q = CUR_PTR;
7376 version = xmlParseVersionNum(ctxt);
7377 if (RAW != '\'') {
7378 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7379 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7380 ctxt->sax->error(ctxt->userData,
7381 "String not closed\n%.50s\n", q);
7382 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007383 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007384 } else
7385 NEXT;
7386 } else {
7387 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7388 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7389 ctxt->sax->error(ctxt->userData,
7390 "xmlParseVersionInfo : expected ' or \"\n");
7391 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007392 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007393 }
7394 }
7395 return(version);
7396}
7397
7398/**
7399 * xmlParseEncName:
7400 * @ctxt: an XML parser context
7401 *
7402 * parse the XML encoding name
7403 *
7404 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7405 *
7406 * Returns the encoding name value or NULL
7407 */
7408xmlChar *
7409xmlParseEncName(xmlParserCtxtPtr ctxt) {
7410 xmlChar *buf = NULL;
7411 int len = 0;
7412 int size = 10;
7413 xmlChar cur;
7414
7415 cur = CUR;
7416 if (((cur >= 'a') && (cur <= 'z')) ||
7417 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007418 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007419 if (buf == NULL) {
7420 xmlGenericError(xmlGenericErrorContext,
7421 "malloc of %d byte failed\n", size);
7422 return(NULL);
7423 }
7424
7425 buf[len++] = cur;
7426 NEXT;
7427 cur = CUR;
7428 while (((cur >= 'a') && (cur <= 'z')) ||
7429 ((cur >= 'A') && (cur <= 'Z')) ||
7430 ((cur >= '0') && (cur <= '9')) ||
7431 (cur == '.') || (cur == '_') ||
7432 (cur == '-')) {
7433 if (len + 1 >= size) {
7434 size *= 2;
7435 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7436 if (buf == NULL) {
7437 xmlGenericError(xmlGenericErrorContext,
7438 "realloc of %d byte failed\n", size);
7439 return(NULL);
7440 }
7441 }
7442 buf[len++] = cur;
7443 NEXT;
7444 cur = CUR;
7445 if (cur == 0) {
7446 SHRINK;
7447 GROW;
7448 cur = CUR;
7449 }
7450 }
7451 buf[len] = 0;
7452 } else {
7453 ctxt->errNo = XML_ERR_ENCODING_NAME;
7454 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7455 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7456 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007457 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007458 }
7459 return(buf);
7460}
7461
7462/**
7463 * xmlParseEncodingDecl:
7464 * @ctxt: an XML parser context
7465 *
7466 * parse the XML encoding declaration
7467 *
7468 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7469 *
7470 * this setups the conversion filters.
7471 *
7472 * Returns the encoding value or NULL
7473 */
7474
7475xmlChar *
7476xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7477 xmlChar *encoding = NULL;
7478 const xmlChar *q;
7479
7480 SKIP_BLANKS;
7481 if ((RAW == 'e') && (NXT(1) == 'n') &&
7482 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7483 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7484 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7485 SKIP(8);
7486 SKIP_BLANKS;
7487 if (RAW != '=') {
7488 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7489 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7490 ctxt->sax->error(ctxt->userData,
7491 "xmlParseEncodingDecl : expected '='\n");
7492 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007493 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007494 return(NULL);
7495 }
7496 NEXT;
7497 SKIP_BLANKS;
7498 if (RAW == '"') {
7499 NEXT;
7500 q = CUR_PTR;
7501 encoding = xmlParseEncName(ctxt);
7502 if (RAW != '"') {
7503 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7504 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7505 ctxt->sax->error(ctxt->userData,
7506 "String not closed\n%.50s\n", q);
7507 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007508 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007509 } else
7510 NEXT;
7511 } else if (RAW == '\''){
7512 NEXT;
7513 q = CUR_PTR;
7514 encoding = xmlParseEncName(ctxt);
7515 if (RAW != '\'') {
7516 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7517 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7518 ctxt->sax->error(ctxt->userData,
7519 "String not closed\n%.50s\n", q);
7520 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007521 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007522 } else
7523 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00007524 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007525 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7526 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7527 ctxt->sax->error(ctxt->userData,
7528 "xmlParseEncodingDecl : expected ' or \"\n");
7529 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007530 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007531 }
7532 if (encoding != NULL) {
7533 xmlCharEncoding enc;
7534 xmlCharEncodingHandlerPtr handler;
7535
7536 if (ctxt->input->encoding != NULL)
7537 xmlFree((xmlChar *) ctxt->input->encoding);
7538 ctxt->input->encoding = encoding;
7539
7540 enc = xmlParseCharEncoding((const char *) encoding);
7541 /*
7542 * registered set of known encodings
7543 */
7544 if (enc != XML_CHAR_ENCODING_ERROR) {
7545 xmlSwitchEncoding(ctxt, enc);
7546 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
Daniel Veillard46d6c442002-04-09 16:10:39 +00007547 ctxt->input->encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007548 xmlFree(encoding);
7549 return(NULL);
7550 }
7551 } else {
7552 /*
7553 * fallback for unknown encodings
7554 */
7555 handler = xmlFindCharEncodingHandler((const char *) encoding);
7556 if (handler != NULL) {
7557 xmlSwitchToEncoding(ctxt, handler);
7558 } else {
7559 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7560 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7561 ctxt->sax->error(ctxt->userData,
7562 "Unsupported encoding %s\n", encoding);
7563 return(NULL);
7564 }
7565 }
7566 }
7567 }
7568 return(encoding);
7569}
7570
7571/**
7572 * xmlParseSDDecl:
7573 * @ctxt: an XML parser context
7574 *
7575 * parse the XML standalone declaration
7576 *
7577 * [32] SDDecl ::= S 'standalone' Eq
7578 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7579 *
7580 * [ VC: Standalone Document Declaration ]
7581 * TODO The standalone document declaration must have the value "no"
7582 * if any external markup declarations contain declarations of:
7583 * - attributes with default values, if elements to which these
7584 * attributes apply appear in the document without specifications
7585 * of values for these attributes, or
7586 * - entities (other than amp, lt, gt, apos, quot), if references
7587 * to those entities appear in the document, or
7588 * - attributes with values subject to normalization, where the
7589 * attribute appears in the document with a value which will change
7590 * as a result of normalization, or
7591 * - element types with element content, if white space occurs directly
7592 * within any instance of those types.
7593 *
7594 * Returns 1 if standalone, 0 otherwise
7595 */
7596
7597int
7598xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7599 int standalone = -1;
7600
7601 SKIP_BLANKS;
7602 if ((RAW == 's') && (NXT(1) == 't') &&
7603 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7604 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7605 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7606 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7607 SKIP(10);
7608 SKIP_BLANKS;
7609 if (RAW != '=') {
7610 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7611 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7612 ctxt->sax->error(ctxt->userData,
7613 "XML standalone declaration : expected '='\n");
7614 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007615 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007616 return(standalone);
7617 }
7618 NEXT;
7619 SKIP_BLANKS;
7620 if (RAW == '\''){
7621 NEXT;
7622 if ((RAW == 'n') && (NXT(1) == 'o')) {
7623 standalone = 0;
7624 SKIP(2);
7625 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7626 (NXT(2) == 's')) {
7627 standalone = 1;
7628 SKIP(3);
7629 } else {
7630 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7631 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7632 ctxt->sax->error(ctxt->userData,
7633 "standalone accepts only 'yes' or 'no'\n");
7634 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007635 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007636 }
7637 if (RAW != '\'') {
7638 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7639 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7640 ctxt->sax->error(ctxt->userData, "String not closed\n");
7641 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007642 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007643 } else
7644 NEXT;
7645 } else if (RAW == '"'){
7646 NEXT;
7647 if ((RAW == 'n') && (NXT(1) == 'o')) {
7648 standalone = 0;
7649 SKIP(2);
7650 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7651 (NXT(2) == 's')) {
7652 standalone = 1;
7653 SKIP(3);
7654 } else {
7655 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7656 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7657 ctxt->sax->error(ctxt->userData,
7658 "standalone accepts only 'yes' or 'no'\n");
7659 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007660 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007661 }
7662 if (RAW != '"') {
7663 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7664 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7665 ctxt->sax->error(ctxt->userData, "String not closed\n");
7666 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007667 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007668 } else
7669 NEXT;
7670 } else {
7671 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7672 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7673 ctxt->sax->error(ctxt->userData,
7674 "Standalone value not found\n");
7675 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007676 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007677 }
7678 }
7679 return(standalone);
7680}
7681
7682/**
7683 * xmlParseXMLDecl:
7684 * @ctxt: an XML parser context
7685 *
7686 * parse an XML declaration header
7687 *
7688 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7689 */
7690
7691void
7692xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7693 xmlChar *version;
7694
7695 /*
7696 * We know that '<?xml' is here.
7697 */
7698 SKIP(5);
7699
7700 if (!IS_BLANK(RAW)) {
7701 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7702 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7703 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7704 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007705 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007706 }
7707 SKIP_BLANKS;
7708
7709 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007710 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007711 */
7712 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007713 if (version == NULL) {
7714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7715 ctxt->sax->error(ctxt->userData,
7716 "Malformed declaration expecting version\n");
7717 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007718 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard19840942001-11-29 16:11:38 +00007719 } else {
7720 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7721 /*
7722 * TODO: Blueberry should be detected here
7723 */
7724 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7725 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7726 version);
7727 }
7728 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007729 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007730 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007731 }
Owen Taylor3473f882001-02-23 17:55:21 +00007732
7733 /*
7734 * We may have the encoding declaration
7735 */
7736 if (!IS_BLANK(RAW)) {
7737 if ((RAW == '?') && (NXT(1) == '>')) {
7738 SKIP(2);
7739 return;
7740 }
7741 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7742 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7743 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7744 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007745 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007746 }
7747 xmlParseEncodingDecl(ctxt);
7748 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7749 /*
7750 * The XML REC instructs us to stop parsing right here
7751 */
7752 return;
7753 }
7754
7755 /*
7756 * We may have the standalone status.
7757 */
7758 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7759 if ((RAW == '?') && (NXT(1) == '>')) {
7760 SKIP(2);
7761 return;
7762 }
7763 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7764 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7765 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7766 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007767 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007768 }
7769 SKIP_BLANKS;
7770 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7771
7772 SKIP_BLANKS;
7773 if ((RAW == '?') && (NXT(1) == '>')) {
7774 SKIP(2);
7775 } else if (RAW == '>') {
7776 /* Deprecated old WD ... */
7777 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7778 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7779 ctxt->sax->error(ctxt->userData,
7780 "XML declaration must end-up with '?>'\n");
7781 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007782 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007783 NEXT;
7784 } else {
7785 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7786 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7787 ctxt->sax->error(ctxt->userData,
7788 "parsing XML declaration: '?>' expected\n");
7789 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007790 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007791 MOVETO_ENDTAG(CUR_PTR);
7792 NEXT;
7793 }
7794}
7795
7796/**
7797 * xmlParseMisc:
7798 * @ctxt: an XML parser context
7799 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007800 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007801 *
7802 * [27] Misc ::= Comment | PI | S
7803 */
7804
7805void
7806xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00007807 while (((RAW == '<') && (NXT(1) == '?')) ||
7808 ((RAW == '<') && (NXT(1) == '!') &&
7809 (NXT(2) == '-') && (NXT(3) == '-')) ||
7810 IS_BLANK(CUR)) {
7811 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007812 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00007813 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007814 NEXT;
7815 } else
7816 xmlParseComment(ctxt);
7817 }
7818}
7819
7820/**
7821 * xmlParseDocument:
7822 * @ctxt: an XML parser context
7823 *
7824 * parse an XML document (and build a tree if using the standard SAX
7825 * interface).
7826 *
7827 * [1] document ::= prolog element Misc*
7828 *
7829 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7830 *
7831 * Returns 0, -1 in case of error. the parser context is augmented
7832 * as a result of the parsing.
7833 */
7834
7835int
7836xmlParseDocument(xmlParserCtxtPtr ctxt) {
7837 xmlChar start[4];
7838 xmlCharEncoding enc;
7839
7840 xmlInitParser();
7841
7842 GROW;
7843
7844 /*
7845 * SAX: beginning of the document processing.
7846 */
7847 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7848 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7849
Daniel Veillard50f34372001-08-03 12:06:36 +00007850 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007851 /*
7852 * Get the 4 first bytes and decode the charset
7853 * if enc != XML_CHAR_ENCODING_NONE
7854 * plug some encoding conversion routines.
7855 */
7856 start[0] = RAW;
7857 start[1] = NXT(1);
7858 start[2] = NXT(2);
7859 start[3] = NXT(3);
7860 enc = xmlDetectCharEncoding(start, 4);
7861 if (enc != XML_CHAR_ENCODING_NONE) {
7862 xmlSwitchEncoding(ctxt, enc);
7863 }
Owen Taylor3473f882001-02-23 17:55:21 +00007864 }
7865
7866
7867 if (CUR == 0) {
7868 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7869 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7870 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7871 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007872 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007873 }
7874
7875 /*
7876 * Check for the XMLDecl in the Prolog.
7877 */
7878 GROW;
7879 if ((RAW == '<') && (NXT(1) == '?') &&
7880 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7881 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7882
7883 /*
7884 * Note that we will switch encoding on the fly.
7885 */
7886 xmlParseXMLDecl(ctxt);
7887 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7888 /*
7889 * The XML REC instructs us to stop parsing right here
7890 */
7891 return(-1);
7892 }
7893 ctxt->standalone = ctxt->input->standalone;
7894 SKIP_BLANKS;
7895 } else {
7896 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7897 }
7898 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7899 ctxt->sax->startDocument(ctxt->userData);
7900
7901 /*
7902 * The Misc part of the Prolog
7903 */
7904 GROW;
7905 xmlParseMisc(ctxt);
7906
7907 /*
7908 * Then possibly doc type declaration(s) and more Misc
7909 * (doctypedecl Misc*)?
7910 */
7911 GROW;
7912 if ((RAW == '<') && (NXT(1) == '!') &&
7913 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7914 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7915 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7916 (NXT(8) == 'E')) {
7917
7918 ctxt->inSubset = 1;
7919 xmlParseDocTypeDecl(ctxt);
7920 if (RAW == '[') {
7921 ctxt->instate = XML_PARSER_DTD;
7922 xmlParseInternalSubset(ctxt);
7923 }
7924
7925 /*
7926 * Create and update the external subset.
7927 */
7928 ctxt->inSubset = 2;
7929 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7930 (!ctxt->disableSAX))
7931 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7932 ctxt->extSubSystem, ctxt->extSubURI);
7933 ctxt->inSubset = 0;
7934
7935
7936 ctxt->instate = XML_PARSER_PROLOG;
7937 xmlParseMisc(ctxt);
7938 }
7939
7940 /*
7941 * Time to start parsing the tree itself
7942 */
7943 GROW;
7944 if (RAW != '<') {
7945 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7946 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7947 ctxt->sax->error(ctxt->userData,
7948 "Start tag expected, '<' not found\n");
7949 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007950 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007951 ctxt->instate = XML_PARSER_EOF;
7952 } else {
7953 ctxt->instate = XML_PARSER_CONTENT;
7954 xmlParseElement(ctxt);
7955 ctxt->instate = XML_PARSER_EPILOG;
7956
7957
7958 /*
7959 * The Misc part at the end
7960 */
7961 xmlParseMisc(ctxt);
7962
Daniel Veillard561b7f82002-03-20 21:55:57 +00007963 if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00007964 ctxt->errNo = XML_ERR_DOCUMENT_END;
7965 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7966 ctxt->sax->error(ctxt->userData,
7967 "Extra content at the end of the document\n");
7968 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007969 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007970 }
7971 ctxt->instate = XML_PARSER_EOF;
7972 }
7973
7974 /*
7975 * SAX: end of the document processing.
7976 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007977 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007978 ctxt->sax->endDocument(ctxt->userData);
7979
Daniel Veillard5997aca2002-03-18 18:36:20 +00007980 /*
7981 * Remove locally kept entity definitions if the tree was not built
7982 */
7983 if ((ctxt->myDoc != NULL) &&
7984 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
7985 xmlFreeDoc(ctxt->myDoc);
7986 ctxt->myDoc = NULL;
7987 }
7988
Daniel Veillardc7612992002-02-17 22:47:37 +00007989 if (! ctxt->wellFormed) {
7990 ctxt->valid = 0;
7991 return(-1);
7992 }
Owen Taylor3473f882001-02-23 17:55:21 +00007993 return(0);
7994}
7995
7996/**
7997 * xmlParseExtParsedEnt:
7998 * @ctxt: an XML parser context
7999 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008000 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00008001 * An external general parsed entity is well-formed if it matches the
8002 * production labeled extParsedEnt.
8003 *
8004 * [78] extParsedEnt ::= TextDecl? content
8005 *
8006 * Returns 0, -1 in case of error. the parser context is augmented
8007 * as a result of the parsing.
8008 */
8009
8010int
8011xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
8012 xmlChar start[4];
8013 xmlCharEncoding enc;
8014
8015 xmlDefaultSAXHandlerInit();
8016
8017 GROW;
8018
8019 /*
8020 * SAX: beginning of the document processing.
8021 */
8022 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8023 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8024
8025 /*
8026 * Get the 4 first bytes and decode the charset
8027 * if enc != XML_CHAR_ENCODING_NONE
8028 * plug some encoding conversion routines.
8029 */
8030 start[0] = RAW;
8031 start[1] = NXT(1);
8032 start[2] = NXT(2);
8033 start[3] = NXT(3);
8034 enc = xmlDetectCharEncoding(start, 4);
8035 if (enc != XML_CHAR_ENCODING_NONE) {
8036 xmlSwitchEncoding(ctxt, enc);
8037 }
8038
8039
8040 if (CUR == 0) {
8041 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8042 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8043 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8044 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008045 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008046 }
8047
8048 /*
8049 * Check for the XMLDecl in the Prolog.
8050 */
8051 GROW;
8052 if ((RAW == '<') && (NXT(1) == '?') &&
8053 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8054 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8055
8056 /*
8057 * Note that we will switch encoding on the fly.
8058 */
8059 xmlParseXMLDecl(ctxt);
8060 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8061 /*
8062 * The XML REC instructs us to stop parsing right here
8063 */
8064 return(-1);
8065 }
8066 SKIP_BLANKS;
8067 } else {
8068 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8069 }
8070 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8071 ctxt->sax->startDocument(ctxt->userData);
8072
8073 /*
8074 * Doing validity checking on chunk doesn't make sense
8075 */
8076 ctxt->instate = XML_PARSER_CONTENT;
8077 ctxt->validate = 0;
8078 ctxt->loadsubset = 0;
8079 ctxt->depth = 0;
8080
8081 xmlParseContent(ctxt);
8082
8083 if ((RAW == '<') && (NXT(1) == '/')) {
8084 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8085 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8086 ctxt->sax->error(ctxt->userData,
8087 "chunk is not well balanced\n");
8088 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008089 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008090 } else if (RAW != 0) {
8091 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8092 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8093 ctxt->sax->error(ctxt->userData,
8094 "extra content at the end of well balanced chunk\n");
8095 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008096 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008097 }
8098
8099 /*
8100 * SAX: end of the document processing.
8101 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008102 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008103 ctxt->sax->endDocument(ctxt->userData);
8104
8105 if (! ctxt->wellFormed) return(-1);
8106 return(0);
8107}
8108
8109/************************************************************************
8110 * *
8111 * Progressive parsing interfaces *
8112 * *
8113 ************************************************************************/
8114
8115/**
8116 * xmlParseLookupSequence:
8117 * @ctxt: an XML parser context
8118 * @first: the first char to lookup
8119 * @next: the next char to lookup or zero
8120 * @third: the next char to lookup or zero
8121 *
8122 * Try to find if a sequence (first, next, third) or just (first next) or
8123 * (first) is available in the input stream.
8124 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8125 * to avoid rescanning sequences of bytes, it DOES change the state of the
8126 * parser, do not use liberally.
8127 *
8128 * Returns the index to the current parsing point if the full sequence
8129 * is available, -1 otherwise.
8130 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008131static int
Owen Taylor3473f882001-02-23 17:55:21 +00008132xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8133 xmlChar next, xmlChar third) {
8134 int base, len;
8135 xmlParserInputPtr in;
8136 const xmlChar *buf;
8137
8138 in = ctxt->input;
8139 if (in == NULL) return(-1);
8140 base = in->cur - in->base;
8141 if (base < 0) return(-1);
8142 if (ctxt->checkIndex > base)
8143 base = ctxt->checkIndex;
8144 if (in->buf == NULL) {
8145 buf = in->base;
8146 len = in->length;
8147 } else {
8148 buf = in->buf->buffer->content;
8149 len = in->buf->buffer->use;
8150 }
8151 /* take into account the sequence length */
8152 if (third) len -= 2;
8153 else if (next) len --;
8154 for (;base < len;base++) {
8155 if (buf[base] == first) {
8156 if (third != 0) {
8157 if ((buf[base + 1] != next) ||
8158 (buf[base + 2] != third)) continue;
8159 } else if (next != 0) {
8160 if (buf[base + 1] != next) continue;
8161 }
8162 ctxt->checkIndex = 0;
8163#ifdef DEBUG_PUSH
8164 if (next == 0)
8165 xmlGenericError(xmlGenericErrorContext,
8166 "PP: lookup '%c' found at %d\n",
8167 first, base);
8168 else if (third == 0)
8169 xmlGenericError(xmlGenericErrorContext,
8170 "PP: lookup '%c%c' found at %d\n",
8171 first, next, base);
8172 else
8173 xmlGenericError(xmlGenericErrorContext,
8174 "PP: lookup '%c%c%c' found at %d\n",
8175 first, next, third, base);
8176#endif
8177 return(base - (in->cur - in->base));
8178 }
8179 }
8180 ctxt->checkIndex = base;
8181#ifdef DEBUG_PUSH
8182 if (next == 0)
8183 xmlGenericError(xmlGenericErrorContext,
8184 "PP: lookup '%c' failed\n", first);
8185 else if (third == 0)
8186 xmlGenericError(xmlGenericErrorContext,
8187 "PP: lookup '%c%c' failed\n", first, next);
8188 else
8189 xmlGenericError(xmlGenericErrorContext,
8190 "PP: lookup '%c%c%c' failed\n", first, next, third);
8191#endif
8192 return(-1);
8193}
8194
8195/**
Daniel Veillarda880b122003-04-21 21:36:41 +00008196 * xmlParseGetLasts:
8197 * @ctxt: an XML parser context
8198 * @lastlt: pointer to store the last '<' from the input
8199 * @lastgt: pointer to store the last '>' from the input
8200 *
8201 * Lookup the last < and > in the current chunk
8202 */
8203static void
8204xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
8205 const xmlChar **lastgt) {
8206 const xmlChar *tmp;
8207
8208 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
8209 xmlGenericError(xmlGenericErrorContext,
8210 "Internal error: xmlParseGetLasts\n");
8211 return;
8212 }
8213 if ((ctxt->progressive == 1) && (ctxt->inputNr == 1)) {
8214 tmp = ctxt->input->end;
8215 tmp--;
8216 while ((tmp >= ctxt->input->base) && (*tmp != '<') &&
8217 (*tmp != '>')) tmp--;
8218 if (tmp < ctxt->input->base) {
8219 *lastlt = NULL;
8220 *lastgt = NULL;
8221 } else if (*tmp == '<') {
8222 *lastlt = tmp;
8223 tmp--;
8224 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
8225 if (tmp < ctxt->input->base)
8226 *lastgt = NULL;
8227 else
8228 *lastgt = tmp;
8229 } else {
8230 *lastgt = tmp;
8231 tmp--;
8232 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
8233 if (tmp < ctxt->input->base)
8234 *lastlt = NULL;
8235 else
8236 *lastlt = tmp;
8237 }
8238
8239 } else {
8240 *lastlt = NULL;
8241 *lastgt = NULL;
8242 }
8243}
8244/**
Owen Taylor3473f882001-02-23 17:55:21 +00008245 * xmlParseTryOrFinish:
8246 * @ctxt: an XML parser context
8247 * @terminate: last chunk indicator
8248 *
8249 * Try to progress on parsing
8250 *
8251 * Returns zero if no parsing was possible
8252 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008253static int
Owen Taylor3473f882001-02-23 17:55:21 +00008254xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8255 int ret = 0;
8256 int avail;
8257 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00008258 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00008259
8260#ifdef DEBUG_PUSH
8261 switch (ctxt->instate) {
8262 case XML_PARSER_EOF:
8263 xmlGenericError(xmlGenericErrorContext,
8264 "PP: try EOF\n"); break;
8265 case XML_PARSER_START:
8266 xmlGenericError(xmlGenericErrorContext,
8267 "PP: try START\n"); break;
8268 case XML_PARSER_MISC:
8269 xmlGenericError(xmlGenericErrorContext,
8270 "PP: try MISC\n");break;
8271 case XML_PARSER_COMMENT:
8272 xmlGenericError(xmlGenericErrorContext,
8273 "PP: try COMMENT\n");break;
8274 case XML_PARSER_PROLOG:
8275 xmlGenericError(xmlGenericErrorContext,
8276 "PP: try PROLOG\n");break;
8277 case XML_PARSER_START_TAG:
8278 xmlGenericError(xmlGenericErrorContext,
8279 "PP: try START_TAG\n");break;
8280 case XML_PARSER_CONTENT:
8281 xmlGenericError(xmlGenericErrorContext,
8282 "PP: try CONTENT\n");break;
8283 case XML_PARSER_CDATA_SECTION:
8284 xmlGenericError(xmlGenericErrorContext,
8285 "PP: try CDATA_SECTION\n");break;
8286 case XML_PARSER_END_TAG:
8287 xmlGenericError(xmlGenericErrorContext,
8288 "PP: try END_TAG\n");break;
8289 case XML_PARSER_ENTITY_DECL:
8290 xmlGenericError(xmlGenericErrorContext,
8291 "PP: try ENTITY_DECL\n");break;
8292 case XML_PARSER_ENTITY_VALUE:
8293 xmlGenericError(xmlGenericErrorContext,
8294 "PP: try ENTITY_VALUE\n");break;
8295 case XML_PARSER_ATTRIBUTE_VALUE:
8296 xmlGenericError(xmlGenericErrorContext,
8297 "PP: try ATTRIBUTE_VALUE\n");break;
8298 case XML_PARSER_DTD:
8299 xmlGenericError(xmlGenericErrorContext,
8300 "PP: try DTD\n");break;
8301 case XML_PARSER_EPILOG:
8302 xmlGenericError(xmlGenericErrorContext,
8303 "PP: try EPILOG\n");break;
8304 case XML_PARSER_PI:
8305 xmlGenericError(xmlGenericErrorContext,
8306 "PP: try PI\n");break;
8307 case XML_PARSER_IGNORE:
8308 xmlGenericError(xmlGenericErrorContext,
8309 "PP: try IGNORE\n");break;
8310 }
8311#endif
8312
Daniel Veillarda880b122003-04-21 21:36:41 +00008313 if (ctxt->input->cur - ctxt->input->base > 4096) {
8314 xmlSHRINK(ctxt);
8315 ctxt->checkIndex = 0;
8316 }
8317 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00008318
Daniel Veillarda880b122003-04-21 21:36:41 +00008319 while (1) {
Owen Taylor3473f882001-02-23 17:55:21 +00008320 /*
8321 * Pop-up of finished entities.
8322 */
8323 while ((RAW == 0) && (ctxt->inputNr > 1))
8324 xmlPopInput(ctxt);
8325
8326 if (ctxt->input ==NULL) break;
8327 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00008328 avail = ctxt->input->length -
8329 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00008330 else {
8331 /*
8332 * If we are operating on converted input, try to flush
8333 * remainng chars to avoid them stalling in the non-converted
8334 * buffer.
8335 */
8336 if ((ctxt->input->buf->raw != NULL) &&
8337 (ctxt->input->buf->raw->use > 0)) {
8338 int base = ctxt->input->base -
8339 ctxt->input->buf->buffer->content;
8340 int current = ctxt->input->cur - ctxt->input->base;
8341
8342 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8343 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8344 ctxt->input->cur = ctxt->input->base + current;
8345 ctxt->input->end =
8346 &ctxt->input->buf->buffer->content[
8347 ctxt->input->buf->buffer->use];
8348 }
8349 avail = ctxt->input->buf->buffer->use -
8350 (ctxt->input->cur - ctxt->input->base);
8351 }
Owen Taylor3473f882001-02-23 17:55:21 +00008352 if (avail < 1)
8353 goto done;
8354 switch (ctxt->instate) {
8355 case XML_PARSER_EOF:
8356 /*
8357 * Document parsing is done !
8358 */
8359 goto done;
8360 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008361 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8362 xmlChar start[4];
8363 xmlCharEncoding enc;
8364
8365 /*
8366 * Very first chars read from the document flow.
8367 */
8368 if (avail < 4)
8369 goto done;
8370
8371 /*
8372 * Get the 4 first bytes and decode the charset
8373 * if enc != XML_CHAR_ENCODING_NONE
8374 * plug some encoding conversion routines.
8375 */
8376 start[0] = RAW;
8377 start[1] = NXT(1);
8378 start[2] = NXT(2);
8379 start[3] = NXT(3);
8380 enc = xmlDetectCharEncoding(start, 4);
8381 if (enc != XML_CHAR_ENCODING_NONE) {
8382 xmlSwitchEncoding(ctxt, enc);
8383 }
8384 break;
8385 }
Owen Taylor3473f882001-02-23 17:55:21 +00008386
8387 cur = ctxt->input->cur[0];
8388 next = ctxt->input->cur[1];
8389 if (cur == 0) {
8390 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8391 ctxt->sax->setDocumentLocator(ctxt->userData,
8392 &xmlDefaultSAXLocator);
8393 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8394 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8395 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8396 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008397 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008398 ctxt->instate = XML_PARSER_EOF;
8399#ifdef DEBUG_PUSH
8400 xmlGenericError(xmlGenericErrorContext,
8401 "PP: entering EOF\n");
8402#endif
8403 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8404 ctxt->sax->endDocument(ctxt->userData);
8405 goto done;
8406 }
8407 if ((cur == '<') && (next == '?')) {
8408 /* PI or XML decl */
8409 if (avail < 5) return(ret);
8410 if ((!terminate) &&
8411 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8412 return(ret);
8413 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8414 ctxt->sax->setDocumentLocator(ctxt->userData,
8415 &xmlDefaultSAXLocator);
8416 if ((ctxt->input->cur[2] == 'x') &&
8417 (ctxt->input->cur[3] == 'm') &&
8418 (ctxt->input->cur[4] == 'l') &&
8419 (IS_BLANK(ctxt->input->cur[5]))) {
8420 ret += 5;
8421#ifdef DEBUG_PUSH
8422 xmlGenericError(xmlGenericErrorContext,
8423 "PP: Parsing XML Decl\n");
8424#endif
8425 xmlParseXMLDecl(ctxt);
8426 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8427 /*
8428 * The XML REC instructs us to stop parsing right
8429 * here
8430 */
8431 ctxt->instate = XML_PARSER_EOF;
8432 return(0);
8433 }
8434 ctxt->standalone = ctxt->input->standalone;
8435 if ((ctxt->encoding == NULL) &&
8436 (ctxt->input->encoding != NULL))
8437 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8438 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8439 (!ctxt->disableSAX))
8440 ctxt->sax->startDocument(ctxt->userData);
8441 ctxt->instate = XML_PARSER_MISC;
8442#ifdef DEBUG_PUSH
8443 xmlGenericError(xmlGenericErrorContext,
8444 "PP: entering MISC\n");
8445#endif
8446 } else {
8447 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8448 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8449 (!ctxt->disableSAX))
8450 ctxt->sax->startDocument(ctxt->userData);
8451 ctxt->instate = XML_PARSER_MISC;
8452#ifdef DEBUG_PUSH
8453 xmlGenericError(xmlGenericErrorContext,
8454 "PP: entering MISC\n");
8455#endif
8456 }
8457 } else {
8458 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8459 ctxt->sax->setDocumentLocator(ctxt->userData,
8460 &xmlDefaultSAXLocator);
8461 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8462 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8463 (!ctxt->disableSAX))
8464 ctxt->sax->startDocument(ctxt->userData);
8465 ctxt->instate = XML_PARSER_MISC;
8466#ifdef DEBUG_PUSH
8467 xmlGenericError(xmlGenericErrorContext,
8468 "PP: entering MISC\n");
8469#endif
8470 }
8471 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00008472 case XML_PARSER_START_TAG: {
8473 xmlChar *name, *oldname;
8474
8475 if ((avail < 2) && (ctxt->inputNr == 1))
8476 goto done;
8477 cur = ctxt->input->cur[0];
8478 if (cur != '<') {
8479 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8480 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8481 ctxt->sax->error(ctxt->userData,
8482 "Start tag expect, '<' not found\n");
8483 ctxt->wellFormed = 0;
8484 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8485 ctxt->instate = XML_PARSER_EOF;
8486#ifdef DEBUG_PUSH
8487 xmlGenericError(xmlGenericErrorContext,
8488 "PP: entering EOF\n");
8489#endif
8490 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8491 ctxt->sax->endDocument(ctxt->userData);
8492 goto done;
8493 }
8494 if (!terminate) {
8495 if (ctxt->progressive) {
8496 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
8497 goto done;
8498 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
8499 goto done;
8500 }
8501 }
8502 if (ctxt->spaceNr == 0)
8503 spacePush(ctxt, -1);
8504 else
8505 spacePush(ctxt, *ctxt->space);
8506 name = xmlParseStartTag(ctxt);
8507 if (name == NULL) {
8508 spacePop(ctxt);
8509 ctxt->instate = XML_PARSER_EOF;
8510#ifdef DEBUG_PUSH
8511 xmlGenericError(xmlGenericErrorContext,
8512 "PP: entering EOF\n");
8513#endif
8514 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8515 ctxt->sax->endDocument(ctxt->userData);
8516 goto done;
8517 }
8518 namePush(ctxt, name);
8519
8520 /*
8521 * [ VC: Root Element Type ]
8522 * The Name in the document type declaration must match
8523 * the element type of the root element.
8524 */
8525 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8526 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8527 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8528
8529 /*
8530 * Check for an Empty Element.
8531 */
8532 if ((RAW == '/') && (NXT(1) == '>')) {
8533 SKIP(2);
8534 if ((ctxt->sax != NULL) &&
8535 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8536 ctxt->sax->endElement(ctxt->userData, name);
8537 oldname = namePop(ctxt);
8538 spacePop(ctxt);
8539 if (oldname != NULL) {
8540#ifdef DEBUG_STACK
8541 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8542#endif
8543 xmlFree(oldname);
8544 }
8545 if (ctxt->name == NULL) {
8546 ctxt->instate = XML_PARSER_EPILOG;
8547#ifdef DEBUG_PUSH
8548 xmlGenericError(xmlGenericErrorContext,
8549 "PP: entering EPILOG\n");
8550#endif
8551 } else {
8552 ctxt->instate = XML_PARSER_CONTENT;
8553#ifdef DEBUG_PUSH
8554 xmlGenericError(xmlGenericErrorContext,
8555 "PP: entering CONTENT\n");
8556#endif
8557 }
8558 break;
8559 }
8560 if (RAW == '>') {
8561 NEXT;
8562 } else {
8563 ctxt->errNo = XML_ERR_GT_REQUIRED;
8564 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8565 ctxt->sax->error(ctxt->userData,
8566 "Couldn't find end of Start Tag %s\n",
8567 name);
8568 ctxt->wellFormed = 0;
8569 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8570
8571 /*
8572 * end of parsing of this node.
8573 */
8574 nodePop(ctxt);
8575 oldname = namePop(ctxt);
8576 spacePop(ctxt);
8577 if (oldname != NULL) {
8578#ifdef DEBUG_STACK
8579 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8580#endif
8581 xmlFree(oldname);
8582 }
8583 }
8584 ctxt->instate = XML_PARSER_CONTENT;
8585#ifdef DEBUG_PUSH
8586 xmlGenericError(xmlGenericErrorContext,
8587 "PP: entering CONTENT\n");
8588#endif
8589 break;
8590 }
8591 case XML_PARSER_CONTENT: {
8592 const xmlChar *test;
8593 unsigned int cons;
8594 if ((avail < 2) && (ctxt->inputNr == 1))
8595 goto done;
8596 cur = ctxt->input->cur[0];
8597 next = ctxt->input->cur[1];
8598
8599 test = CUR_PTR;
8600 cons = ctxt->input->consumed;
8601 if ((cur == '<') && (next == '/')) {
8602 ctxt->instate = XML_PARSER_END_TAG;
8603#ifdef DEBUG_PUSH
8604 xmlGenericError(xmlGenericErrorContext,
8605 "PP: entering END_TAG\n");
8606#endif
8607 break;
8608 } else if ((cur == '<') && (next == '?')) {
8609 if ((!terminate) &&
8610 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8611 goto done;
8612#ifdef DEBUG_PUSH
8613 xmlGenericError(xmlGenericErrorContext,
8614 "PP: Parsing PI\n");
8615#endif
8616 xmlParsePI(ctxt);
8617 } else if ((cur == '<') && (next != '!')) {
8618 ctxt->instate = XML_PARSER_START_TAG;
8619#ifdef DEBUG_PUSH
8620 xmlGenericError(xmlGenericErrorContext,
8621 "PP: entering START_TAG\n");
8622#endif
8623 break;
8624 } else if ((cur == '<') && (next == '!') &&
8625 (ctxt->input->cur[2] == '-') &&
8626 (ctxt->input->cur[3] == '-')) {
8627 if ((!terminate) &&
8628 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8629 goto done;
8630#ifdef DEBUG_PUSH
8631 xmlGenericError(xmlGenericErrorContext,
8632 "PP: Parsing Comment\n");
8633#endif
8634 xmlParseComment(ctxt);
8635 ctxt->instate = XML_PARSER_CONTENT;
8636 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8637 (ctxt->input->cur[2] == '[') &&
8638 (ctxt->input->cur[3] == 'C') &&
8639 (ctxt->input->cur[4] == 'D') &&
8640 (ctxt->input->cur[5] == 'A') &&
8641 (ctxt->input->cur[6] == 'T') &&
8642 (ctxt->input->cur[7] == 'A') &&
8643 (ctxt->input->cur[8] == '[')) {
8644 SKIP(9);
8645 ctxt->instate = XML_PARSER_CDATA_SECTION;
8646#ifdef DEBUG_PUSH
8647 xmlGenericError(xmlGenericErrorContext,
8648 "PP: entering CDATA_SECTION\n");
8649#endif
8650 break;
8651 } else if ((cur == '<') && (next == '!') &&
8652 (avail < 9)) {
8653 goto done;
8654 } else if (cur == '&') {
8655 if ((!terminate) &&
8656 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8657 goto done;
8658#ifdef DEBUG_PUSH
8659 xmlGenericError(xmlGenericErrorContext,
8660 "PP: Parsing Reference\n");
8661#endif
8662 xmlParseReference(ctxt);
8663 } else {
8664 /* TODO Avoid the extra copy, handle directly !!! */
8665 /*
8666 * Goal of the following test is:
8667 * - minimize calls to the SAX 'character' callback
8668 * when they are mergeable
8669 * - handle an problem for isBlank when we only parse
8670 * a sequence of blank chars and the next one is
8671 * not available to check against '<' presence.
8672 * - tries to homogenize the differences in SAX
8673 * callbacks between the push and pull versions
8674 * of the parser.
8675 */
8676 if ((ctxt->inputNr == 1) &&
8677 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8678 if (!terminate) {
8679 if (ctxt->progressive) {
8680 if ((lastlt == NULL) ||
8681 (ctxt->input->cur > lastlt))
8682 goto done;
8683 } else if (xmlParseLookupSequence(ctxt,
8684 '<', 0, 0) < 0) {
8685 goto done;
8686 }
8687 }
8688 }
8689 ctxt->checkIndex = 0;
8690#ifdef DEBUG_PUSH
8691 xmlGenericError(xmlGenericErrorContext,
8692 "PP: Parsing char data\n");
8693#endif
8694 xmlParseCharData(ctxt, 0);
8695 }
8696 /*
8697 * Pop-up of finished entities.
8698 */
8699 while ((RAW == 0) && (ctxt->inputNr > 1))
8700 xmlPopInput(ctxt);
8701 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
8702 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8703 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8704 ctxt->sax->error(ctxt->userData,
8705 "detected an error in element content\n");
8706 ctxt->wellFormed = 0;
8707 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8708 ctxt->instate = XML_PARSER_EOF;
8709 break;
8710 }
8711 break;
8712 }
8713 case XML_PARSER_END_TAG:
8714 if (avail < 2)
8715 goto done;
8716 if (!terminate) {
8717 if (ctxt->progressive) {
8718 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
8719 goto done;
8720 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
8721 goto done;
8722 }
8723 }
8724 xmlParseEndTag(ctxt);
8725 if (ctxt->name == NULL) {
8726 ctxt->instate = XML_PARSER_EPILOG;
8727#ifdef DEBUG_PUSH
8728 xmlGenericError(xmlGenericErrorContext,
8729 "PP: entering EPILOG\n");
8730#endif
8731 } else {
8732 ctxt->instate = XML_PARSER_CONTENT;
8733#ifdef DEBUG_PUSH
8734 xmlGenericError(xmlGenericErrorContext,
8735 "PP: entering CONTENT\n");
8736#endif
8737 }
8738 break;
8739 case XML_PARSER_CDATA_SECTION: {
8740 /*
8741 * The Push mode need to have the SAX callback for
8742 * cdataBlock merge back contiguous callbacks.
8743 */
8744 int base;
8745
8746 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8747 if (base < 0) {
8748 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8749 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8750 if (ctxt->sax->cdataBlock != NULL)
8751 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8752 XML_PARSER_BIG_BUFFER_SIZE);
8753 }
8754 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8755 ctxt->checkIndex = 0;
8756 }
8757 goto done;
8758 } else {
8759 if ((ctxt->sax != NULL) && (base > 0) &&
8760 (!ctxt->disableSAX)) {
8761 if (ctxt->sax->cdataBlock != NULL)
8762 ctxt->sax->cdataBlock(ctxt->userData,
8763 ctxt->input->cur, base);
8764 }
8765 SKIP(base + 3);
8766 ctxt->checkIndex = 0;
8767 ctxt->instate = XML_PARSER_CONTENT;
8768#ifdef DEBUG_PUSH
8769 xmlGenericError(xmlGenericErrorContext,
8770 "PP: entering CONTENT\n");
8771#endif
8772 }
8773 break;
8774 }
Owen Taylor3473f882001-02-23 17:55:21 +00008775 case XML_PARSER_MISC:
8776 SKIP_BLANKS;
8777 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00008778 avail = ctxt->input->length -
8779 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00008780 else
Daniel Veillarda880b122003-04-21 21:36:41 +00008781 avail = ctxt->input->buf->buffer->use -
8782 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00008783 if (avail < 2)
8784 goto done;
8785 cur = ctxt->input->cur[0];
8786 next = ctxt->input->cur[1];
8787 if ((cur == '<') && (next == '?')) {
8788 if ((!terminate) &&
8789 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8790 goto done;
8791#ifdef DEBUG_PUSH
8792 xmlGenericError(xmlGenericErrorContext,
8793 "PP: Parsing PI\n");
8794#endif
8795 xmlParsePI(ctxt);
8796 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00008797 (ctxt->input->cur[2] == '-') &&
8798 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008799 if ((!terminate) &&
8800 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8801 goto done;
8802#ifdef DEBUG_PUSH
8803 xmlGenericError(xmlGenericErrorContext,
8804 "PP: Parsing Comment\n");
8805#endif
8806 xmlParseComment(ctxt);
8807 ctxt->instate = XML_PARSER_MISC;
8808 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00008809 (ctxt->input->cur[2] == 'D') &&
8810 (ctxt->input->cur[3] == 'O') &&
8811 (ctxt->input->cur[4] == 'C') &&
8812 (ctxt->input->cur[5] == 'T') &&
8813 (ctxt->input->cur[6] == 'Y') &&
8814 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008815 (ctxt->input->cur[8] == 'E')) {
8816 if ((!terminate) &&
8817 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8818 goto done;
8819#ifdef DEBUG_PUSH
8820 xmlGenericError(xmlGenericErrorContext,
8821 "PP: Parsing internal subset\n");
8822#endif
8823 ctxt->inSubset = 1;
8824 xmlParseDocTypeDecl(ctxt);
8825 if (RAW == '[') {
8826 ctxt->instate = XML_PARSER_DTD;
8827#ifdef DEBUG_PUSH
8828 xmlGenericError(xmlGenericErrorContext,
8829 "PP: entering DTD\n");
8830#endif
8831 } else {
8832 /*
8833 * Create and update the external subset.
8834 */
8835 ctxt->inSubset = 2;
8836 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8837 (ctxt->sax->externalSubset != NULL))
8838 ctxt->sax->externalSubset(ctxt->userData,
8839 ctxt->intSubName, ctxt->extSubSystem,
8840 ctxt->extSubURI);
8841 ctxt->inSubset = 0;
8842 ctxt->instate = XML_PARSER_PROLOG;
8843#ifdef DEBUG_PUSH
8844 xmlGenericError(xmlGenericErrorContext,
8845 "PP: entering PROLOG\n");
8846#endif
8847 }
8848 } else if ((cur == '<') && (next == '!') &&
8849 (avail < 9)) {
8850 goto done;
8851 } else {
8852 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00008853 ctxt->progressive = 1;
8854 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00008855#ifdef DEBUG_PUSH
8856 xmlGenericError(xmlGenericErrorContext,
8857 "PP: entering START_TAG\n");
8858#endif
8859 }
8860 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008861 case XML_PARSER_PROLOG:
8862 SKIP_BLANKS;
8863 if (ctxt->input->buf == NULL)
8864 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8865 else
8866 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8867 if (avail < 2)
8868 goto done;
8869 cur = ctxt->input->cur[0];
8870 next = ctxt->input->cur[1];
8871 if ((cur == '<') && (next == '?')) {
8872 if ((!terminate) &&
8873 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8874 goto done;
8875#ifdef DEBUG_PUSH
8876 xmlGenericError(xmlGenericErrorContext,
8877 "PP: Parsing PI\n");
8878#endif
8879 xmlParsePI(ctxt);
8880 } else if ((cur == '<') && (next == '!') &&
8881 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8882 if ((!terminate) &&
8883 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8884 goto done;
8885#ifdef DEBUG_PUSH
8886 xmlGenericError(xmlGenericErrorContext,
8887 "PP: Parsing Comment\n");
8888#endif
8889 xmlParseComment(ctxt);
8890 ctxt->instate = XML_PARSER_PROLOG;
8891 } else if ((cur == '<') && (next == '!') &&
8892 (avail < 4)) {
8893 goto done;
8894 } else {
8895 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00008896 ctxt->progressive = 1;
8897 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00008898#ifdef DEBUG_PUSH
8899 xmlGenericError(xmlGenericErrorContext,
8900 "PP: entering START_TAG\n");
8901#endif
8902 }
8903 break;
8904 case XML_PARSER_EPILOG:
8905 SKIP_BLANKS;
8906 if (ctxt->input->buf == NULL)
8907 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8908 else
8909 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8910 if (avail < 2)
8911 goto done;
8912 cur = ctxt->input->cur[0];
8913 next = ctxt->input->cur[1];
8914 if ((cur == '<') && (next == '?')) {
8915 if ((!terminate) &&
8916 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8917 goto done;
8918#ifdef DEBUG_PUSH
8919 xmlGenericError(xmlGenericErrorContext,
8920 "PP: Parsing PI\n");
8921#endif
8922 xmlParsePI(ctxt);
8923 ctxt->instate = XML_PARSER_EPILOG;
8924 } else if ((cur == '<') && (next == '!') &&
8925 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8926 if ((!terminate) &&
8927 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8928 goto done;
8929#ifdef DEBUG_PUSH
8930 xmlGenericError(xmlGenericErrorContext,
8931 "PP: Parsing Comment\n");
8932#endif
8933 xmlParseComment(ctxt);
8934 ctxt->instate = XML_PARSER_EPILOG;
8935 } else if ((cur == '<') && (next == '!') &&
8936 (avail < 4)) {
8937 goto done;
8938 } else {
8939 ctxt->errNo = XML_ERR_DOCUMENT_END;
8940 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8941 ctxt->sax->error(ctxt->userData,
8942 "Extra content at the end of the document\n");
8943 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008944 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008945 ctxt->instate = XML_PARSER_EOF;
8946#ifdef DEBUG_PUSH
8947 xmlGenericError(xmlGenericErrorContext,
8948 "PP: entering EOF\n");
8949#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008950 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008951 ctxt->sax->endDocument(ctxt->userData);
8952 goto done;
8953 }
8954 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008955 case XML_PARSER_DTD: {
8956 /*
8957 * Sorry but progressive parsing of the internal subset
8958 * is not expected to be supported. We first check that
8959 * the full content of the internal subset is available and
8960 * the parsing is launched only at that point.
8961 * Internal subset ends up with "']' S? '>'" in an unescaped
8962 * section and not in a ']]>' sequence which are conditional
8963 * sections (whoever argued to keep that crap in XML deserve
8964 * a place in hell !).
8965 */
8966 int base, i;
8967 xmlChar *buf;
8968 xmlChar quote = 0;
8969
8970 base = ctxt->input->cur - ctxt->input->base;
8971 if (base < 0) return(0);
8972 if (ctxt->checkIndex > base)
8973 base = ctxt->checkIndex;
8974 buf = ctxt->input->buf->buffer->content;
8975 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8976 base++) {
8977 if (quote != 0) {
8978 if (buf[base] == quote)
8979 quote = 0;
8980 continue;
8981 }
8982 if (buf[base] == '"') {
8983 quote = '"';
8984 continue;
8985 }
8986 if (buf[base] == '\'') {
8987 quote = '\'';
8988 continue;
8989 }
8990 if (buf[base] == ']') {
8991 if ((unsigned int) base +1 >=
8992 ctxt->input->buf->buffer->use)
8993 break;
8994 if (buf[base + 1] == ']') {
8995 /* conditional crap, skip both ']' ! */
8996 base++;
8997 continue;
8998 }
8999 for (i = 0;
9000 (unsigned int) base + i < ctxt->input->buf->buffer->use;
9001 i++) {
9002 if (buf[base + i] == '>')
9003 goto found_end_int_subset;
9004 }
9005 break;
9006 }
9007 }
9008 /*
9009 * We didn't found the end of the Internal subset
9010 */
9011 if (quote == 0)
9012 ctxt->checkIndex = base;
9013#ifdef DEBUG_PUSH
9014 if (next == 0)
9015 xmlGenericError(xmlGenericErrorContext,
9016 "PP: lookup of int subset end filed\n");
9017#endif
9018 goto done;
9019
9020found_end_int_subset:
9021 xmlParseInternalSubset(ctxt);
9022 ctxt->inSubset = 2;
9023 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9024 (ctxt->sax->externalSubset != NULL))
9025 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9026 ctxt->extSubSystem, ctxt->extSubURI);
9027 ctxt->inSubset = 0;
9028 ctxt->instate = XML_PARSER_PROLOG;
9029 ctxt->checkIndex = 0;
9030#ifdef DEBUG_PUSH
9031 xmlGenericError(xmlGenericErrorContext,
9032 "PP: entering PROLOG\n");
9033#endif
9034 break;
9035 }
9036 case XML_PARSER_COMMENT:
9037 xmlGenericError(xmlGenericErrorContext,
9038 "PP: internal error, state == COMMENT\n");
9039 ctxt->instate = XML_PARSER_CONTENT;
9040#ifdef DEBUG_PUSH
9041 xmlGenericError(xmlGenericErrorContext,
9042 "PP: entering CONTENT\n");
9043#endif
9044 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009045 case XML_PARSER_IGNORE:
9046 xmlGenericError(xmlGenericErrorContext,
9047 "PP: internal error, state == IGNORE");
9048 ctxt->instate = XML_PARSER_DTD;
9049#ifdef DEBUG_PUSH
9050 xmlGenericError(xmlGenericErrorContext,
9051 "PP: entering DTD\n");
9052#endif
9053 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009054 case XML_PARSER_PI:
9055 xmlGenericError(xmlGenericErrorContext,
9056 "PP: internal error, state == PI\n");
9057 ctxt->instate = XML_PARSER_CONTENT;
9058#ifdef DEBUG_PUSH
9059 xmlGenericError(xmlGenericErrorContext,
9060 "PP: entering CONTENT\n");
9061#endif
9062 break;
9063 case XML_PARSER_ENTITY_DECL:
9064 xmlGenericError(xmlGenericErrorContext,
9065 "PP: internal error, state == ENTITY_DECL\n");
9066 ctxt->instate = XML_PARSER_DTD;
9067#ifdef DEBUG_PUSH
9068 xmlGenericError(xmlGenericErrorContext,
9069 "PP: entering DTD\n");
9070#endif
9071 break;
9072 case XML_PARSER_ENTITY_VALUE:
9073 xmlGenericError(xmlGenericErrorContext,
9074 "PP: internal error, state == ENTITY_VALUE\n");
9075 ctxt->instate = XML_PARSER_CONTENT;
9076#ifdef DEBUG_PUSH
9077 xmlGenericError(xmlGenericErrorContext,
9078 "PP: entering DTD\n");
9079#endif
9080 break;
9081 case XML_PARSER_ATTRIBUTE_VALUE:
9082 xmlGenericError(xmlGenericErrorContext,
9083 "PP: internal error, state == ATTRIBUTE_VALUE\n");
9084 ctxt->instate = XML_PARSER_START_TAG;
9085#ifdef DEBUG_PUSH
9086 xmlGenericError(xmlGenericErrorContext,
9087 "PP: entering START_TAG\n");
9088#endif
9089 break;
9090 case XML_PARSER_SYSTEM_LITERAL:
9091 xmlGenericError(xmlGenericErrorContext,
9092 "PP: internal error, state == SYSTEM_LITERAL\n");
9093 ctxt->instate = XML_PARSER_START_TAG;
9094#ifdef DEBUG_PUSH
9095 xmlGenericError(xmlGenericErrorContext,
9096 "PP: entering START_TAG\n");
9097#endif
9098 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00009099 case XML_PARSER_PUBLIC_LITERAL:
9100 xmlGenericError(xmlGenericErrorContext,
9101 "PP: internal error, state == PUBLIC_LITERAL\n");
9102 ctxt->instate = XML_PARSER_START_TAG;
9103#ifdef DEBUG_PUSH
9104 xmlGenericError(xmlGenericErrorContext,
9105 "PP: entering START_TAG\n");
9106#endif
9107 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009108 }
9109 }
9110done:
9111#ifdef DEBUG_PUSH
9112 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
9113#endif
9114 return(ret);
9115}
9116
9117/**
Owen Taylor3473f882001-02-23 17:55:21 +00009118 * xmlParseChunk:
9119 * @ctxt: an XML parser context
9120 * @chunk: an char array
9121 * @size: the size in byte of the chunk
9122 * @terminate: last chunk indicator
9123 *
9124 * Parse a Chunk of memory
9125 *
9126 * Returns zero if no error, the xmlParserErrors otherwise.
9127 */
9128int
9129xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9130 int terminate) {
9131 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9132 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9133 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9134 int cur = ctxt->input->cur - ctxt->input->base;
9135
9136 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9137 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9138 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009139 ctxt->input->end =
9140 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009141#ifdef DEBUG_PUSH
9142 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9143#endif
9144
Daniel Veillarda880b122003-04-21 21:36:41 +00009145#if 0
Owen Taylor3473f882001-02-23 17:55:21 +00009146 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
9147 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda880b122003-04-21 21:36:41 +00009148#endif
Owen Taylor3473f882001-02-23 17:55:21 +00009149 } else if (ctxt->instate != XML_PARSER_EOF) {
9150 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
9151 xmlParserInputBufferPtr in = ctxt->input->buf;
9152 if ((in->encoder != NULL) && (in->buffer != NULL) &&
9153 (in->raw != NULL)) {
9154 int nbchars;
9155
9156 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
9157 if (nbchars < 0) {
9158 xmlGenericError(xmlGenericErrorContext,
9159 "xmlParseChunk: encoder error\n");
9160 return(XML_ERR_INVALID_ENCODING);
9161 }
9162 }
9163 }
9164 }
9165 xmlParseTryOrFinish(ctxt, terminate);
9166 if (terminate) {
9167 /*
9168 * Check for termination
9169 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009170 int avail = 0;
9171 if (ctxt->input->buf == NULL)
9172 avail = ctxt->input->length -
9173 (ctxt->input->cur - ctxt->input->base);
9174 else
9175 avail = ctxt->input->buf->buffer->use -
9176 (ctxt->input->cur - ctxt->input->base);
9177
Owen Taylor3473f882001-02-23 17:55:21 +00009178 if ((ctxt->instate != XML_PARSER_EOF) &&
9179 (ctxt->instate != XML_PARSER_EPILOG)) {
9180 ctxt->errNo = XML_ERR_DOCUMENT_END;
9181 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9182 ctxt->sax->error(ctxt->userData,
9183 "Extra content at the end of the document\n");
9184 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009185 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009186 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009187 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
9188 ctxt->errNo = XML_ERR_DOCUMENT_END;
9189 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9190 ctxt->sax->error(ctxt->userData,
9191 "Extra content at the end of the document\n");
9192 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009193 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009194
9195 }
Owen Taylor3473f882001-02-23 17:55:21 +00009196 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009197 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009198 ctxt->sax->endDocument(ctxt->userData);
9199 }
9200 ctxt->instate = XML_PARSER_EOF;
9201 }
9202 return((xmlParserErrors) ctxt->errNo);
9203}
9204
9205/************************************************************************
9206 * *
9207 * I/O front end functions to the parser *
9208 * *
9209 ************************************************************************/
9210
9211/**
9212 * xmlStopParser:
9213 * @ctxt: an XML parser context
9214 *
9215 * Blocks further parser processing
9216 */
9217void
9218xmlStopParser(xmlParserCtxtPtr ctxt) {
9219 ctxt->instate = XML_PARSER_EOF;
9220 if (ctxt->input != NULL)
9221 ctxt->input->cur = BAD_CAST"";
9222}
9223
9224/**
9225 * xmlCreatePushParserCtxt:
9226 * @sax: a SAX handler
9227 * @user_data: The user data returned on SAX callbacks
9228 * @chunk: a pointer to an array of chars
9229 * @size: number of chars in the array
9230 * @filename: an optional file name or URI
9231 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00009232 * Create a parser context for using the XML parser in push mode.
9233 * If @buffer and @size are non-NULL, the data is used to detect
9234 * the encoding. The remaining characters will be parsed so they
9235 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00009236 * To allow content encoding detection, @size should be >= 4
9237 * The value of @filename is used for fetching external entities
9238 * and error/warning reports.
9239 *
9240 * Returns the new parser context or NULL
9241 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00009242
Owen Taylor3473f882001-02-23 17:55:21 +00009243xmlParserCtxtPtr
9244xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9245 const char *chunk, int size, const char *filename) {
9246 xmlParserCtxtPtr ctxt;
9247 xmlParserInputPtr inputStream;
9248 xmlParserInputBufferPtr buf;
9249 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9250
9251 /*
9252 * plug some encoding conversion routines
9253 */
9254 if ((chunk != NULL) && (size >= 4))
9255 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9256
9257 buf = xmlAllocParserInputBuffer(enc);
9258 if (buf == NULL) return(NULL);
9259
9260 ctxt = xmlNewParserCtxt();
9261 if (ctxt == NULL) {
9262 xmlFree(buf);
9263 return(NULL);
9264 }
9265 if (sax != NULL) {
9266 if (ctxt->sax != &xmlDefaultSAXHandler)
9267 xmlFree(ctxt->sax);
9268 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9269 if (ctxt->sax == NULL) {
9270 xmlFree(buf);
9271 xmlFree(ctxt);
9272 return(NULL);
9273 }
9274 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9275 if (user_data != NULL)
9276 ctxt->userData = user_data;
9277 }
9278 if (filename == NULL) {
9279 ctxt->directory = NULL;
9280 } else {
9281 ctxt->directory = xmlParserGetDirectory(filename);
9282 }
9283
9284 inputStream = xmlNewInputStream(ctxt);
9285 if (inputStream == NULL) {
9286 xmlFreeParserCtxt(ctxt);
Daniel Veillard77a90a72003-03-22 00:04:05 +00009287 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009288 return(NULL);
9289 }
9290
9291 if (filename == NULL)
9292 inputStream->filename = NULL;
9293 else
Daniel Veillardf4862f02002-09-10 11:13:43 +00009294 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +00009295 xmlCanonicPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +00009296 inputStream->buf = buf;
9297 inputStream->base = inputStream->buf->buffer->content;
9298 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009299 inputStream->end =
9300 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009301
9302 inputPush(ctxt, inputStream);
9303
9304 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9305 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009306 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9307 int cur = ctxt->input->cur - ctxt->input->base;
9308
Owen Taylor3473f882001-02-23 17:55:21 +00009309 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009310
9311 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9312 ctxt->input->cur = ctxt->input->base + cur;
9313 ctxt->input->end =
9314 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009315#ifdef DEBUG_PUSH
9316 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9317#endif
9318 }
9319
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009320 if (enc != XML_CHAR_ENCODING_NONE) {
9321 xmlSwitchEncoding(ctxt, enc);
9322 }
9323
Owen Taylor3473f882001-02-23 17:55:21 +00009324 return(ctxt);
9325}
9326
9327/**
9328 * xmlCreateIOParserCtxt:
9329 * @sax: a SAX handler
9330 * @user_data: The user data returned on SAX callbacks
9331 * @ioread: an I/O read function
9332 * @ioclose: an I/O close function
9333 * @ioctx: an I/O handler
9334 * @enc: the charset encoding if known
9335 *
9336 * Create a parser context for using the XML parser with an existing
9337 * I/O stream
9338 *
9339 * Returns the new parser context or NULL
9340 */
9341xmlParserCtxtPtr
9342xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9343 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9344 void *ioctx, xmlCharEncoding enc) {
9345 xmlParserCtxtPtr ctxt;
9346 xmlParserInputPtr inputStream;
9347 xmlParserInputBufferPtr buf;
9348
9349 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9350 if (buf == NULL) return(NULL);
9351
9352 ctxt = xmlNewParserCtxt();
9353 if (ctxt == NULL) {
9354 xmlFree(buf);
9355 return(NULL);
9356 }
9357 if (sax != NULL) {
9358 if (ctxt->sax != &xmlDefaultSAXHandler)
9359 xmlFree(ctxt->sax);
9360 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9361 if (ctxt->sax == NULL) {
9362 xmlFree(buf);
9363 xmlFree(ctxt);
9364 return(NULL);
9365 }
9366 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9367 if (user_data != NULL)
9368 ctxt->userData = user_data;
9369 }
9370
9371 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9372 if (inputStream == NULL) {
9373 xmlFreeParserCtxt(ctxt);
9374 return(NULL);
9375 }
9376 inputPush(ctxt, inputStream);
9377
9378 return(ctxt);
9379}
9380
9381/************************************************************************
9382 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009383 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00009384 * *
9385 ************************************************************************/
9386
9387/**
9388 * xmlIOParseDTD:
9389 * @sax: the SAX handler block or NULL
9390 * @input: an Input Buffer
9391 * @enc: the charset encoding if known
9392 *
9393 * Load and parse a DTD
9394 *
9395 * Returns the resulting xmlDtdPtr or NULL in case of error.
9396 * @input will be freed at parsing end.
9397 */
9398
9399xmlDtdPtr
9400xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9401 xmlCharEncoding enc) {
9402 xmlDtdPtr ret = NULL;
9403 xmlParserCtxtPtr ctxt;
9404 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009405 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00009406
9407 if (input == NULL)
9408 return(NULL);
9409
9410 ctxt = xmlNewParserCtxt();
9411 if (ctxt == NULL) {
9412 return(NULL);
9413 }
9414
9415 /*
9416 * Set-up the SAX context
9417 */
9418 if (sax != NULL) {
9419 if (ctxt->sax != NULL)
9420 xmlFree(ctxt->sax);
9421 ctxt->sax = sax;
9422 ctxt->userData = NULL;
9423 }
9424
9425 /*
9426 * generate a parser input from the I/O handler
9427 */
9428
9429 pinput = xmlNewIOInputStream(ctxt, input, enc);
9430 if (pinput == NULL) {
9431 if (sax != NULL) ctxt->sax = NULL;
9432 xmlFreeParserCtxt(ctxt);
9433 return(NULL);
9434 }
9435
9436 /*
9437 * plug some encoding conversion routines here.
9438 */
9439 xmlPushInput(ctxt, pinput);
9440
9441 pinput->filename = NULL;
9442 pinput->line = 1;
9443 pinput->col = 1;
9444 pinput->base = ctxt->input->cur;
9445 pinput->cur = ctxt->input->cur;
9446 pinput->free = NULL;
9447
9448 /*
9449 * let's parse that entity knowing it's an external subset.
9450 */
9451 ctxt->inSubset = 2;
9452 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9453 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9454 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009455
9456 if (enc == XML_CHAR_ENCODING_NONE) {
9457 /*
9458 * Get the 4 first bytes and decode the charset
9459 * if enc != XML_CHAR_ENCODING_NONE
9460 * plug some encoding conversion routines.
9461 */
9462 start[0] = RAW;
9463 start[1] = NXT(1);
9464 start[2] = NXT(2);
9465 start[3] = NXT(3);
9466 enc = xmlDetectCharEncoding(start, 4);
9467 if (enc != XML_CHAR_ENCODING_NONE) {
9468 xmlSwitchEncoding(ctxt, enc);
9469 }
9470 }
9471
Owen Taylor3473f882001-02-23 17:55:21 +00009472 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9473
9474 if (ctxt->myDoc != NULL) {
9475 if (ctxt->wellFormed) {
9476 ret = ctxt->myDoc->extSubset;
9477 ctxt->myDoc->extSubset = NULL;
9478 } else {
9479 ret = NULL;
9480 }
9481 xmlFreeDoc(ctxt->myDoc);
9482 ctxt->myDoc = NULL;
9483 }
9484 if (sax != NULL) ctxt->sax = NULL;
9485 xmlFreeParserCtxt(ctxt);
9486
9487 return(ret);
9488}
9489
9490/**
9491 * xmlSAXParseDTD:
9492 * @sax: the SAX handler block
9493 * @ExternalID: a NAME* containing the External ID of the DTD
9494 * @SystemID: a NAME* containing the URL to the DTD
9495 *
9496 * Load and parse an external subset.
9497 *
9498 * Returns the resulting xmlDtdPtr or NULL in case of error.
9499 */
9500
9501xmlDtdPtr
9502xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9503 const xmlChar *SystemID) {
9504 xmlDtdPtr ret = NULL;
9505 xmlParserCtxtPtr ctxt;
9506 xmlParserInputPtr input = NULL;
9507 xmlCharEncoding enc;
9508
9509 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9510
9511 ctxt = xmlNewParserCtxt();
9512 if (ctxt == NULL) {
9513 return(NULL);
9514 }
9515
9516 /*
9517 * Set-up the SAX context
9518 */
9519 if (sax != NULL) {
9520 if (ctxt->sax != NULL)
9521 xmlFree(ctxt->sax);
9522 ctxt->sax = sax;
9523 ctxt->userData = NULL;
9524 }
9525
9526 /*
9527 * Ask the Entity resolver to load the damn thing
9528 */
9529
9530 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9531 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9532 if (input == NULL) {
9533 if (sax != NULL) ctxt->sax = NULL;
9534 xmlFreeParserCtxt(ctxt);
9535 return(NULL);
9536 }
9537
9538 /*
9539 * plug some encoding conversion routines here.
9540 */
9541 xmlPushInput(ctxt, input);
9542 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9543 xmlSwitchEncoding(ctxt, enc);
9544
9545 if (input->filename == NULL)
Daniel Veillard85095e22003-04-23 13:56:44 +00009546 input->filename = (char *) xmlCanonicPath(SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +00009547 input->line = 1;
9548 input->col = 1;
9549 input->base = ctxt->input->cur;
9550 input->cur = ctxt->input->cur;
9551 input->free = NULL;
9552
9553 /*
9554 * let's parse that entity knowing it's an external subset.
9555 */
9556 ctxt->inSubset = 2;
9557 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9558 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9559 ExternalID, SystemID);
9560 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9561
9562 if (ctxt->myDoc != NULL) {
9563 if (ctxt->wellFormed) {
9564 ret = ctxt->myDoc->extSubset;
9565 ctxt->myDoc->extSubset = NULL;
9566 } else {
9567 ret = NULL;
9568 }
9569 xmlFreeDoc(ctxt->myDoc);
9570 ctxt->myDoc = NULL;
9571 }
9572 if (sax != NULL) ctxt->sax = NULL;
9573 xmlFreeParserCtxt(ctxt);
9574
9575 return(ret);
9576}
9577
9578/**
9579 * xmlParseDTD:
9580 * @ExternalID: a NAME* containing the External ID of the DTD
9581 * @SystemID: a NAME* containing the URL to the DTD
9582 *
9583 * Load and parse an external subset.
9584 *
9585 * Returns the resulting xmlDtdPtr or NULL in case of error.
9586 */
9587
9588xmlDtdPtr
9589xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9590 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9591}
9592
9593/************************************************************************
9594 * *
9595 * Front ends when parsing an Entity *
9596 * *
9597 ************************************************************************/
9598
9599/**
Owen Taylor3473f882001-02-23 17:55:21 +00009600 * xmlParseCtxtExternalEntity:
9601 * @ctx: the existing parsing context
9602 * @URL: the URL for the entity to load
9603 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009604 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009605 *
9606 * Parse an external general entity within an existing parsing context
9607 * An external general parsed entity is well-formed if it matches the
9608 * production labeled extParsedEnt.
9609 *
9610 * [78] extParsedEnt ::= TextDecl? content
9611 *
9612 * Returns 0 if the entity is well formed, -1 in case of args problem and
9613 * the parser error code otherwise
9614 */
9615
9616int
9617xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009618 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009619 xmlParserCtxtPtr ctxt;
9620 xmlDocPtr newDoc;
9621 xmlSAXHandlerPtr oldsax = NULL;
9622 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009623 xmlChar start[4];
9624 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009625
9626 if (ctx->depth > 40) {
9627 return(XML_ERR_ENTITY_LOOP);
9628 }
9629
Daniel Veillardcda96922001-08-21 10:56:31 +00009630 if (lst != NULL)
9631 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009632 if ((URL == NULL) && (ID == NULL))
9633 return(-1);
9634 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9635 return(-1);
9636
9637
9638 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9639 if (ctxt == NULL) return(-1);
9640 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +00009641 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +00009642 oldsax = ctxt->sax;
9643 ctxt->sax = ctx->sax;
9644 newDoc = xmlNewDoc(BAD_CAST "1.0");
9645 if (newDoc == NULL) {
9646 xmlFreeParserCtxt(ctxt);
9647 return(-1);
9648 }
9649 if (ctx->myDoc != NULL) {
9650 newDoc->intSubset = ctx->myDoc->intSubset;
9651 newDoc->extSubset = ctx->myDoc->extSubset;
9652 }
9653 if (ctx->myDoc->URL != NULL) {
9654 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9655 }
9656 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9657 if (newDoc->children == NULL) {
9658 ctxt->sax = oldsax;
9659 xmlFreeParserCtxt(ctxt);
9660 newDoc->intSubset = NULL;
9661 newDoc->extSubset = NULL;
9662 xmlFreeDoc(newDoc);
9663 return(-1);
9664 }
9665 nodePush(ctxt, newDoc->children);
9666 if (ctx->myDoc == NULL) {
9667 ctxt->myDoc = newDoc;
9668 } else {
9669 ctxt->myDoc = ctx->myDoc;
9670 newDoc->children->doc = ctx->myDoc;
9671 }
9672
Daniel Veillard87a764e2001-06-20 17:41:10 +00009673 /*
9674 * Get the 4 first bytes and decode the charset
9675 * if enc != XML_CHAR_ENCODING_NONE
9676 * plug some encoding conversion routines.
9677 */
9678 GROW
9679 start[0] = RAW;
9680 start[1] = NXT(1);
9681 start[2] = NXT(2);
9682 start[3] = NXT(3);
9683 enc = xmlDetectCharEncoding(start, 4);
9684 if (enc != XML_CHAR_ENCODING_NONE) {
9685 xmlSwitchEncoding(ctxt, enc);
9686 }
9687
Owen Taylor3473f882001-02-23 17:55:21 +00009688 /*
9689 * Parse a possible text declaration first
9690 */
Owen Taylor3473f882001-02-23 17:55:21 +00009691 if ((RAW == '<') && (NXT(1) == '?') &&
9692 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9693 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9694 xmlParseTextDecl(ctxt);
9695 }
9696
9697 /*
9698 * Doing validity checking on chunk doesn't make sense
9699 */
9700 ctxt->instate = XML_PARSER_CONTENT;
9701 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +00009702 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +00009703 ctxt->loadsubset = ctx->loadsubset;
9704 ctxt->depth = ctx->depth + 1;
9705 ctxt->replaceEntities = ctx->replaceEntities;
9706 if (ctxt->validate) {
9707 ctxt->vctxt.error = ctx->vctxt.error;
9708 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009709 } else {
9710 ctxt->vctxt.error = NULL;
9711 ctxt->vctxt.warning = NULL;
9712 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009713 ctxt->vctxt.nodeTab = NULL;
9714 ctxt->vctxt.nodeNr = 0;
9715 ctxt->vctxt.nodeMax = 0;
9716 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009717
9718 xmlParseContent(ctxt);
9719
Daniel Veillard5f8d1a32003-03-23 21:02:00 +00009720 ctx->validate = ctxt->validate;
9721 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +00009722 if ((RAW == '<') && (NXT(1) == '/')) {
9723 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9724 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9725 ctxt->sax->error(ctxt->userData,
9726 "chunk is not well balanced\n");
9727 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009728 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009729 } else if (RAW != 0) {
9730 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9731 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9732 ctxt->sax->error(ctxt->userData,
9733 "extra content at the end of well balanced chunk\n");
9734 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009735 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009736 }
9737 if (ctxt->node != newDoc->children) {
9738 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9739 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9740 ctxt->sax->error(ctxt->userData,
9741 "chunk is not well balanced\n");
9742 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009743 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009744 }
9745
9746 if (!ctxt->wellFormed) {
9747 if (ctxt->errNo == 0)
9748 ret = 1;
9749 else
9750 ret = ctxt->errNo;
9751 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009752 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009753 xmlNodePtr cur;
9754
9755 /*
9756 * Return the newly created nodeset after unlinking it from
9757 * they pseudo parent.
9758 */
9759 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009760 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009761 while (cur != NULL) {
9762 cur->parent = NULL;
9763 cur = cur->next;
9764 }
9765 newDoc->children->children = NULL;
9766 }
9767 ret = 0;
9768 }
9769 ctxt->sax = oldsax;
9770 xmlFreeParserCtxt(ctxt);
9771 newDoc->intSubset = NULL;
9772 newDoc->extSubset = NULL;
9773 xmlFreeDoc(newDoc);
9774
9775 return(ret);
9776}
9777
9778/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009779 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009780 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009781 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009782 * @sax: the SAX handler bloc (possibly NULL)
9783 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9784 * @depth: Used for loop detection, use 0
9785 * @URL: the URL for the entity to load
9786 * @ID: the System ID for the entity to load
9787 * @list: the return value for the set of parsed nodes
9788 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009789 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009790 *
9791 * Returns 0 if the entity is well formed, -1 in case of args problem and
9792 * the parser error code otherwise
9793 */
9794
Daniel Veillard257d9102001-05-08 10:41:44 +00009795static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009796xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9797 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009798 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009799 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009800 xmlParserCtxtPtr ctxt;
9801 xmlDocPtr newDoc;
9802 xmlSAXHandlerPtr oldsax = NULL;
9803 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009804 xmlChar start[4];
9805 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009806
9807 if (depth > 40) {
9808 return(XML_ERR_ENTITY_LOOP);
9809 }
9810
9811
9812
9813 if (list != NULL)
9814 *list = NULL;
9815 if ((URL == NULL) && (ID == NULL))
9816 return(-1);
9817 if (doc == NULL) /* @@ relax but check for dereferences */
9818 return(-1);
9819
9820
9821 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9822 if (ctxt == NULL) return(-1);
9823 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009824 if (oldctxt != NULL) {
9825 ctxt->_private = oldctxt->_private;
9826 ctxt->loadsubset = oldctxt->loadsubset;
9827 ctxt->validate = oldctxt->validate;
9828 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009829 ctxt->record_info = oldctxt->record_info;
9830 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
9831 ctxt->node_seq.length = oldctxt->node_seq.length;
9832 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009833 } else {
9834 /*
9835 * Doing validity checking on chunk without context
9836 * doesn't make sense
9837 */
9838 ctxt->_private = NULL;
9839 ctxt->validate = 0;
9840 ctxt->external = 2;
9841 ctxt->loadsubset = 0;
9842 }
Owen Taylor3473f882001-02-23 17:55:21 +00009843 if (sax != NULL) {
9844 oldsax = ctxt->sax;
9845 ctxt->sax = sax;
9846 if (user_data != NULL)
9847 ctxt->userData = user_data;
9848 }
9849 newDoc = xmlNewDoc(BAD_CAST "1.0");
9850 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009851 ctxt->node_seq.maximum = 0;
9852 ctxt->node_seq.length = 0;
9853 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009854 xmlFreeParserCtxt(ctxt);
9855 return(-1);
9856 }
9857 if (doc != NULL) {
9858 newDoc->intSubset = doc->intSubset;
9859 newDoc->extSubset = doc->extSubset;
9860 }
9861 if (doc->URL != NULL) {
9862 newDoc->URL = xmlStrdup(doc->URL);
9863 }
9864 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9865 if (newDoc->children == NULL) {
9866 if (sax != NULL)
9867 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009868 ctxt->node_seq.maximum = 0;
9869 ctxt->node_seq.length = 0;
9870 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009871 xmlFreeParserCtxt(ctxt);
9872 newDoc->intSubset = NULL;
9873 newDoc->extSubset = NULL;
9874 xmlFreeDoc(newDoc);
9875 return(-1);
9876 }
9877 nodePush(ctxt, newDoc->children);
9878 if (doc == NULL) {
9879 ctxt->myDoc = newDoc;
9880 } else {
9881 ctxt->myDoc = doc;
9882 newDoc->children->doc = doc;
9883 }
9884
Daniel Veillard87a764e2001-06-20 17:41:10 +00009885 /*
9886 * Get the 4 first bytes and decode the charset
9887 * if enc != XML_CHAR_ENCODING_NONE
9888 * plug some encoding conversion routines.
9889 */
9890 GROW;
9891 start[0] = RAW;
9892 start[1] = NXT(1);
9893 start[2] = NXT(2);
9894 start[3] = NXT(3);
9895 enc = xmlDetectCharEncoding(start, 4);
9896 if (enc != XML_CHAR_ENCODING_NONE) {
9897 xmlSwitchEncoding(ctxt, enc);
9898 }
9899
Owen Taylor3473f882001-02-23 17:55:21 +00009900 /*
9901 * Parse a possible text declaration first
9902 */
Owen Taylor3473f882001-02-23 17:55:21 +00009903 if ((RAW == '<') && (NXT(1) == '?') &&
9904 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9905 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9906 xmlParseTextDecl(ctxt);
9907 }
9908
Owen Taylor3473f882001-02-23 17:55:21 +00009909 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009910 ctxt->depth = depth;
9911
9912 xmlParseContent(ctxt);
9913
Daniel Veillard561b7f82002-03-20 21:55:57 +00009914 if ((RAW == '<') && (NXT(1) == '/')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009915 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9916 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9917 ctxt->sax->error(ctxt->userData,
9918 "chunk is not well balanced\n");
9919 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009920 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00009921 } else if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00009922 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9923 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9924 ctxt->sax->error(ctxt->userData,
9925 "extra content at the end of well balanced chunk\n");
9926 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009927 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009928 }
9929 if (ctxt->node != newDoc->children) {
9930 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9931 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9932 ctxt->sax->error(ctxt->userData,
9933 "chunk is not well balanced\n");
9934 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009935 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009936 }
9937
9938 if (!ctxt->wellFormed) {
9939 if (ctxt->errNo == 0)
9940 ret = 1;
9941 else
9942 ret = ctxt->errNo;
9943 } else {
9944 if (list != NULL) {
9945 xmlNodePtr cur;
9946
9947 /*
9948 * Return the newly created nodeset after unlinking it from
9949 * they pseudo parent.
9950 */
9951 cur = newDoc->children->children;
9952 *list = cur;
9953 while (cur != NULL) {
9954 cur->parent = NULL;
9955 cur = cur->next;
9956 }
9957 newDoc->children->children = NULL;
9958 }
9959 ret = 0;
9960 }
9961 if (sax != NULL)
9962 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +00009963 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
9964 oldctxt->node_seq.length = ctxt->node_seq.length;
9965 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009966 ctxt->node_seq.maximum = 0;
9967 ctxt->node_seq.length = 0;
9968 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009969 xmlFreeParserCtxt(ctxt);
9970 newDoc->intSubset = NULL;
9971 newDoc->extSubset = NULL;
9972 xmlFreeDoc(newDoc);
9973
9974 return(ret);
9975}
9976
9977/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009978 * xmlParseExternalEntity:
9979 * @doc: the document the chunk pertains to
9980 * @sax: the SAX handler bloc (possibly NULL)
9981 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9982 * @depth: Used for loop detection, use 0
9983 * @URL: the URL for the entity to load
9984 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009985 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009986 *
9987 * Parse an external general entity
9988 * An external general parsed entity is well-formed if it matches the
9989 * production labeled extParsedEnt.
9990 *
9991 * [78] extParsedEnt ::= TextDecl? content
9992 *
9993 * Returns 0 if the entity is well formed, -1 in case of args problem and
9994 * the parser error code otherwise
9995 */
9996
9997int
9998xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009999 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010000 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010001 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000010002}
10003
10004/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000010005 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000010006 * @doc: the document the chunk pertains to
10007 * @sax: the SAX handler bloc (possibly NULL)
10008 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10009 * @depth: Used for loop detection, use 0
10010 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000010011 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010012 *
10013 * Parse a well-balanced chunk of an XML document
10014 * called by the parser
10015 * The allowed sequence for the Well Balanced Chunk is the one defined by
10016 * the content production in the XML grammar:
10017 *
10018 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10019 *
10020 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10021 * the parser error code otherwise
10022 */
10023
10024int
10025xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000010026 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010027 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
10028 depth, string, lst, 0 );
10029}
10030
10031/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000010032 * xmlParseBalancedChunkMemoryInternal:
10033 * @oldctxt: the existing parsing context
10034 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10035 * @user_data: the user data field for the parser context
10036 * @lst: the return value for the set of parsed nodes
10037 *
10038 *
10039 * Parse a well-balanced chunk of an XML document
10040 * called by the parser
10041 * The allowed sequence for the Well Balanced Chunk is the one defined by
10042 * the content production in the XML grammar:
10043 *
10044 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10045 *
10046 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10047 * the parser error code otherwise
10048 *
10049 * In case recover is set to 1, the nodelist will not be empty even if
10050 * the parsed chunk is not well balanced.
10051 */
10052static int
10053xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
10054 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
10055 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010056 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010057 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010058 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010059 int size;
10060 int ret = 0;
10061
10062 if (oldctxt->depth > 40) {
10063 return(XML_ERR_ENTITY_LOOP);
10064 }
10065
10066
10067 if (lst != NULL)
10068 *lst = NULL;
10069 if (string == NULL)
10070 return(-1);
10071
10072 size = xmlStrlen(string);
10073
10074 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10075 if (ctxt == NULL) return(-1);
10076 if (user_data != NULL)
10077 ctxt->userData = user_data;
10078 else
10079 ctxt->userData = ctxt;
10080
10081 oldsax = ctxt->sax;
10082 ctxt->sax = oldctxt->sax;
Daniel Veillarde1ca5032002-12-09 14:13:43 +000010083 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010084 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010085 newDoc = xmlNewDoc(BAD_CAST "1.0");
10086 if (newDoc == NULL) {
10087 ctxt->sax = oldsax;
10088 xmlFreeParserCtxt(ctxt);
10089 return(-1);
10090 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010091 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010092 } else {
10093 ctxt->myDoc = oldctxt->myDoc;
10094 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010095 }
Daniel Veillard9bc53102002-11-25 13:20:04 +000010096 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +000010097 BAD_CAST "pseudoroot", NULL);
10098 if (ctxt->myDoc->children == NULL) {
10099 ctxt->sax = oldsax;
10100 xmlFreeParserCtxt(ctxt);
10101 if (newDoc != NULL)
10102 xmlFreeDoc(newDoc);
10103 return(-1);
10104 }
10105 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010106 ctxt->instate = XML_PARSER_CONTENT;
10107 ctxt->depth = oldctxt->depth + 1;
10108
Daniel Veillard328f48c2002-11-15 15:24:34 +000010109 ctxt->validate = 0;
10110 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000010111 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
10112 /*
10113 * ID/IDREF registration will be done in xmlValidateElement below
10114 */
10115 ctxt->loadsubset |= XML_SKIP_IDS;
10116 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010117
Daniel Veillard68e9e742002-11-16 15:35:11 +000010118 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010119 if ((RAW == '<') && (NXT(1) == '/')) {
10120 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10121 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10122 ctxt->sax->error(ctxt->userData,
10123 "chunk is not well balanced\n");
10124 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010125 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010126 } else if (RAW != 0) {
10127 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10128 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10129 ctxt->sax->error(ctxt->userData,
10130 "extra content at the end of well balanced chunk\n");
10131 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010132 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010133 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010134 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000010135 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10136 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10137 ctxt->sax->error(ctxt->userData,
10138 "chunk is not well balanced\n");
10139 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010140 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010141 }
10142
10143 if (!ctxt->wellFormed) {
10144 if (ctxt->errNo == 0)
10145 ret = 1;
10146 else
10147 ret = ctxt->errNo;
10148 } else {
10149 ret = 0;
10150 }
10151
10152 if ((lst != NULL) && (ret == 0)) {
10153 xmlNodePtr cur;
10154
10155 /*
10156 * Return the newly created nodeset after unlinking it from
10157 * they pseudo parent.
10158 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000010159 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010160 *lst = cur;
10161 while (cur != NULL) {
Daniel Veillard8d589042003-02-04 15:07:21 +000010162 if (oldctxt->validate && oldctxt->wellFormed &&
10163 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
10164 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
10165 oldctxt->myDoc, cur);
10166 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010167 cur->parent = NULL;
10168 cur = cur->next;
10169 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010170 ctxt->myDoc->children->children = NULL;
10171 }
10172 if (ctxt->myDoc != NULL) {
10173 xmlFreeNode(ctxt->myDoc->children);
10174 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010175 }
10176
10177 ctxt->sax = oldsax;
10178 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010179 if (newDoc != NULL)
10180 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010181
10182 return(ret);
10183}
10184
10185/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000010186 * xmlParseBalancedChunkMemoryRecover:
10187 * @doc: the document the chunk pertains to
10188 * @sax: the SAX handler bloc (possibly NULL)
10189 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10190 * @depth: Used for loop detection, use 0
10191 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10192 * @lst: the return value for the set of parsed nodes
10193 * @recover: return nodes even if the data is broken (use 0)
10194 *
10195 *
10196 * Parse a well-balanced chunk of an XML document
10197 * called by the parser
10198 * The allowed sequence for the Well Balanced Chunk is the one defined by
10199 * the content production in the XML grammar:
10200 *
10201 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10202 *
10203 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10204 * the parser error code otherwise
10205 *
10206 * In case recover is set to 1, the nodelist will not be empty even if
10207 * the parsed chunk is not well balanced.
10208 */
10209int
10210xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
10211 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
10212 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000010213 xmlParserCtxtPtr ctxt;
10214 xmlDocPtr newDoc;
10215 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000010216 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000010217 int size;
10218 int ret = 0;
10219
10220 if (depth > 40) {
10221 return(XML_ERR_ENTITY_LOOP);
10222 }
10223
10224
Daniel Veillardcda96922001-08-21 10:56:31 +000010225 if (lst != NULL)
10226 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010227 if (string == NULL)
10228 return(-1);
10229
10230 size = xmlStrlen(string);
10231
10232 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10233 if (ctxt == NULL) return(-1);
10234 ctxt->userData = ctxt;
10235 if (sax != NULL) {
10236 oldsax = ctxt->sax;
10237 ctxt->sax = sax;
10238 if (user_data != NULL)
10239 ctxt->userData = user_data;
10240 }
10241 newDoc = xmlNewDoc(BAD_CAST "1.0");
10242 if (newDoc == NULL) {
10243 xmlFreeParserCtxt(ctxt);
10244 return(-1);
10245 }
10246 if (doc != NULL) {
10247 newDoc->intSubset = doc->intSubset;
10248 newDoc->extSubset = doc->extSubset;
10249 }
10250 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10251 if (newDoc->children == NULL) {
10252 if (sax != NULL)
10253 ctxt->sax = oldsax;
10254 xmlFreeParserCtxt(ctxt);
10255 newDoc->intSubset = NULL;
10256 newDoc->extSubset = NULL;
10257 xmlFreeDoc(newDoc);
10258 return(-1);
10259 }
10260 nodePush(ctxt, newDoc->children);
10261 if (doc == NULL) {
10262 ctxt->myDoc = newDoc;
10263 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000010264 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000010265 newDoc->children->doc = doc;
10266 }
10267 ctxt->instate = XML_PARSER_CONTENT;
10268 ctxt->depth = depth;
10269
10270 /*
10271 * Doing validity checking on chunk doesn't make sense
10272 */
10273 ctxt->validate = 0;
10274 ctxt->loadsubset = 0;
10275
Daniel Veillardb39bc392002-10-26 19:29:51 +000010276 if ( doc != NULL ){
10277 content = doc->children;
10278 doc->children = NULL;
10279 xmlParseContent(ctxt);
10280 doc->children = content;
10281 }
10282 else {
10283 xmlParseContent(ctxt);
10284 }
Owen Taylor3473f882001-02-23 17:55:21 +000010285 if ((RAW == '<') && (NXT(1) == '/')) {
10286 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10287 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10288 ctxt->sax->error(ctxt->userData,
10289 "chunk is not well balanced\n");
10290 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010291 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010292 } else if (RAW != 0) {
10293 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10294 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10295 ctxt->sax->error(ctxt->userData,
10296 "extra content at the end of well balanced chunk\n");
10297 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010298 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010299 }
10300 if (ctxt->node != newDoc->children) {
10301 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10302 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10303 ctxt->sax->error(ctxt->userData,
10304 "chunk is not well balanced\n");
10305 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010306 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010307 }
10308
10309 if (!ctxt->wellFormed) {
10310 if (ctxt->errNo == 0)
10311 ret = 1;
10312 else
10313 ret = ctxt->errNo;
10314 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010315 ret = 0;
10316 }
10317
10318 if (lst != NULL && (ret == 0 || recover == 1)) {
10319 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010320
10321 /*
10322 * Return the newly created nodeset after unlinking it from
10323 * they pseudo parent.
10324 */
10325 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010326 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010327 while (cur != NULL) {
10328 cur->parent = NULL;
10329 cur = cur->next;
10330 }
10331 newDoc->children->children = NULL;
10332 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000010333
Owen Taylor3473f882001-02-23 17:55:21 +000010334 if (sax != NULL)
10335 ctxt->sax = oldsax;
10336 xmlFreeParserCtxt(ctxt);
10337 newDoc->intSubset = NULL;
10338 newDoc->extSubset = NULL;
10339 xmlFreeDoc(newDoc);
10340
10341 return(ret);
10342}
10343
10344/**
10345 * xmlSAXParseEntity:
10346 * @sax: the SAX handler block
10347 * @filename: the filename
10348 *
10349 * parse an XML external entity out of context and build a tree.
10350 * It use the given SAX function block to handle the parsing callback.
10351 * If sax is NULL, fallback to the default DOM tree building routines.
10352 *
10353 * [78] extParsedEnt ::= TextDecl? content
10354 *
10355 * This correspond to a "Well Balanced" chunk
10356 *
10357 * Returns the resulting document tree
10358 */
10359
10360xmlDocPtr
10361xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
10362 xmlDocPtr ret;
10363 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010364
10365 ctxt = xmlCreateFileParserCtxt(filename);
10366 if (ctxt == NULL) {
10367 return(NULL);
10368 }
10369 if (sax != NULL) {
10370 if (ctxt->sax != NULL)
10371 xmlFree(ctxt->sax);
10372 ctxt->sax = sax;
10373 ctxt->userData = NULL;
10374 }
10375
Owen Taylor3473f882001-02-23 17:55:21 +000010376 xmlParseExtParsedEnt(ctxt);
10377
10378 if (ctxt->wellFormed)
10379 ret = ctxt->myDoc;
10380 else {
10381 ret = NULL;
10382 xmlFreeDoc(ctxt->myDoc);
10383 ctxt->myDoc = NULL;
10384 }
10385 if (sax != NULL)
10386 ctxt->sax = NULL;
10387 xmlFreeParserCtxt(ctxt);
10388
10389 return(ret);
10390}
10391
10392/**
10393 * xmlParseEntity:
10394 * @filename: the filename
10395 *
10396 * parse an XML external entity out of context and build a tree.
10397 *
10398 * [78] extParsedEnt ::= TextDecl? content
10399 *
10400 * This correspond to a "Well Balanced" chunk
10401 *
10402 * Returns the resulting document tree
10403 */
10404
10405xmlDocPtr
10406xmlParseEntity(const char *filename) {
10407 return(xmlSAXParseEntity(NULL, filename));
10408}
10409
10410/**
10411 * xmlCreateEntityParserCtxt:
10412 * @URL: the entity URL
10413 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010414 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000010415 *
10416 * Create a parser context for an external entity
10417 * Automatic support for ZLIB/Compress compressed document is provided
10418 * by default if found at compile-time.
10419 *
10420 * Returns the new parser context or NULL
10421 */
10422xmlParserCtxtPtr
10423xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10424 const xmlChar *base) {
10425 xmlParserCtxtPtr ctxt;
10426 xmlParserInputPtr inputStream;
10427 char *directory = NULL;
10428 xmlChar *uri;
10429
10430 ctxt = xmlNewParserCtxt();
10431 if (ctxt == NULL) {
10432 return(NULL);
10433 }
10434
10435 uri = xmlBuildURI(URL, base);
10436
10437 if (uri == NULL) {
10438 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
10439 if (inputStream == NULL) {
10440 xmlFreeParserCtxt(ctxt);
10441 return(NULL);
10442 }
10443
10444 inputPush(ctxt, inputStream);
10445
10446 if ((ctxt->directory == NULL) && (directory == NULL))
10447 directory = xmlParserGetDirectory((char *)URL);
10448 if ((ctxt->directory == NULL) && (directory != NULL))
10449 ctxt->directory = directory;
10450 } else {
10451 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
10452 if (inputStream == NULL) {
10453 xmlFree(uri);
10454 xmlFreeParserCtxt(ctxt);
10455 return(NULL);
10456 }
10457
10458 inputPush(ctxt, inputStream);
10459
10460 if ((ctxt->directory == NULL) && (directory == NULL))
10461 directory = xmlParserGetDirectory((char *)uri);
10462 if ((ctxt->directory == NULL) && (directory != NULL))
10463 ctxt->directory = directory;
10464 xmlFree(uri);
10465 }
10466
10467 return(ctxt);
10468}
10469
/************************************************************************
 *                                                                      *
 *              Front ends when parsing from a file                     *
 *                                                                      *
 ************************************************************************/
10475
10476/**
10477 * xmlCreateFileParserCtxt:
10478 * @filename: the filename
10479 *
10480 * Create a parser context for a file content.
10481 * Automatic support for ZLIB/Compress compressed document is provided
10482 * by default if found at compile-time.
10483 *
10484 * Returns the new parser context or NULL
10485 */
10486xmlParserCtxtPtr
10487xmlCreateFileParserCtxt(const char *filename)
10488{
10489 xmlParserCtxtPtr ctxt;
10490 xmlParserInputPtr inputStream;
Igor Zlatkovicce076162003-02-23 13:39:39 +000010491 char *canonicFilename;
Owen Taylor3473f882001-02-23 17:55:21 +000010492 char *directory = NULL;
10493
Owen Taylor3473f882001-02-23 17:55:21 +000010494 ctxt = xmlNewParserCtxt();
10495 if (ctxt == NULL) {
10496 if (xmlDefaultSAXHandler.error != NULL) {
10497 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
10498 }
10499 return(NULL);
10500 }
10501
Daniel Veillardc64b8e92003-02-24 11:47:13 +000010502 canonicFilename = (char *) xmlCanonicPath((const xmlChar *) filename);
Igor Zlatkovicce076162003-02-23 13:39:39 +000010503 if (canonicFilename == NULL) {
10504 if (xmlDefaultSAXHandler.error != NULL) {
10505 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
10506 }
10507 return(NULL);
10508 }
10509
10510 inputStream = xmlLoadExternalEntity(canonicFilename, NULL, ctxt);
10511 xmlFree(canonicFilename);
Owen Taylor3473f882001-02-23 17:55:21 +000010512 if (inputStream == NULL) {
10513 xmlFreeParserCtxt(ctxt);
10514 return(NULL);
10515 }
10516
Owen Taylor3473f882001-02-23 17:55:21 +000010517 inputPush(ctxt, inputStream);
10518 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010519 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000010520 if ((ctxt->directory == NULL) && (directory != NULL))
10521 ctxt->directory = directory;
10522
10523 return(ctxt);
10524}
10525
10526/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010527 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000010528 * @sax: the SAX handler block
10529 * @filename: the filename
10530 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10531 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000010532 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000010533 *
10534 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10535 * compressed document is provided by default if found at compile-time.
10536 * It use the given SAX function block to handle the parsing callback.
10537 * If sax is NULL, fallback to the default DOM tree building routines.
10538 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000010539 * User data (void *) is stored within the parser context in the
10540 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000010541 *
Owen Taylor3473f882001-02-23 17:55:21 +000010542 * Returns the resulting document tree
10543 */
10544
10545xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000010546xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
10547 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000010548 xmlDocPtr ret;
10549 xmlParserCtxtPtr ctxt;
10550 char *directory = NULL;
10551
Daniel Veillard635ef722001-10-29 11:48:19 +000010552 xmlInitParser();
10553
Owen Taylor3473f882001-02-23 17:55:21 +000010554 ctxt = xmlCreateFileParserCtxt(filename);
10555 if (ctxt == NULL) {
10556 return(NULL);
10557 }
10558 if (sax != NULL) {
10559 if (ctxt->sax != NULL)
10560 xmlFree(ctxt->sax);
10561 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010562 }
Daniel Veillarda293c322001-10-02 13:54:14 +000010563 if (data!=NULL) {
10564 ctxt->_private=data;
10565 }
Owen Taylor3473f882001-02-23 17:55:21 +000010566
10567 if ((ctxt->directory == NULL) && (directory == NULL))
10568 directory = xmlParserGetDirectory(filename);
10569 if ((ctxt->directory == NULL) && (directory != NULL))
10570 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
10571
Daniel Veillarddad3f682002-11-17 16:47:27 +000010572 ctxt->recovery = recovery;
10573
Owen Taylor3473f882001-02-23 17:55:21 +000010574 xmlParseDocument(ctxt);
10575
10576 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10577 else {
10578 ret = NULL;
10579 xmlFreeDoc(ctxt->myDoc);
10580 ctxt->myDoc = NULL;
10581 }
10582 if (sax != NULL)
10583 ctxt->sax = NULL;
10584 xmlFreeParserCtxt(ctxt);
10585
10586 return(ret);
10587}
10588
10589/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010590 * xmlSAXParseFile:
10591 * @sax: the SAX handler block
10592 * @filename: the filename
10593 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10594 * documents
10595 *
10596 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10597 * compressed document is provided by default if found at compile-time.
10598 * It use the given SAX function block to handle the parsing callback.
10599 * If sax is NULL, fallback to the default DOM tree building routines.
10600 *
10601 * Returns the resulting document tree
10602 */
10603
10604xmlDocPtr
10605xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
10606 int recovery) {
10607 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
10608}
10609
10610/**
Owen Taylor3473f882001-02-23 17:55:21 +000010611 * xmlRecoverDoc:
10612 * @cur: a pointer to an array of xmlChar
10613 *
10614 * parse an XML in-memory document and build a tree.
10615 * In the case the document is not Well Formed, a tree is built anyway
10616 *
10617 * Returns the resulting document tree
10618 */
10619
10620xmlDocPtr
10621xmlRecoverDoc(xmlChar *cur) {
10622 return(xmlSAXParseDoc(NULL, cur, 1));
10623}
10624
10625/**
10626 * xmlParseFile:
10627 * @filename: the filename
10628 *
10629 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10630 * compressed document is provided by default if found at compile-time.
10631 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000010632 * Returns the resulting document tree if the file was wellformed,
10633 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000010634 */
10635
10636xmlDocPtr
10637xmlParseFile(const char *filename) {
10638 return(xmlSAXParseFile(NULL, filename, 0));
10639}
10640
10641/**
10642 * xmlRecoverFile:
10643 * @filename: the filename
10644 *
10645 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10646 * compressed document is provided by default if found at compile-time.
10647 * In the case the document is not Well Formed, a tree is built anyway
10648 *
10649 * Returns the resulting document tree
10650 */
10651
10652xmlDocPtr
10653xmlRecoverFile(const char *filename) {
10654 return(xmlSAXParseFile(NULL, filename, 1));
10655}
10656
10657
10658/**
10659 * xmlSetupParserForBuffer:
10660 * @ctxt: an XML parser context
10661 * @buffer: a xmlChar * buffer
10662 * @filename: a file name
10663 *
10664 * Setup the parser context to parse a new buffer; Clears any prior
10665 * contents from the parser context. The buffer parameter must not be
10666 * NULL, but the filename parameter can be
10667 */
10668void
10669xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
10670 const char* filename)
10671{
10672 xmlParserInputPtr input;
10673
10674 input = xmlNewInputStream(ctxt);
10675 if (input == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +000010676 xmlGenericError(xmlGenericErrorContext,
10677 "malloc");
Owen Taylor3473f882001-02-23 17:55:21 +000010678 xmlFree(ctxt);
10679 return;
10680 }
10681
10682 xmlClearParserCtxt(ctxt);
10683 if (filename != NULL)
Daniel Veillard85095e22003-04-23 13:56:44 +000010684 input->filename = xmlCanonicPath(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000010685 input->base = buffer;
10686 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010687 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000010688 inputPush(ctxt, input);
10689}
10690
10691/**
10692 * xmlSAXUserParseFile:
10693 * @sax: a SAX handler
10694 * @user_data: The user data returned on SAX callbacks
10695 * @filename: a file name
10696 *
10697 * parse an XML file and call the given SAX handler routines.
10698 * Automatic support for ZLIB/Compress compressed document is provided
10699 *
10700 * Returns 0 in case of success or a error number otherwise
10701 */
10702int
10703xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10704 const char *filename) {
10705 int ret = 0;
10706 xmlParserCtxtPtr ctxt;
10707
10708 ctxt = xmlCreateFileParserCtxt(filename);
10709 if (ctxt == NULL) return -1;
10710 if (ctxt->sax != &xmlDefaultSAXHandler)
10711 xmlFree(ctxt->sax);
10712 ctxt->sax = sax;
10713 if (user_data != NULL)
10714 ctxt->userData = user_data;
10715
10716 xmlParseDocument(ctxt);
10717
10718 if (ctxt->wellFormed)
10719 ret = 0;
10720 else {
10721 if (ctxt->errNo != 0)
10722 ret = ctxt->errNo;
10723 else
10724 ret = -1;
10725 }
10726 if (sax != NULL)
10727 ctxt->sax = NULL;
10728 xmlFreeParserCtxt(ctxt);
10729
10730 return ret;
10731}
10732
/************************************************************************
 *                                                                      *
 *              Front ends when parsing from memory                     *
 *                                                                      *
 ************************************************************************/
10738
10739/**
10740 * xmlCreateMemoryParserCtxt:
10741 * @buffer: a pointer to a char array
10742 * @size: the size of the array
10743 *
10744 * Create a parser context for an XML in-memory document.
10745 *
10746 * Returns the new parser context or NULL
10747 */
10748xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010749xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010750 xmlParserCtxtPtr ctxt;
10751 xmlParserInputPtr input;
10752 xmlParserInputBufferPtr buf;
10753
10754 if (buffer == NULL)
10755 return(NULL);
10756 if (size <= 0)
10757 return(NULL);
10758
10759 ctxt = xmlNewParserCtxt();
10760 if (ctxt == NULL)
10761 return(NULL);
10762
10763 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010764 if (buf == NULL) {
10765 xmlFreeParserCtxt(ctxt);
10766 return(NULL);
10767 }
Owen Taylor3473f882001-02-23 17:55:21 +000010768
10769 input = xmlNewInputStream(ctxt);
10770 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010771 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010772 xmlFreeParserCtxt(ctxt);
10773 return(NULL);
10774 }
10775
10776 input->filename = NULL;
10777 input->buf = buf;
10778 input->base = input->buf->buffer->content;
10779 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010780 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010781
10782 inputPush(ctxt, input);
10783 return(ctxt);
10784}
10785
10786/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010787 * xmlSAXParseMemoryWithData:
10788 * @sax: the SAX handler block
10789 * @buffer: an pointer to a char array
10790 * @size: the size of the array
10791 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10792 * documents
10793 * @data: the userdata
10794 *
10795 * parse an XML in-memory block and use the given SAX function block
10796 * to handle the parsing callback. If sax is NULL, fallback to the default
10797 * DOM tree building routines.
10798 *
10799 * User data (void *) is stored within the parser context in the
10800 * context's _private member, so it is available nearly everywhere in libxml
10801 *
10802 * Returns the resulting document tree
10803 */
10804
10805xmlDocPtr
10806xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
10807 int size, int recovery, void *data) {
10808 xmlDocPtr ret;
10809 xmlParserCtxtPtr ctxt;
10810
10811 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10812 if (ctxt == NULL) return(NULL);
10813 if (sax != NULL) {
10814 if (ctxt->sax != NULL)
10815 xmlFree(ctxt->sax);
10816 ctxt->sax = sax;
10817 }
10818 if (data!=NULL) {
10819 ctxt->_private=data;
10820 }
10821
Daniel Veillardadba5f12003-04-04 16:09:01 +000010822 ctxt->recovery = recovery;
10823
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010824 xmlParseDocument(ctxt);
10825
10826 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10827 else {
10828 ret = NULL;
10829 xmlFreeDoc(ctxt->myDoc);
10830 ctxt->myDoc = NULL;
10831 }
10832 if (sax != NULL)
10833 ctxt->sax = NULL;
10834 xmlFreeParserCtxt(ctxt);
10835
10836 return(ret);
10837}
10838
10839/**
Owen Taylor3473f882001-02-23 17:55:21 +000010840 * xmlSAXParseMemory:
10841 * @sax: the SAX handler block
10842 * @buffer: an pointer to a char array
10843 * @size: the size of the array
10844 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10845 * documents
10846 *
10847 * parse an XML in-memory block and use the given SAX function block
10848 * to handle the parsing callback. If sax is NULL, fallback to the default
10849 * DOM tree building routines.
10850 *
10851 * Returns the resulting document tree
10852 */
10853xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010854xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10855 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010856 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010857}
10858
10859/**
10860 * xmlParseMemory:
10861 * @buffer: an pointer to a char array
10862 * @size: the size of the array
10863 *
10864 * parse an XML in-memory block and build a tree.
10865 *
10866 * Returns the resulting document tree
10867 */
10868
Daniel Veillard50822cb2001-07-26 20:05:51 +000010869xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010870 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10871}
10872
10873/**
10874 * xmlRecoverMemory:
10875 * @buffer: an pointer to a char array
10876 * @size: the size of the array
10877 *
10878 * parse an XML in-memory block and build a tree.
10879 * In the case the document is not Well Formed, a tree is built anyway
10880 *
10881 * Returns the resulting document tree
10882 */
10883
Daniel Veillard50822cb2001-07-26 20:05:51 +000010884xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010885 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10886}
10887
10888/**
10889 * xmlSAXUserParseMemory:
10890 * @sax: a SAX handler
10891 * @user_data: The user data returned on SAX callbacks
10892 * @buffer: an in-memory XML document input
10893 * @size: the length of the XML document in bytes
10894 *
10895 * A better SAX parsing routine.
10896 * parse an XML in-memory buffer and call the given SAX handler routines.
10897 *
10898 * Returns 0 in case of success or a error number otherwise
10899 */
10900int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010901 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010902 int ret = 0;
10903 xmlParserCtxtPtr ctxt;
10904 xmlSAXHandlerPtr oldsax = NULL;
10905
Daniel Veillard9e923512002-08-14 08:48:52 +000010906 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000010907 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10908 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000010909 oldsax = ctxt->sax;
10910 ctxt->sax = sax;
Daniel Veillard30211a02001-04-26 09:33:18 +000010911 if (user_data != NULL)
10912 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010913
10914 xmlParseDocument(ctxt);
10915
10916 if (ctxt->wellFormed)
10917 ret = 0;
10918 else {
10919 if (ctxt->errNo != 0)
10920 ret = ctxt->errNo;
10921 else
10922 ret = -1;
10923 }
Daniel Veillard9e923512002-08-14 08:48:52 +000010924 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000010925 xmlFreeParserCtxt(ctxt);
10926
10927 return ret;
10928}
10929
10930/**
10931 * xmlCreateDocParserCtxt:
10932 * @cur: a pointer to an array of xmlChar
10933 *
10934 * Creates a parser context for an XML in-memory document.
10935 *
10936 * Returns the new parser context or NULL
10937 */
10938xmlParserCtxtPtr
10939xmlCreateDocParserCtxt(xmlChar *cur) {
10940 int len;
10941
10942 if (cur == NULL)
10943 return(NULL);
10944 len = xmlStrlen(cur);
10945 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10946}
10947
10948/**
10949 * xmlSAXParseDoc:
10950 * @sax: the SAX handler block
10951 * @cur: a pointer to an array of xmlChar
10952 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10953 * documents
10954 *
10955 * parse an XML in-memory document and build a tree.
10956 * It use the given SAX function block to handle the parsing callback.
10957 * If sax is NULL, fallback to the default DOM tree building routines.
10958 *
10959 * Returns the resulting document tree
10960 */
10961
10962xmlDocPtr
10963xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10964 xmlDocPtr ret;
10965 xmlParserCtxtPtr ctxt;
10966
10967 if (cur == NULL) return(NULL);
10968
10969
10970 ctxt = xmlCreateDocParserCtxt(cur);
10971 if (ctxt == NULL) return(NULL);
10972 if (sax != NULL) {
10973 ctxt->sax = sax;
10974 ctxt->userData = NULL;
10975 }
10976
10977 xmlParseDocument(ctxt);
10978 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10979 else {
10980 ret = NULL;
10981 xmlFreeDoc(ctxt->myDoc);
10982 ctxt->myDoc = NULL;
10983 }
10984 if (sax != NULL)
10985 ctxt->sax = NULL;
10986 xmlFreeParserCtxt(ctxt);
10987
10988 return(ret);
10989}
10990
10991/**
10992 * xmlParseDoc:
10993 * @cur: a pointer to an array of xmlChar
10994 *
10995 * parse an XML in-memory document and build a tree.
10996 *
10997 * Returns the resulting document tree
10998 */
10999
11000xmlDocPtr
11001xmlParseDoc(xmlChar *cur) {
11002 return(xmlSAXParseDoc(NULL, cur, 0));
11003}
11004
Daniel Veillard8107a222002-01-13 14:10:10 +000011005/************************************************************************
11006 * *
11007 * Specific function to keep track of entities references *
11008 * and used by the XSLT debugger *
11009 * *
11010 ************************************************************************/
11011
11012static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
11013
11014/**
11015 * xmlAddEntityReference:
11016 * @ent : A valid entity
11017 * @firstNode : A valid first node for children of entity
11018 * @lastNode : A valid last node of children entity
11019 *
11020 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
11021 */
11022static void
11023xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
11024 xmlNodePtr lastNode)
11025{
11026 if (xmlEntityRefFunc != NULL) {
11027 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
11028 }
11029}
11030
11031
11032/**
11033 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000011034 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000011035 *
11036 * Set the function to call call back when a xml reference has been made
11037 */
11038void
11039xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
11040{
11041 xmlEntityRefFunc = func;
11042}
Owen Taylor3473f882001-02-23 17:55:21 +000011043
11044/************************************************************************
11045 * *
11046 * Miscellaneous *
11047 * *
11048 ************************************************************************/
11049
11050#ifdef LIBXML_XPATH_ENABLED
11051#include <libxml/xpath.h>
11052#endif
11053
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011054extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000011055static int xmlParserInitialized = 0;
11056
11057/**
11058 * xmlInitParser:
11059 *
11060 * Initialization function for the XML parser.
11061 * This is not reentrant. Call once before processing in case of
11062 * use in multithreaded programs.
11063 */
11064
11065void
11066xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000011067 if (xmlParserInitialized != 0)
11068 return;
Owen Taylor3473f882001-02-23 17:55:21 +000011069
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011070 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
11071 (xmlGenericError == NULL))
11072 initGenericErrorDefaultFunc(NULL);
Daniel Veillardd0463562001-10-13 09:15:48 +000011073 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000011074 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000011075 xmlInitCharEncodingHandlers();
11076 xmlInitializePredefinedEntities();
11077 xmlDefaultSAXHandlerInit();
11078 xmlRegisterDefaultInputCallbacks();
11079 xmlRegisterDefaultOutputCallbacks();
11080#ifdef LIBXML_HTML_ENABLED
11081 htmlInitAutoClose();
11082 htmlDefaultSAXHandlerInit();
11083#endif
11084#ifdef LIBXML_XPATH_ENABLED
11085 xmlXPathInit();
11086#endif
11087 xmlParserInitialized = 1;
11088}
11089
11090/**
11091 * xmlCleanupParser:
11092 *
11093 * Cleanup function for the XML parser. It tries to reclaim all
11094 * parsing related global memory allocated for the parser processing.
11095 * It doesn't deallocate any document related memory. Calling this
11096 * function should not prevent reusing the parser.
Daniel Veillard7424eb62003-01-24 14:14:52 +000011097 * One should call xmlCleanupParser() only when the process has
11098 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000011099 */
11100
11101void
11102xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000011103 xmlCleanupCharEncodingHandlers();
11104 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000011105#ifdef LIBXML_CATALOG_ENABLED
11106 xmlCatalogCleanup();
11107#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000011108 xmlCleanupThreads();
11109 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011110}