blob: af569703a2baa75036b0afc58766b0d1ddabaa0e [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
44#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000045#include <libxml/threads.h>
46#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000056#ifdef LIBXML_CATALOG_ENABLED
57#include <libxml/catalog.h>
58#endif
Owen Taylor3473f882001-02-23 17:55:21 +000059
60#ifdef HAVE_CTYPE_H
61#include <ctype.h>
62#endif
63#ifdef HAVE_STDLIB_H
64#include <stdlib.h>
65#endif
66#ifdef HAVE_SYS_STAT_H
67#include <sys/stat.h>
68#endif
69#ifdef HAVE_FCNTL_H
70#include <fcntl.h>
71#endif
72#ifdef HAVE_UNISTD_H
73#include <unistd.h>
74#endif
75#ifdef HAVE_ZLIB_H
76#include <zlib.h>
77#endif
78
79
Daniel Veillard21a0f912001-02-25 19:54:14 +000080#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000081#define XML_PARSER_BUFFER_SIZE 100
82
Daniel Veillard5997aca2002-03-18 18:36:20 +000083#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
84
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The table is terminated by a NULL entry.
 */

static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
93
94/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +000095xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
96 const xmlChar **str);
97
Daniel Veillard257d9102001-05-08 10:41:44 +000098static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +000099xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
100 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000101 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000102 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000103
Daniel Veillard8107a222002-01-13 14:10:10 +0000104static void
105xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
106 xmlNodePtr lastNode);
107
Daniel Veillard328f48c2002-11-15 15:24:34 +0000108static int
109xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
110 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Owen Taylor3473f882001-02-23 17:55:21 +0000111/************************************************************************
112 * *
113 * Parser stacks related functions and macros *
114 * *
115 ************************************************************************/
116
117xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
118 const xmlChar ** str);
119
120/*
121 * Generic function for accessing stacks in the Parser Context
122 */
123
/*
 * PUSH_AND_POP(scope, type, name):
 * @scope:  storage class given to the two generated functions
 * @type:   element type stored in the stack
 * @name:   stem of the context fields: ctxt->name (current top),
 *          ctxt->nameTab (array), ctxt->nameNr (used) and ctxt->nameMax
 *          (allocated)
 *
 * Generates name##Push() and name##Pop().
 *
 * name##Push() doubles the array when it is full, stores @value on top,
 * mirrors it in ctxt->name and returns the index used, or 0 when the
 * array could not be enlarged.  The enlarged array and the new capacity
 * are only committed once xmlRealloc() has succeeded, so on failure the
 * previous array is still owned by the context and the capacity still
 * matches it.
 *
 * name##Pop() returns 0 on an empty stack; otherwise it removes the top
 * entry, clears its slot, makes ctxt->name the new top (NULL when the
 * stack becomes empty) and returns the removed value.
 */
#define PUSH_AND_POP(scope, type, name)                                 \
scope int name##Push(xmlParserCtxtPtr ctxt, type value) {               \
    if (ctxt->name##Nr >= ctxt->name##Max) {                            \
        type *tmp;                                                      \
        tmp = (type *) xmlRealloc(ctxt->name##Tab,                      \
                 ctxt->name##Max * 2 * sizeof(ctxt->name##Tab[0]));     \
        if (tmp == NULL) {                                              \
            xmlGenericError(xmlGenericErrorContext,                     \
                    "realloc failed !\n");                              \
            return(0);                                                  \
        }                                                               \
        ctxt->name##Tab = tmp;                                          \
        ctxt->name##Max *= 2;                                           \
    }                                                                   \
    ctxt->name##Tab[ctxt->name##Nr] = value;                            \
    ctxt->name = value;                                                 \
    return(ctxt->name##Nr++);                                           \
}                                                                       \
scope type name##Pop(xmlParserCtxtPtr ctxt) {                           \
    type ret;                                                           \
    if (ctxt->name##Nr <= 0) return(0);                                 \
    ctxt->name##Nr--;                                                   \
    if (ctxt->name##Nr > 0)                                             \
        ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1];               \
    else                                                                \
        ctxt->name = NULL;                                              \
    ret = ctxt->name##Tab[ctxt->name##Nr];                              \
    ctxt->name##Tab[ctxt->name##Nr] = 0;                                \
    return(ret);                                                        \
}
152
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000153/**
154 * inputPop:
155 * @ctxt: an XML parser context
156 *
157 * Pops the top parser input from the input stack
158 *
159 * Returns the input just removed
160 */
161/**
162 * inputPush:
163 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000164 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000165 *
166 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000167 *
168 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000169 */
170/**
171 * namePop:
172 * @ctxt: an XML parser context
173 *
174 * Pops the top element name from the name stack
175 *
176 * Returns the name just removed
177 */
178/**
179 * namePush:
180 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000181 * @value: the element name
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000182 *
183 * Pushes a new element name on top of the name stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000184 *
185 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000186 */
187/**
188 * nodePop:
189 * @ctxt: an XML parser context
190 *
191 * Pops the top element node from the node stack
192 *
193 * Returns the node just removed
194 */
195/**
196 * nodePush:
197 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000198 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000199 *
200 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000201 *
202 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000203 */
/*
 * Those macros actually generate the functions:
 * inputPush()/inputPop(), nodePush()/nodePop() and namePush()/namePop(),
 * each operating on the matching ctxt-><name>Tab stack.
 */
PUSH_AND_POP(extern, xmlParserInputPtr, input)
PUSH_AND_POP(extern, xmlNodePtr, node)
PUSH_AND_POP(extern, xmlChar*, name)
210
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000211static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000212 if (ctxt->spaceNr >= ctxt->spaceMax) {
213 ctxt->spaceMax *= 2;
214 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
215 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
216 if (ctxt->spaceTab == NULL) {
217 xmlGenericError(xmlGenericErrorContext,
218 "realloc failed !\n");
219 return(0);
220 }
221 }
222 ctxt->spaceTab[ctxt->spaceNr] = val;
223 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
224 return(ctxt->spaceNr++);
225}
226
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000227static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000228 int ret;
229 if (ctxt->spaceNr <= 0) return(0);
230 ctxt->spaceNr--;
231 if (ctxt->spaceNr > 0)
232 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
233 else
234 ctxt->space = NULL;
235 ret = ctxt->spaceTab[ctxt->spaceNr];
236 ctxt->spaceTab[ctxt->spaceNr] = -1;
237 return(ret);
238}
239
240/*
241 * Macros for accessing the content. Those should be used only by the parser,
242 * and not exported.
243 *
244 * Dirty macros, i.e. one often need to make assumption on the context to
245 * use them
246 *
247 * CUR_PTR return the current pointer to the xmlChar to be parsed.
248 * To be used with extreme caution since operations consuming
249 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypassing any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
 *           to compare against ASCII based substrings.
258 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
259 * strings within the parser.
260 *
261 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
262 *
263 * NEXT Skip to the next character, this does the proper decoding
264 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000265 * NEXTL(l) Skip l xmlChar in the input buffer
Owen Taylor3473f882001-02-23 17:55:21 +0000266 * CUR_CHAR(l) returns the current unicode character (int), set l
267 * to the number of xmlChars used for the encoding [0-5].
268 * CUR_SCHAR same but operate on a string instead of the context
269 * COPY_BUF copy the current unicode char to the target buffer, increment
270 * the index
271 * GROW, SHRINK handling of input buffers
272 */
273
/*
 * All of these expect a variable named 'ctxt' (the parser context) to be
 * in scope at the point of use.
 */

/* Byte at the current position of the active input. */
#define RAW (*ctxt->input->cur)
/* Defined exactly like RAW here. */
#define CUR (*ctxt->input->cur)
/* Byte located 'val' positions after the current one; no bounds check. */
#define NXT(val) ctxt->input->cur[(val)]
/* The current position pointer itself (usable as an lvalue). */
#define CUR_PTR ctxt->input->cur

/*
 * Advance the current position and ctxt->nbChars by 'val' bytes, then:
 *  - hand over to xmlParserHandlePEReference() if the new current byte
 *    is '%';
 *  - if the new current byte is 0 and xmlParserInputGrow() reports no
 *    additional data, pop the input with xmlPopInput().
 * 'val' is evaluated twice.
 */
#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val),ctxt->input->cur += (val);                   \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)
286
Daniel Veillard46de64e2002-05-29 08:21:33 +0000287#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) \
288 xmlSHRINK (ctxt);
289
290static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
291 xmlParserInputShrink(ctxt->input);
292 if ((*ctxt->input->cur == 0) &&
293 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
294 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000295 }
Owen Taylor3473f882001-02-23 17:55:21 +0000296
Daniel Veillard46de64e2002-05-29 08:21:33 +0000297#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) \
298 xmlGROW (ctxt);
299
300static void xmlGROW (xmlParserCtxtPtr ctxt) {
301 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
302 if ((*ctxt->input->cur == 0) &&
303 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
304 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000305 }
Owen Taylor3473f882001-02-23 17:55:21 +0000306
/* Skip blanks at the current position; see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Move to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * Advance the current position and ctxt->nbChars by exactly one byte and
 * request more data when the new current byte is 0.  Unlike SKIP it
 * neither pops the input nor looks for a '%'.
 * NOTE(review): expands to a plain braced block, not do { } while (0),
 * so "NEXT1;" directly before an 'else' would not compile.
 */
#define NEXT1 {                                                         \
        ctxt->input->cur++;                                             \
        ctxt->nbChars++;                                                \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

/*
 * Advance the current position by 'l' bytes, updating the line/column
 * counters from the byte that is being left ('\n' starts a new line,
 * anything else adds one column), then hand over to
 * xmlParserHandlePEReference() if the new current byte is '%'.
 * NOTE(review): 'l' is not parenthesized in the expansion.
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->input->cur += l;                              \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
  } while (0)

/* Current character from the context; 'l' (an lvalue) receives its length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
/* Same as CUR_CHAR but reading from the string 's'. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * Append character 'v' to buffer 'b' at index 'i' and advance 'i':
 * a single byte store when l == 1, xmlCopyCharMultiByte() otherwise.
 * NOTE(review): expands to an unbraced if/else; wrap the call site in
 * braces when it is itself the body of an 'if'.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if (l == 1) b[i++] = (xmlChar) v;                                   \
    else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +0000332
333/**
334 * xmlSkipBlankChars:
335 * @ctxt: the XML parser context
336 *
 * Skip all blank characters found at that point in the input streams.
 * It pops finished entities in the process if allowable at that point.
339 *
340 * Returns the number of space chars skipped
341 */
342
343int
344xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000345 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000346
347 /*
348 * It's Okay to use CUR/NEXT here since all the blanks are on
349 * the ASCII range.
350 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000351 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
352 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000353 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000354 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000355 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000356 cur = ctxt->input->cur;
357 while (IS_BLANK(*cur)) {
358 if (*cur == '\n') {
359 ctxt->input->line++; ctxt->input->col = 1;
360 }
361 cur++;
362 res++;
363 if (*cur == 0) {
364 ctxt->input->cur = cur;
365 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
366 cur = ctxt->input->cur;
367 }
368 }
369 ctxt->input->cur = cur;
370 } else {
371 int cur;
372 do {
373 cur = CUR;
374 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
375 NEXT;
376 cur = CUR;
377 res++;
378 }
379 while ((cur == 0) && (ctxt->inputNr > 1) &&
380 (ctxt->instate != XML_PARSER_COMMENT)) {
381 xmlPopInput(ctxt);
382 cur = CUR;
383 }
384 /*
385 * Need to handle support of entities branching here
386 */
387 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
388 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
389 }
Owen Taylor3473f882001-02-23 17:55:21 +0000390 return(res);
391}
392
393/************************************************************************
394 * *
395 * Commodity functions to handle entities *
396 * *
397 ************************************************************************/
398
399/**
400 * xmlPopInput:
401 * @ctxt: an XML parser context
402 *
403 * xmlPopInput: the current input pointed by ctxt->input came to an end
404 * pop it and return the next char.
405 *
406 * Returns the current xmlChar in the parser context
407 */
408xmlChar
409xmlPopInput(xmlParserCtxtPtr ctxt) {
410 if (ctxt->inputNr == 1) return(0); /* End of main Input */
411 if (xmlParserDebugEntities)
412 xmlGenericError(xmlGenericErrorContext,
413 "Popping input %d\n", ctxt->inputNr);
414 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +0000415 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000416 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
417 return(xmlPopInput(ctxt));
418 return(CUR);
419}
420
421/**
422 * xmlPushInput:
423 * @ctxt: an XML parser context
424 * @input: an XML parser input fragment (entity, XML fragment ...).
425 *
426 * xmlPushInput: switch to a new input stream which is stacked on top
427 * of the previous one(s).
428 */
429void
430xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
431 if (input == NULL) return;
432
433 if (xmlParserDebugEntities) {
434 if ((ctxt->input != NULL) && (ctxt->input->filename))
435 xmlGenericError(xmlGenericErrorContext,
436 "%s(%d): ", ctxt->input->filename,
437 ctxt->input->line);
438 xmlGenericError(xmlGenericErrorContext,
439 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
440 }
441 inputPush(ctxt, input);
442 GROW;
443}
444
445/**
446 * xmlParseCharRef:
447 * @ctxt: an XML parser context
448 *
449 * parse Reference declarations
450 *
451 * [66] CharRef ::= '&#' [0-9]+ ';' |
452 * '&#x' [0-9a-fA-F]+ ';'
453 *
454 * [ WFC: Legal Character ]
455 * Characters referred to using character references must match the
456 * production for Char.
457 *
458 * Returns the value parsed (as an int), 0 in case of error
459 */
460int
461xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000462 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000463 int count = 0;
464
Owen Taylor3473f882001-02-23 17:55:21 +0000465 /*
466 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
467 */
Daniel Veillard561b7f82002-03-20 21:55:57 +0000468 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +0000469 (NXT(2) == 'x')) {
470 SKIP(3);
471 GROW;
472 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000473 if (count++ > 20) {
474 count = 0;
475 GROW;
476 }
477 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000478 val = val * 16 + (CUR - '0');
479 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
480 val = val * 16 + (CUR - 'a') + 10;
481 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
482 val = val * 16 + (CUR - 'A') + 10;
483 else {
484 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
485 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
486 ctxt->sax->error(ctxt->userData,
487 "xmlParseCharRef: invalid hexadecimal value\n");
488 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000489 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000490 val = 0;
491 break;
492 }
493 NEXT;
494 count++;
495 }
496 if (RAW == ';') {
497 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
498 ctxt->nbChars ++;
499 ctxt->input->cur++;
500 }
Daniel Veillard561b7f82002-03-20 21:55:57 +0000501 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +0000502 SKIP(2);
503 GROW;
504 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000505 if (count++ > 20) {
506 count = 0;
507 GROW;
508 }
509 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000510 val = val * 10 + (CUR - '0');
511 else {
512 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
513 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
514 ctxt->sax->error(ctxt->userData,
515 "xmlParseCharRef: invalid decimal value\n");
516 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000517 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000518 val = 0;
519 break;
520 }
521 NEXT;
522 count++;
523 }
524 if (RAW == ';') {
525 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
526 ctxt->nbChars ++;
527 ctxt->input->cur++;
528 }
529 } else {
530 ctxt->errNo = XML_ERR_INVALID_CHARREF;
531 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
532 ctxt->sax->error(ctxt->userData,
533 "xmlParseCharRef: invalid value\n");
534 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000535 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000536 }
537
538 /*
539 * [ WFC: Legal Character ]
540 * Characters referred to using character references must match the
541 * production for Char.
542 */
543 if (IS_CHAR(val)) {
544 return(val);
545 } else {
546 ctxt->errNo = XML_ERR_INVALID_CHAR;
547 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000548 ctxt->sax->error(ctxt->userData,
549 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000550 val);
551 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000552 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000553 }
554 return(0);
555}
556
557/**
558 * xmlParseStringCharRef:
559 * @ctxt: an XML parser context
560 * @str: a pointer to an index in the string
561 *
 * parse Reference declarations, variant parsing from a string rather
 * than an input flow.
564 *
565 * [66] CharRef ::= '&#' [0-9]+ ';' |
566 * '&#x' [0-9a-fA-F]+ ';'
567 *
568 * [ WFC: Legal Character ]
569 * Characters referred to using character references must match the
570 * production for Char.
571 *
572 * Returns the value parsed (as an int), 0 in case of error, str will be
573 * updated to the current value of the index
574 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000575static int
Owen Taylor3473f882001-02-23 17:55:21 +0000576xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
577 const xmlChar *ptr;
578 xmlChar cur;
579 int val = 0;
580
581 if ((str == NULL) || (*str == NULL)) return(0);
582 ptr = *str;
583 cur = *ptr;
584 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
585 ptr += 3;
586 cur = *ptr;
587 while (cur != ';') { /* Non input consuming loop */
588 if ((cur >= '0') && (cur <= '9'))
589 val = val * 16 + (cur - '0');
590 else if ((cur >= 'a') && (cur <= 'f'))
591 val = val * 16 + (cur - 'a') + 10;
592 else if ((cur >= 'A') && (cur <= 'F'))
593 val = val * 16 + (cur - 'A') + 10;
594 else {
595 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
596 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
597 ctxt->sax->error(ctxt->userData,
598 "xmlParseStringCharRef: invalid hexadecimal value\n");
599 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000600 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000601 val = 0;
602 break;
603 }
604 ptr++;
605 cur = *ptr;
606 }
607 if (cur == ';')
608 ptr++;
609 } else if ((cur == '&') && (ptr[1] == '#')){
610 ptr += 2;
611 cur = *ptr;
612 while (cur != ';') { /* Non input consuming loops */
613 if ((cur >= '0') && (cur <= '9'))
614 val = val * 10 + (cur - '0');
615 else {
616 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
617 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
618 ctxt->sax->error(ctxt->userData,
619 "xmlParseStringCharRef: invalid decimal value\n");
620 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000621 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000622 val = 0;
623 break;
624 }
625 ptr++;
626 cur = *ptr;
627 }
628 if (cur == ';')
629 ptr++;
630 } else {
631 ctxt->errNo = XML_ERR_INVALID_CHARREF;
632 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
633 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000634 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000635 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000636 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000637 return(0);
638 }
639 *str = ptr;
640
641 /*
642 * [ WFC: Legal Character ]
643 * Characters referred to using character references must match the
644 * production for Char.
645 */
646 if (IS_CHAR(val)) {
647 return(val);
648 } else {
649 ctxt->errNo = XML_ERR_INVALID_CHAR;
650 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
651 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000652 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000653 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000654 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000655 }
656 return(0);
657}
658
659/**
Daniel Veillardf5582f12002-06-11 10:08:16 +0000660 * xmlNewBlanksWrapperInputStream:
661 * @ctxt: an XML parser context
662 * @entity: an Entity pointer
663 *
664 * Create a new input stream for wrapping
665 * blanks around a PEReference
666 *
667 * Returns the new input stream or NULL
668 */
669
670static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
671
Daniel Veillardf4862f02002-09-10 11:13:43 +0000672static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +0000673xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
674 xmlParserInputPtr input;
675 xmlChar *buffer;
676 size_t length;
677 if (entity == NULL) {
678 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
679 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
680 ctxt->sax->error(ctxt->userData,
681 "internal: xmlNewBlanksWrapperInputStream entity = NULL\n");
682 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
683 return(NULL);
684 }
685 if (xmlParserDebugEntities)
686 xmlGenericError(xmlGenericErrorContext,
687 "new blanks wrapper for entity: %s\n", entity->name);
688 input = xmlNewInputStream(ctxt);
689 if (input == NULL) {
690 return(NULL);
691 }
692 length = xmlStrlen(entity->name) + 5;
693 buffer = xmlMalloc(length);
694 if (buffer == NULL) {
695 return(NULL);
696 }
697 buffer [0] = ' ';
698 buffer [1] = '%';
699 buffer [length-3] = ';';
700 buffer [length-2] = ' ';
701 buffer [length-1] = 0;
702 memcpy(buffer + 2, entity->name, length - 5);
703 input->free = deallocblankswrapper;
704 input->base = buffer;
705 input->cur = buffer;
706 input->length = length;
707 input->end = &buffer[length];
708 return(input);
709}
710
711/**
Owen Taylor3473f882001-02-23 17:55:21 +0000712 * xmlParserHandlePEReference:
713 * @ctxt: the parser context
714 *
715 * [69] PEReference ::= '%' Name ';'
716 *
717 * [ WFC: No Recursion ]
718 * A parsed entity must not contain a recursive
719 * reference to itself, either directly or indirectly.
720 *
721 * [ WFC: Entity Declared ]
722 * In a document without any DTD, a document with only an internal DTD
723 * subset which contains no parameter entity references, or a document
724 * with "standalone='yes'", ... ... The declaration of a parameter
725 * entity must precede any reference to it...
726 *
727 * [ VC: Entity Declared ]
728 * In a document with an external subset or external parameter entities
729 * with "standalone='no'", ... ... The declaration of a parameter entity
730 * must precede any reference to it...
731 *
732 * [ WFC: In DTD ]
733 * Parameter-entity references may only appear in the DTD.
734 * NOTE: misleading but this is handled.
735 *
736 * A PEReference may have been detected in the current input stream
737 * the handling is done accordingly to
738 * http://www.w3.org/TR/REC-xml#entproc
739 * i.e.
740 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000741 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000742 */
743void
744xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
745 xmlChar *name;
746 xmlEntityPtr entity = NULL;
747 xmlParserInputPtr input;
748
Owen Taylor3473f882001-02-23 17:55:21 +0000749 if (RAW != '%') return;
750 switch(ctxt->instate) {
751 case XML_PARSER_CDATA_SECTION:
752 return;
753 case XML_PARSER_COMMENT:
754 return;
755 case XML_PARSER_START_TAG:
756 return;
757 case XML_PARSER_END_TAG:
758 return;
759 case XML_PARSER_EOF:
760 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
761 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
762 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
763 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000764 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000765 return;
766 case XML_PARSER_PROLOG:
767 case XML_PARSER_START:
768 case XML_PARSER_MISC:
769 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
770 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
771 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
772 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000773 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000774 return;
775 case XML_PARSER_ENTITY_DECL:
776 case XML_PARSER_CONTENT:
777 case XML_PARSER_ATTRIBUTE_VALUE:
778 case XML_PARSER_PI:
779 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +0000780 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +0000781 /* we just ignore it there */
782 return;
783 case XML_PARSER_EPILOG:
784 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
785 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
786 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
787 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000788 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000789 return;
790 case XML_PARSER_ENTITY_VALUE:
791 /*
792 * NOTE: in the case of entity values, we don't do the
793 * substitution here since we need the literal
794 * entity value to be able to save the internal
795 * subset of the document.
796 * This will be handled by xmlStringDecodeEntities
797 */
798 return;
799 case XML_PARSER_DTD:
800 /*
801 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
802 * In the internal DTD subset, parameter-entity references
803 * can occur only where markup declarations can occur, not
804 * within markup declarations.
805 * In that case this is handled in xmlParseMarkupDecl
806 */
807 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
808 return;
Daniel Veillardf5582f12002-06-11 10:08:16 +0000809 if (IS_BLANK(NXT(1)) || NXT(1) == 0)
810 return;
Owen Taylor3473f882001-02-23 17:55:21 +0000811 break;
812 case XML_PARSER_IGNORE:
813 return;
814 }
815
816 NEXT;
817 name = xmlParseName(ctxt);
818 if (xmlParserDebugEntities)
819 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000820 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000821 if (name == NULL) {
822 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
823 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000824 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000825 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000826 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000827 } else {
828 if (RAW == ';') {
829 NEXT;
830 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
831 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
832 if (entity == NULL) {
833
834 /*
835 * [ WFC: Entity Declared ]
836 * In a document without any DTD, a document with only an
837 * internal DTD subset which contains no parameter entity
838 * references, or a document with "standalone='yes'", ...
839 * ... The declaration of a parameter entity must precede
840 * any reference to it...
841 */
842 if ((ctxt->standalone == 1) ||
843 ((ctxt->hasExternalSubset == 0) &&
844 (ctxt->hasPErefs == 0))) {
845 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
846 ctxt->sax->error(ctxt->userData,
847 "PEReference: %%%s; not found\n", name);
848 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000849 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000850 } else {
851 /*
852 * [ VC: Entity Declared ]
853 * In a document with an external subset or external
854 * parameter entities with "standalone='no'", ...
855 * ... The declaration of a parameter entity must precede
856 * any reference to it...
857 */
858 if ((!ctxt->disableSAX) &&
859 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
860 ctxt->vctxt.error(ctxt->vctxt.userData,
861 "PEReference: %%%s; not found\n", name);
862 } else if ((!ctxt->disableSAX) &&
863 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
864 ctxt->sax->warning(ctxt->userData,
865 "PEReference: %%%s; not found\n", name);
866 ctxt->valid = 0;
867 }
Daniel Veillardf5582f12002-06-11 10:08:16 +0000868 } else if (ctxt->input->free != deallocblankswrapper) {
869 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
870 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +0000871 } else {
872 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
873 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000874 xmlChar start[4];
875 xmlCharEncoding enc;
876
Owen Taylor3473f882001-02-23 17:55:21 +0000877 /*
878 * handle the extra spaces added before and after
879 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000880 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000881 */
882 input = xmlNewEntityInputStream(ctxt, entity);
883 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000884
885 /*
886 * Get the 4 first bytes and decode the charset
887 * if enc != XML_CHAR_ENCODING_NONE
888 * plug some encoding conversion routines.
889 */
890 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +0000891 if (entity->length >= 4) {
892 start[0] = RAW;
893 start[1] = NXT(1);
894 start[2] = NXT(2);
895 start[3] = NXT(3);
896 enc = xmlDetectCharEncoding(start, 4);
897 if (enc != XML_CHAR_ENCODING_NONE) {
898 xmlSwitchEncoding(ctxt, enc);
899 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000900 }
901
Owen Taylor3473f882001-02-23 17:55:21 +0000902 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
903 (RAW == '<') && (NXT(1) == '?') &&
904 (NXT(2) == 'x') && (NXT(3) == 'm') &&
905 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
906 xmlParseTextDecl(ctxt);
907 }
Owen Taylor3473f882001-02-23 17:55:21 +0000908 } else {
909 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
910 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000911 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000912 name);
913 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000914 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000915 }
916 }
917 } else {
918 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
919 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
920 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000921 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000922 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000923 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000924 }
925 xmlFree(name);
926 }
927}
928
/*
 * Macro used to grow the current buffer.
 *
 * Expects two variables in the enclosing scope: the xmlChar pointer
 * `buffer` and an integer named `<buffer>_size` holding its capacity.
 * The capacity is doubled and the block reallocated.
 *
 * On allocation failure the old block is released (the enclosing function
 * is about to return and would otherwise leak it), an error is reported
 * through xmlGenericError and the ENCLOSING FUNCTION returns NULL.
 * The macro can therefore only be used in functions returning a pointer.
 */
#define growBuffer(buffer) {						\
    xmlChar *growBuffer_tmp;						\
    buffer##_size *= 2;							\
    growBuffer_tmp = (xmlChar *)					\
        xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));		\
    if (growBuffer_tmp == NULL) {					\
        xmlFree(buffer);						\
        buffer = NULL;							\
        xmlGenericError(xmlGenericErrorContext, "realloc failed");	\
        return(NULL);							\
    }									\
    buffer = growBuffer_tmp;						\
}
941
942/**
943 * xmlStringDecodeEntities:
944 * @ctxt: the parser context
945 * @str: the input string
946 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
947 * @end: an end marker xmlChar, 0 if none
948 * @end2: an end marker xmlChar, 0 if none
949 * @end3: an end marker xmlChar, 0 if none
950 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000951 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +0000952 *
953 * [67] Reference ::= EntityRef | CharRef
954 *
955 * [69] PEReference ::= '%' Name ';'
956 *
957 * Returns A newly allocated string with the substitution done. The caller
958 * must deallocate it !
959 */
960xmlChar *
961xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
962 xmlChar end, xmlChar end2, xmlChar end3) {
963 xmlChar *buffer = NULL;
964 int buffer_size = 0;
965
966 xmlChar *current = NULL;
967 xmlEntityPtr ent;
968 int c,l;
969 int nbchars = 0;
970
971 if (str == NULL)
972 return(NULL);
973
974 if (ctxt->depth > 40) {
975 ctxt->errNo = XML_ERR_ENTITY_LOOP;
976 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
977 ctxt->sax->error(ctxt->userData,
978 "Detected entity reference loop\n");
979 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000980 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000981 return(NULL);
982 }
983
984 /*
985 * allocate a translation buffer.
986 */
987 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
988 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
989 if (buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +0000990 xmlGenericError(xmlGenericErrorContext,
991 "xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +0000992 return(NULL);
993 }
994
995 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000996 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +0000997 * we are operating on already parsed values.
998 */
999 c = CUR_SCHAR(str, l);
1000 while ((c != 0) && (c != end) && /* non input consuming loop */
1001 (c != end2) && (c != end3)) {
1002
1003 if (c == 0) break;
1004 if ((c == '&') && (str[1] == '#')) {
1005 int val = xmlParseStringCharRef(ctxt, &str);
1006 if (val != 0) {
1007 COPY_BUF(0,buffer,nbchars,val);
1008 }
1009 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1010 if (xmlParserDebugEntities)
1011 xmlGenericError(xmlGenericErrorContext,
1012 "String decoding Entity Reference: %.30s\n",
1013 str);
1014 ent = xmlParseStringEntityRef(ctxt, &str);
1015 if ((ent != NULL) &&
1016 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1017 if (ent->content != NULL) {
1018 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1019 } else {
1020 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1021 ctxt->sax->error(ctxt->userData,
1022 "internal error entity has no content\n");
1023 }
1024 } else if ((ent != NULL) && (ent->content != NULL)) {
1025 xmlChar *rep;
1026
1027 ctxt->depth++;
1028 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1029 0, 0, 0);
1030 ctxt->depth--;
1031 if (rep != NULL) {
1032 current = rep;
1033 while (*current != 0) { /* non input consuming loop */
1034 buffer[nbchars++] = *current++;
1035 if (nbchars >
1036 buffer_size - XML_PARSER_BUFFER_SIZE) {
1037 growBuffer(buffer);
1038 }
1039 }
1040 xmlFree(rep);
1041 }
1042 } else if (ent != NULL) {
1043 int i = xmlStrlen(ent->name);
1044 const xmlChar *cur = ent->name;
1045
1046 buffer[nbchars++] = '&';
1047 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1048 growBuffer(buffer);
1049 }
1050 for (;i > 0;i--)
1051 buffer[nbchars++] = *cur++;
1052 buffer[nbchars++] = ';';
1053 }
1054 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1055 if (xmlParserDebugEntities)
1056 xmlGenericError(xmlGenericErrorContext,
1057 "String decoding PE Reference: %.30s\n", str);
1058 ent = xmlParseStringPEReference(ctxt, &str);
1059 if (ent != NULL) {
1060 xmlChar *rep;
1061
1062 ctxt->depth++;
1063 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1064 0, 0, 0);
1065 ctxt->depth--;
1066 if (rep != NULL) {
1067 current = rep;
1068 while (*current != 0) { /* non input consuming loop */
1069 buffer[nbchars++] = *current++;
1070 if (nbchars >
1071 buffer_size - XML_PARSER_BUFFER_SIZE) {
1072 growBuffer(buffer);
1073 }
1074 }
1075 xmlFree(rep);
1076 }
1077 }
1078 } else {
1079 COPY_BUF(l,buffer,nbchars,c);
1080 str += l;
1081 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1082 growBuffer(buffer);
1083 }
1084 }
1085 c = CUR_SCHAR(str, l);
1086 }
1087 buffer[nbchars++] = 0;
1088 return(buffer);
1089}
1090
1091
1092/************************************************************************
1093 * *
1094 * Commodity functions to handle xmlChars *
1095 * *
1096 ************************************************************************/
1097
1098/**
1099 * xmlStrndup:
1100 * @cur: the input xmlChar *
1101 * @len: the len of @cur
1102 *
1103 * a strndup for array of xmlChar's
1104 *
1105 * Returns a new xmlChar * or NULL
1106 */
1107xmlChar *
1108xmlStrndup(const xmlChar *cur, int len) {
1109 xmlChar *ret;
1110
1111 if ((cur == NULL) || (len < 0)) return(NULL);
1112 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1113 if (ret == NULL) {
1114 xmlGenericError(xmlGenericErrorContext,
1115 "malloc of %ld byte failed\n",
1116 (len + 1) * (long)sizeof(xmlChar));
1117 return(NULL);
1118 }
1119 memcpy(ret, cur, len * sizeof(xmlChar));
1120 ret[len] = 0;
1121 return(ret);
1122}
1123
1124/**
1125 * xmlStrdup:
1126 * @cur: the input xmlChar *
1127 *
1128 * a strdup for array of xmlChar's. Since they are supposed to be
1129 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1130 * a termination mark of '0'.
1131 *
1132 * Returns a new xmlChar * or NULL
1133 */
1134xmlChar *
1135xmlStrdup(const xmlChar *cur) {
1136 const xmlChar *p = cur;
1137
1138 if (cur == NULL) return(NULL);
1139 while (*p != 0) p++; /* non input consuming */
1140 return(xmlStrndup(cur, p - cur));
1141}
1142
1143/**
1144 * xmlCharStrndup:
1145 * @cur: the input char *
1146 * @len: the len of @cur
1147 *
1148 * a strndup for char's to xmlChar's
1149 *
1150 * Returns a new xmlChar * or NULL
1151 */
1152
1153xmlChar *
1154xmlCharStrndup(const char *cur, int len) {
1155 int i;
1156 xmlChar *ret;
1157
1158 if ((cur == NULL) || (len < 0)) return(NULL);
1159 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1160 if (ret == NULL) {
1161 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1162 (len + 1) * (long)sizeof(xmlChar));
1163 return(NULL);
1164 }
1165 for (i = 0;i < len;i++)
1166 ret[i] = (xmlChar) cur[i];
1167 ret[len] = 0;
1168 return(ret);
1169}
1170
1171/**
1172 * xmlCharStrdup:
1173 * @cur: the input char *
1174 * @len: the len of @cur
1175 *
1176 * a strdup for char's to xmlChar's
1177 *
1178 * Returns a new xmlChar * or NULL
1179 */
1180
1181xmlChar *
1182xmlCharStrdup(const char *cur) {
1183 const char *p = cur;
1184
1185 if (cur == NULL) return(NULL);
1186 while (*p != '\0') p++; /* non input consuming */
1187 return(xmlCharStrndup(cur, p - cur));
1188}
1189
1190/**
1191 * xmlStrcmp:
1192 * @str1: the first xmlChar *
1193 * @str2: the second xmlChar *
1194 *
1195 * a strcmp for xmlChar's
1196 *
1197 * Returns the integer result of the comparison
1198 */
1199
1200int
1201xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1202 register int tmp;
1203
1204 if (str1 == str2) return(0);
1205 if (str1 == NULL) return(-1);
1206 if (str2 == NULL) return(1);
1207 do {
1208 tmp = *str1++ - *str2;
1209 if (tmp != 0) return(tmp);
1210 } while (*str2++ != 0);
1211 return 0;
1212}
1213
1214/**
1215 * xmlStrEqual:
1216 * @str1: the first xmlChar *
1217 * @str2: the second xmlChar *
1218 *
1219 * Check if both string are equal of have same content
1220 * Should be a bit more readable and faster than xmlStrEqual()
1221 *
1222 * Returns 1 if they are equal, 0 if they are different
1223 */
1224
1225int
1226xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1227 if (str1 == str2) return(1);
1228 if (str1 == NULL) return(0);
1229 if (str2 == NULL) return(0);
1230 do {
1231 if (*str1++ != *str2) return(0);
1232 } while (*str2++);
1233 return(1);
1234}
1235
1236/**
1237 * xmlStrncmp:
1238 * @str1: the first xmlChar *
1239 * @str2: the second xmlChar *
1240 * @len: the max comparison length
1241 *
1242 * a strncmp for xmlChar's
1243 *
1244 * Returns the integer result of the comparison
1245 */
1246
1247int
1248xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1249 register int tmp;
1250
1251 if (len <= 0) return(0);
1252 if (str1 == str2) return(0);
1253 if (str1 == NULL) return(-1);
1254 if (str2 == NULL) return(1);
1255 do {
1256 tmp = *str1++ - *str2;
1257 if (tmp != 0 || --len == 0) return(tmp);
1258 } while (*str2++ != 0);
1259 return 0;
1260}
1261
Daniel Veillardb44025c2001-10-11 22:55:55 +00001262static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001263 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1264 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1265 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1266 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1267 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1268 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1269 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1270 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1271 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1272 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1273 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1274 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1275 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1276 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1277 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1278 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1279 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1280 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1281 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1282 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1283 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1284 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1285 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1286 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1287 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1288 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1289 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1290 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1291 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1292 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1293 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1294 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1295};
1296
1297/**
1298 * xmlStrcasecmp:
1299 * @str1: the first xmlChar *
1300 * @str2: the second xmlChar *
1301 *
1302 * a strcasecmp for xmlChar's
1303 *
1304 * Returns the integer result of the comparison
1305 */
1306
1307int
1308xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1309 register int tmp;
1310
1311 if (str1 == str2) return(0);
1312 if (str1 == NULL) return(-1);
1313 if (str2 == NULL) return(1);
1314 do {
1315 tmp = casemap[*str1++] - casemap[*str2];
1316 if (tmp != 0) return(tmp);
1317 } while (*str2++ != 0);
1318 return 0;
1319}
1320
1321/**
1322 * xmlStrncasecmp:
1323 * @str1: the first xmlChar *
1324 * @str2: the second xmlChar *
1325 * @len: the max comparison length
1326 *
1327 * a strncasecmp for xmlChar's
1328 *
1329 * Returns the integer result of the comparison
1330 */
1331
1332int
1333xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1334 register int tmp;
1335
1336 if (len <= 0) return(0);
1337 if (str1 == str2) return(0);
1338 if (str1 == NULL) return(-1);
1339 if (str2 == NULL) return(1);
1340 do {
1341 tmp = casemap[*str1++] - casemap[*str2];
1342 if (tmp != 0 || --len == 0) return(tmp);
1343 } while (*str2++ != 0);
1344 return 0;
1345}
1346
1347/**
1348 * xmlStrchr:
1349 * @str: the xmlChar * array
1350 * @val: the xmlChar to search
1351 *
1352 * a strchr for xmlChar's
1353 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001354 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001355 */
1356
1357const xmlChar *
1358xmlStrchr(const xmlChar *str, xmlChar val) {
1359 if (str == NULL) return(NULL);
1360 while (*str != 0) { /* non input consuming */
1361 if (*str == val) return((xmlChar *) str);
1362 str++;
1363 }
1364 return(NULL);
1365}
1366
1367/**
1368 * xmlStrstr:
1369 * @str: the xmlChar * array (haystack)
1370 * @val: the xmlChar to search (needle)
1371 *
1372 * a strstr for xmlChar's
1373 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001374 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001375 */
1376
1377const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001378xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001379 int n;
1380
1381 if (str == NULL) return(NULL);
1382 if (val == NULL) return(NULL);
1383 n = xmlStrlen(val);
1384
1385 if (n == 0) return(str);
1386 while (*str != 0) { /* non input consuming */
1387 if (*str == *val) {
1388 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1389 }
1390 str++;
1391 }
1392 return(NULL);
1393}
1394
1395/**
1396 * xmlStrcasestr:
1397 * @str: the xmlChar * array (haystack)
1398 * @val: the xmlChar to search (needle)
1399 *
1400 * a case-ignoring strstr for xmlChar's
1401 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001402 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001403 */
1404
1405const xmlChar *
1406xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1407 int n;
1408
1409 if (str == NULL) return(NULL);
1410 if (val == NULL) return(NULL);
1411 n = xmlStrlen(val);
1412
1413 if (n == 0) return(str);
1414 while (*str != 0) { /* non input consuming */
1415 if (casemap[*str] == casemap[*val])
1416 if (!xmlStrncasecmp(str, val, n)) return(str);
1417 str++;
1418 }
1419 return(NULL);
1420}
1421
1422/**
1423 * xmlStrsub:
1424 * @str: the xmlChar * array (haystack)
1425 * @start: the index of the first char (zero based)
1426 * @len: the length of the substring
1427 *
1428 * Extract a substring of a given string
1429 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001430 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001431 */
1432
1433xmlChar *
1434xmlStrsub(const xmlChar *str, int start, int len) {
1435 int i;
1436
1437 if (str == NULL) return(NULL);
1438 if (start < 0) return(NULL);
1439 if (len < 0) return(NULL);
1440
1441 for (i = 0;i < start;i++) {
1442 if (*str == 0) return(NULL);
1443 str++;
1444 }
1445 if (*str == 0) return(NULL);
1446 return(xmlStrndup(str, len));
1447}
1448
1449/**
1450 * xmlStrlen:
1451 * @str: the xmlChar * array
1452 *
1453 * length of a xmlChar's string
1454 *
1455 * Returns the number of xmlChar contained in the ARRAY.
1456 */
1457
1458int
1459xmlStrlen(const xmlChar *str) {
1460 int len = 0;
1461
1462 if (str == NULL) return(0);
1463 while (*str != 0) { /* non input consuming */
1464 str++;
1465 len++;
1466 }
1467 return(len);
1468}
1469
1470/**
1471 * xmlStrncat:
1472 * @cur: the original xmlChar * array
1473 * @add: the xmlChar * array added
1474 * @len: the length of @add
1475 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001476 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001477 * first bytes of @add.
1478 *
1479 * Returns a new xmlChar *, the original @cur is reallocated if needed
1480 * and should not be freed
1481 */
1482
1483xmlChar *
1484xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1485 int size;
1486 xmlChar *ret;
1487
1488 if ((add == NULL) || (len == 0))
1489 return(cur);
1490 if (cur == NULL)
1491 return(xmlStrndup(add, len));
1492
1493 size = xmlStrlen(cur);
1494 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1495 if (ret == NULL) {
1496 xmlGenericError(xmlGenericErrorContext,
1497 "xmlStrncat: realloc of %ld byte failed\n",
1498 (size + len + 1) * (long)sizeof(xmlChar));
1499 return(cur);
1500 }
1501 memcpy(&ret[size], add, len * sizeof(xmlChar));
1502 ret[size + len] = 0;
1503 return(ret);
1504}
1505
1506/**
1507 * xmlStrcat:
1508 * @cur: the original xmlChar * array
1509 * @add: the xmlChar * array added
1510 *
1511 * a strcat for array of xmlChar's. Since they are supposed to be
1512 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1513 * a termination mark of '0'.
1514 *
1515 * Returns a new xmlChar * containing the concatenated string.
1516 */
1517xmlChar *
1518xmlStrcat(xmlChar *cur, const xmlChar *add) {
1519 const xmlChar *p = add;
1520
1521 if (add == NULL) return(cur);
1522 if (cur == NULL)
1523 return(xmlStrdup(add));
1524
1525 while (*p != 0) p++; /* non input consuming */
1526 return(xmlStrncat(cur, add, p - add));
1527}
1528
1529/************************************************************************
1530 * *
1531 * Commodity functions, cleanup needed ? *
1532 * *
1533 ************************************************************************/
1534
1535/**
1536 * areBlanks:
1537 * @ctxt: an XML parser context
1538 * @str: a xmlChar *
1539 * @len: the size of @str
1540 *
1541 * Is this a sequence of blank chars that one can ignore ?
1542 *
1543 * Returns 1 if ignorable 0 otherwise.
1544 */
1545
1546static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1547 int i, ret;
1548 xmlNodePtr lastChild;
1549
Daniel Veillard05c13a22001-09-09 08:38:09 +00001550 /*
1551 * Don't spend time trying to differentiate them, the same callback is
1552 * used !
1553 */
1554 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001555 return(0);
1556
Owen Taylor3473f882001-02-23 17:55:21 +00001557 /*
1558 * Check for xml:space value.
1559 */
1560 if (*(ctxt->space) == 1)
1561 return(0);
1562
1563 /*
1564 * Check that the string is made of blanks
1565 */
1566 for (i = 0;i < len;i++)
1567 if (!(IS_BLANK(str[i]))) return(0);
1568
1569 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001570 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001571 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001572 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001573 if (ctxt->myDoc != NULL) {
1574 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1575 if (ret == 0) return(1);
1576 if (ret == 1) return(0);
1577 }
1578
1579 /*
1580 * Otherwise, heuristic :-\
1581 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001582 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001583 if ((ctxt->node->children == NULL) &&
1584 (RAW == '<') && (NXT(1) == '/')) return(0);
1585
1586 lastChild = xmlGetLastChild(ctxt->node);
1587 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001588 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1589 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001590 } else if (xmlNodeIsText(lastChild))
1591 return(0);
1592 else if ((ctxt->node->children != NULL) &&
1593 (xmlNodeIsText(ctxt->node->children)))
1594 return(0);
1595 return(1);
1596}
1597
Owen Taylor3473f882001-02-23 17:55:21 +00001598/************************************************************************
1599 * *
1600 * Extra stuff for namespace support *
1601 * Relates to http://www.w3.org/TR/WD-xml-names *
1602 * *
1603 ************************************************************************/
1604
1605/**
1606 * xmlSplitQName:
1607 * @ctxt: an XML parser context
1608 * @name: an XML parser context
1609 * @prefix: a xmlChar **
1610 *
1611 * parse an UTF8 encoded XML qualified name string
1612 *
1613 * [NS 5] QName ::= (Prefix ':')? LocalPart
1614 *
1615 * [NS 6] Prefix ::= NCName
1616 *
1617 * [NS 7] LocalPart ::= NCName
1618 *
1619 * Returns the local part, and prefix is updated
1620 * to get the Prefix if any.
1621 */
1622
1623xmlChar *
1624xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1625 xmlChar buf[XML_MAX_NAMELEN + 5];
1626 xmlChar *buffer = NULL;
1627 int len = 0;
1628 int max = XML_MAX_NAMELEN;
1629 xmlChar *ret = NULL;
1630 const xmlChar *cur = name;
1631 int c;
1632
1633 *prefix = NULL;
1634
1635#ifndef XML_XML_NAMESPACE
1636 /* xml: prefix is not really a namespace */
1637 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1638 (cur[2] == 'l') && (cur[3] == ':'))
1639 return(xmlStrdup(name));
1640#endif
1641
1642 /* nasty but valid */
1643 if (cur[0] == ':')
1644 return(xmlStrdup(name));
1645
1646 c = *cur++;
1647 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1648 buf[len++] = c;
1649 c = *cur++;
1650 }
1651 if (len >= max) {
1652 /*
1653 * Okay someone managed to make a huge name, so he's ready to pay
1654 * for the processing speed.
1655 */
1656 max = len * 2;
1657
1658 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1659 if (buffer == NULL) {
1660 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1661 ctxt->sax->error(ctxt->userData,
1662 "xmlSplitQName: out of memory\n");
1663 return(NULL);
1664 }
1665 memcpy(buffer, buf, len);
1666 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1667 if (len + 10 > max) {
1668 max *= 2;
1669 buffer = (xmlChar *) xmlRealloc(buffer,
1670 max * sizeof(xmlChar));
1671 if (buffer == NULL) {
1672 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1673 ctxt->sax->error(ctxt->userData,
1674 "xmlSplitQName: out of memory\n");
1675 return(NULL);
1676 }
1677 }
1678 buffer[len++] = c;
1679 c = *cur++;
1680 }
1681 buffer[len] = 0;
1682 }
1683
1684 if (buffer == NULL)
1685 ret = xmlStrndup(buf, len);
1686 else {
1687 ret = buffer;
1688 buffer = NULL;
1689 max = XML_MAX_NAMELEN;
1690 }
1691
1692
1693 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00001694 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001695 if (c == 0) return(ret);
1696 *prefix = ret;
1697 len = 0;
1698
Daniel Veillardbb284f42002-10-16 18:02:47 +00001699 /*
1700 * Check that the first character is proper to start
1701 * a new name
1702 */
1703 if (!(((c >= 0x61) && (c <= 0x7A)) ||
1704 ((c >= 0x41) && (c <= 0x5A)) ||
1705 (c == '_') || (c == ':'))) {
1706 int l;
1707 int first = CUR_SCHAR(cur, l);
1708
1709 if (!IS_LETTER(first) && (first != '_')) {
1710 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1711 ctxt->sax->error(ctxt->userData,
1712 "Name %s is not XML Namespace compliant\n",
1713 name);
1714 }
1715 }
1716 cur++;
1717
Owen Taylor3473f882001-02-23 17:55:21 +00001718 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1719 buf[len++] = c;
1720 c = *cur++;
1721 }
1722 if (len >= max) {
1723 /*
1724 * Okay someone managed to make a huge name, so he's ready to pay
1725 * for the processing speed.
1726 */
1727 max = len * 2;
1728
1729 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1730 if (buffer == NULL) {
1731 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1732 ctxt->sax->error(ctxt->userData,
1733 "xmlSplitQName: out of memory\n");
1734 return(NULL);
1735 }
1736 memcpy(buffer, buf, len);
1737 while (c != 0) { /* tested bigname2.xml */
1738 if (len + 10 > max) {
1739 max *= 2;
1740 buffer = (xmlChar *) xmlRealloc(buffer,
1741 max * sizeof(xmlChar));
1742 if (buffer == NULL) {
1743 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1744 ctxt->sax->error(ctxt->userData,
1745 "xmlSplitQName: out of memory\n");
1746 return(NULL);
1747 }
1748 }
1749 buffer[len++] = c;
1750 c = *cur++;
1751 }
1752 buffer[len] = 0;
1753 }
1754
1755 if (buffer == NULL)
1756 ret = xmlStrndup(buf, len);
1757 else {
1758 ret = buffer;
1759 }
1760 }
1761
1762 return(ret);
1763}
1764
1765/************************************************************************
1766 * *
1767 * The parser itself *
1768 * Relates to http://www.w3.org/TR/REC-xml *
1769 * *
1770 ************************************************************************/
1771
Daniel Veillard76d66f42001-05-16 21:05:17 +00001772static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001773/**
1774 * xmlParseName:
1775 * @ctxt: an XML parser context
1776 *
1777 * parse an XML name.
1778 *
1779 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1780 * CombiningChar | Extender
1781 *
1782 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1783 *
1784 * [6] Names ::= Name (S Name)*
1785 *
1786 * Returns the Name parsed or NULL
1787 */
1788
1789xmlChar *
1790xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001791 const xmlChar *in;
1792 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001793 int count = 0;
1794
1795 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001796
1797 /*
1798 * Accelerator for simple ASCII names
1799 */
1800 in = ctxt->input->cur;
1801 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1802 ((*in >= 0x41) && (*in <= 0x5A)) ||
1803 (*in == '_') || (*in == ':')) {
1804 in++;
1805 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1806 ((*in >= 0x41) && (*in <= 0x5A)) ||
1807 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001808 (*in == '_') || (*in == '-') ||
1809 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001810 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001811 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001812 count = in - ctxt->input->cur;
1813 ret = xmlStrndup(ctxt->input->cur, count);
1814 ctxt->input->cur = in;
1815 return(ret);
1816 }
1817 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001818 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001819}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001820
Daniel Veillard46de64e2002-05-29 08:21:33 +00001821/**
1822 * xmlParseNameAndCompare:
1823 * @ctxt: an XML parser context
1824 *
1825 * parse an XML name and compares for match
1826 * (specialized for endtag parsing)
1827 *
1828 *
1829 * Returns NULL for an illegal name, (xmlChar*) 1 for success
1830 * and the name for mismatch
1831 */
1832
Daniel Veillardf4862f02002-09-10 11:13:43 +00001833static xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00001834xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
1835 const xmlChar *cmp = other;
1836 const xmlChar *in;
1837 xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00001838
1839 GROW;
1840
1841 in = ctxt->input->cur;
1842 while (*in != 0 && *in == *cmp) {
1843 ++in;
1844 ++cmp;
1845 }
1846 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
1847 /* success */
1848 ctxt->input->cur = in;
1849 return (xmlChar*) 1;
1850 }
1851 /* failure (or end of input buffer), check with full function */
1852 ret = xmlParseName (ctxt);
1853 if (ret != 0 && xmlStrEqual (ret, other)) {
1854 xmlFree (ret);
1855 return (xmlChar*) 1;
1856 }
1857 return ret;
1858}
1859
Daniel Veillard76d66f42001-05-16 21:05:17 +00001860static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001861xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1862 xmlChar buf[XML_MAX_NAMELEN + 5];
1863 int len = 0, l;
1864 int c;
1865 int count = 0;
1866
1867 /*
1868 * Handler for more complex cases
1869 */
1870 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001871 c = CUR_CHAR(l);
1872 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1873 (!IS_LETTER(c) && (c != '_') &&
1874 (c != ':'))) {
1875 return(NULL);
1876 }
1877
1878 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1879 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1880 (c == '.') || (c == '-') ||
1881 (c == '_') || (c == ':') ||
1882 (IS_COMBINING(c)) ||
1883 (IS_EXTENDER(c)))) {
1884 if (count++ > 100) {
1885 count = 0;
1886 GROW;
1887 }
1888 COPY_BUF(l,buf,len,c);
1889 NEXTL(l);
1890 c = CUR_CHAR(l);
1891 if (len >= XML_MAX_NAMELEN) {
1892 /*
1893 * Okay someone managed to make a huge name, so he's ready to pay
1894 * for the processing speed.
1895 */
1896 xmlChar *buffer;
1897 int max = len * 2;
1898
1899 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1900 if (buffer == NULL) {
1901 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1902 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001903 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001904 return(NULL);
1905 }
1906 memcpy(buffer, buf, len);
1907 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1908 (c == '.') || (c == '-') ||
1909 (c == '_') || (c == ':') ||
1910 (IS_COMBINING(c)) ||
1911 (IS_EXTENDER(c))) {
1912 if (count++ > 100) {
1913 count = 0;
1914 GROW;
1915 }
1916 if (len + 10 > max) {
1917 max *= 2;
1918 buffer = (xmlChar *) xmlRealloc(buffer,
1919 max * sizeof(xmlChar));
1920 if (buffer == NULL) {
1921 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1922 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001923 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001924 return(NULL);
1925 }
1926 }
1927 COPY_BUF(l,buffer,len,c);
1928 NEXTL(l);
1929 c = CUR_CHAR(l);
1930 }
1931 buffer[len] = 0;
1932 return(buffer);
1933 }
1934 }
1935 return(xmlStrndup(buf, len));
1936}
1937
1938/**
1939 * xmlParseStringName:
1940 * @ctxt: an XML parser context
1941 * @str: a pointer to the string pointer (IN/OUT)
1942 *
1943 * parse an XML name.
1944 *
1945 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1946 * CombiningChar | Extender
1947 *
1948 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1949 *
1950 * [6] Names ::= Name (S Name)*
1951 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001952 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00001953 * is updated to the current location in the string.
1954 */
1955
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001956static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001957xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1958 xmlChar buf[XML_MAX_NAMELEN + 5];
1959 const xmlChar *cur = *str;
1960 int len = 0, l;
1961 int c;
1962
1963 c = CUR_SCHAR(cur, l);
1964 if (!IS_LETTER(c) && (c != '_') &&
1965 (c != ':')) {
1966 return(NULL);
1967 }
1968
1969 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1970 (c == '.') || (c == '-') ||
1971 (c == '_') || (c == ':') ||
1972 (IS_COMBINING(c)) ||
1973 (IS_EXTENDER(c))) {
1974 COPY_BUF(l,buf,len,c);
1975 cur += l;
1976 c = CUR_SCHAR(cur, l);
1977 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1978 /*
1979 * Okay someone managed to make a huge name, so he's ready to pay
1980 * for the processing speed.
1981 */
1982 xmlChar *buffer;
1983 int max = len * 2;
1984
1985 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1986 if (buffer == NULL) {
1987 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1988 ctxt->sax->error(ctxt->userData,
1989 "xmlParseStringName: out of memory\n");
1990 return(NULL);
1991 }
1992 memcpy(buffer, buf, len);
1993 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1994 (c == '.') || (c == '-') ||
1995 (c == '_') || (c == ':') ||
1996 (IS_COMBINING(c)) ||
1997 (IS_EXTENDER(c))) {
1998 if (len + 10 > max) {
1999 max *= 2;
2000 buffer = (xmlChar *) xmlRealloc(buffer,
2001 max * sizeof(xmlChar));
2002 if (buffer == NULL) {
2003 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2004 ctxt->sax->error(ctxt->userData,
2005 "xmlParseStringName: out of memory\n");
2006 return(NULL);
2007 }
2008 }
2009 COPY_BUF(l,buffer,len,c);
2010 cur += l;
2011 c = CUR_SCHAR(cur, l);
2012 }
2013 buffer[len] = 0;
2014 *str = cur;
2015 return(buffer);
2016 }
2017 }
2018 *str = cur;
2019 return(xmlStrndup(buf, len));
2020}
2021
2022/**
2023 * xmlParseNmtoken:
2024 * @ctxt: an XML parser context
2025 *
2026 * parse an XML Nmtoken.
2027 *
2028 * [7] Nmtoken ::= (NameChar)+
2029 *
2030 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2031 *
2032 * Returns the Nmtoken parsed or NULL
2033 */
2034
2035xmlChar *
2036xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2037 xmlChar buf[XML_MAX_NAMELEN + 5];
2038 int len = 0, l;
2039 int c;
2040 int count = 0;
2041
2042 GROW;
2043 c = CUR_CHAR(l);
2044
2045 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2046 (c == '.') || (c == '-') ||
2047 (c == '_') || (c == ':') ||
2048 (IS_COMBINING(c)) ||
2049 (IS_EXTENDER(c))) {
2050 if (count++ > 100) {
2051 count = 0;
2052 GROW;
2053 }
2054 COPY_BUF(l,buf,len,c);
2055 NEXTL(l);
2056 c = CUR_CHAR(l);
2057 if (len >= XML_MAX_NAMELEN) {
2058 /*
2059 * Okay someone managed to make a huge token, so he's ready to pay
2060 * for the processing speed.
2061 */
2062 xmlChar *buffer;
2063 int max = len * 2;
2064
2065 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
2066 if (buffer == NULL) {
2067 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2068 ctxt->sax->error(ctxt->userData,
2069 "xmlParseNmtoken: out of memory\n");
2070 return(NULL);
2071 }
2072 memcpy(buffer, buf, len);
2073 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2074 (c == '.') || (c == '-') ||
2075 (c == '_') || (c == ':') ||
2076 (IS_COMBINING(c)) ||
2077 (IS_EXTENDER(c))) {
2078 if (count++ > 100) {
2079 count = 0;
2080 GROW;
2081 }
2082 if (len + 10 > max) {
2083 max *= 2;
2084 buffer = (xmlChar *) xmlRealloc(buffer,
2085 max * sizeof(xmlChar));
2086 if (buffer == NULL) {
2087 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2088 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002089 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002090 return(NULL);
2091 }
2092 }
2093 COPY_BUF(l,buffer,len,c);
2094 NEXTL(l);
2095 c = CUR_CHAR(l);
2096 }
2097 buffer[len] = 0;
2098 return(buffer);
2099 }
2100 }
2101 if (len == 0)
2102 return(NULL);
2103 return(xmlStrndup(buf, len));
2104}
2105
2106/**
2107 * xmlParseEntityValue:
2108 * @ctxt: an XML parser context
2109 * @orig: if non-NULL store a copy of the original entity value
2110 *
2111 * parse a value for ENTITY declarations
2112 *
2113 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2114 * "'" ([^%&'] | PEReference | Reference)* "'"
2115 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002116 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002117 */
2118
2119xmlChar *
2120xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2121 xmlChar *buf = NULL;
2122 int len = 0;
2123 int size = XML_PARSER_BUFFER_SIZE;
2124 int c, l;
2125 xmlChar stop;
2126 xmlChar *ret = NULL;
2127 const xmlChar *cur = NULL;
2128 xmlParserInputPtr input;
2129
2130 if (RAW == '"') stop = '"';
2131 else if (RAW == '\'') stop = '\'';
2132 else {
2133 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2134 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2135 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2136 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002137 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002138 return(NULL);
2139 }
2140 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2141 if (buf == NULL) {
2142 xmlGenericError(xmlGenericErrorContext,
2143 "malloc of %d byte failed\n", size);
2144 return(NULL);
2145 }
2146
2147 /*
2148 * The content of the entity definition is copied in a buffer.
2149 */
2150
2151 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2152 input = ctxt->input;
2153 GROW;
2154 NEXT;
2155 c = CUR_CHAR(l);
2156 /*
2157 * NOTE: 4.4.5 Included in Literal
2158 * When a parameter entity reference appears in a literal entity
2159 * value, ... a single or double quote character in the replacement
2160 * text is always treated as a normal data character and will not
2161 * terminate the literal.
2162 * In practice it means we stop the loop only when back at parsing
2163 * the initial entity and the quote is found
2164 */
2165 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2166 (ctxt->input != input))) {
2167 if (len + 5 >= size) {
2168 size *= 2;
2169 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2170 if (buf == NULL) {
2171 xmlGenericError(xmlGenericErrorContext,
2172 "realloc of %d byte failed\n", size);
2173 return(NULL);
2174 }
2175 }
2176 COPY_BUF(l,buf,len,c);
2177 NEXTL(l);
2178 /*
2179 * Pop-up of finished entities.
2180 */
2181 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2182 xmlPopInput(ctxt);
2183
2184 GROW;
2185 c = CUR_CHAR(l);
2186 if (c == 0) {
2187 GROW;
2188 c = CUR_CHAR(l);
2189 }
2190 }
2191 buf[len] = 0;
2192
2193 /*
2194 * Raise problem w.r.t. '&' and '%' being used in non-entities
2195 * reference constructs. Note Charref will be handled in
2196 * xmlStringDecodeEntities()
2197 */
2198 cur = buf;
2199 while (*cur != 0) { /* non input consuming */
2200 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2201 xmlChar *name;
2202 xmlChar tmp = *cur;
2203
2204 cur++;
2205 name = xmlParseStringName(ctxt, &cur);
2206 if ((name == NULL) || (*cur != ';')) {
2207 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2208 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2209 ctxt->sax->error(ctxt->userData,
2210 "EntityValue: '%c' forbidden except for entities references\n",
2211 tmp);
2212 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002213 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002214 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002215 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2216 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002217 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2218 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2219 ctxt->sax->error(ctxt->userData,
2220 "EntityValue: PEReferences forbidden in internal subset\n",
2221 tmp);
2222 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002223 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002224 }
2225 if (name != NULL)
2226 xmlFree(name);
2227 }
2228 cur++;
2229 }
2230
2231 /*
2232 * Then PEReference entities are substituted.
2233 */
2234 if (c != stop) {
2235 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2236 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2237 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2238 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002239 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002240 xmlFree(buf);
2241 } else {
2242 NEXT;
2243 /*
2244 * NOTE: 4.4.7 Bypassed
2245 * When a general entity reference appears in the EntityValue in
2246 * an entity declaration, it is bypassed and left as is.
2247 * so XML_SUBSTITUTE_REF is not set here.
2248 */
2249 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2250 0, 0, 0);
2251 if (orig != NULL)
2252 *orig = buf;
2253 else
2254 xmlFree(buf);
2255 }
2256
2257 return(ret);
2258}
2259
2260/**
2261 * xmlParseAttValue:
2262 * @ctxt: an XML parser context
2263 *
2264 * parse a value for an attribute
2265 * Note: the parser won't do substitution of entities here, this
2266 * will be handled later in xmlStringGetNodeList
2267 *
2268 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2269 * "'" ([^<&'] | Reference)* "'"
2270 *
2271 * 3.3.3 Attribute-Value Normalization:
2272 * Before the value of an attribute is passed to the application or
2273 * checked for validity, the XML processor must normalize it as follows:
2274 * - a character reference is processed by appending the referenced
2275 * character to the attribute value
2276 * - an entity reference is processed by recursively processing the
2277 * replacement text of the entity
2278 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2279 * appending #x20 to the normalized value, except that only a single
2280 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2281 * parsed entity or the literal entity value of an internal parsed entity
2282 * - other characters are processed by appending them to the normalized value
2283 * If the declared value is not CDATA, then the XML processor must further
2284 * process the normalized attribute value by discarding any leading and
2285 * trailing space (#x20) characters, and by replacing sequences of space
2286 * (#x20) characters by a single space (#x20) character.
2287 * All attributes for which no declaration has been read should be treated
2288 * by a non-validating parser as if declared CDATA.
2289 *
2290 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2291 */
2292
2293xmlChar *
Daniel Veillarde72c7562002-05-31 09:47:30 +00002294xmlParseAttValueComplex(xmlParserCtxtPtr ctxt);
2295
2296xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002297xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2298 xmlChar limit = 0;
Daniel Veillardf4862f02002-09-10 11:13:43 +00002299 const xmlChar *in = NULL;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002300 xmlChar *ret = NULL;
2301 SHRINK;
2302 GROW;
Daniel Veillarde645e8c2002-10-22 17:35:37 +00002303 in = (xmlChar *) CUR_PTR;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002304 if (*in != '"' && *in != '\'') {
2305 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2306 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2307 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2308 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002309 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002310 return(NULL);
2311 }
2312 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2313 limit = *in;
2314 ++in;
2315
2316 while (*in != limit && *in >= 0x20 && *in <= 0x7f &&
2317 *in != '&' && *in != '<'
2318 ) {
2319 ++in;
2320 }
2321 if (*in != limit) {
2322 return xmlParseAttValueComplex(ctxt);
2323 }
2324 ++in;
2325 ret = xmlStrndup (CUR_PTR + 1, in - CUR_PTR - 2);
2326 CUR_PTR = in;
2327 return ret;
2328}
2329
2330xmlChar *
2331xmlParseAttValueComplex(xmlParserCtxtPtr ctxt) {
2332 xmlChar limit = 0;
2333 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002334 int len = 0;
2335 int buf_size = 0;
2336 int c, l;
2337 xmlChar *current = NULL;
2338 xmlEntityPtr ent;
2339
2340
2341 SHRINK;
2342 if (NXT(0) == '"') {
2343 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2344 limit = '"';
2345 NEXT;
2346 } else if (NXT(0) == '\'') {
2347 limit = '\'';
2348 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2349 NEXT;
2350 } else {
2351 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2352 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2353 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2354 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002355 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002356 return(NULL);
2357 }
2358
2359 /*
2360 * allocate a translation buffer.
2361 */
2362 buf_size = XML_PARSER_BUFFER_SIZE;
2363 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2364 if (buf == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002365 xmlGenericError(xmlGenericErrorContext,
2366 "xmlParseAttValue: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002367 return(NULL);
2368 }
2369
2370 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002371 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002372 */
2373 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002374 while ((NXT(0) != limit) && /* checked */
2375 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002376 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002377 if (c == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00002378 if (NXT(1) == '#') {
2379 int val = xmlParseCharRef(ctxt);
2380 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002381 if (ctxt->replaceEntities) {
2382 if (len > buf_size - 10) {
2383 growBuffer(buf);
2384 }
2385 buf[len++] = '&';
2386 } else {
2387 /*
2388 * The reparsing will be done in xmlStringGetNodeList()
2389 * called by the attribute() function in SAX.c
2390 */
2391 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002392
Daniel Veillard319a7422001-09-11 09:27:09 +00002393 if (len > buf_size - 10) {
2394 growBuffer(buf);
2395 }
2396 current = &buffer[0];
2397 while (*current != 0) { /* non input consuming */
2398 buf[len++] = *current++;
2399 }
Owen Taylor3473f882001-02-23 17:55:21 +00002400 }
2401 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002402 if (len > buf_size - 10) {
2403 growBuffer(buf);
2404 }
Owen Taylor3473f882001-02-23 17:55:21 +00002405 len += xmlCopyChar(0, &buf[len], val);
2406 }
2407 } else {
2408 ent = xmlParseEntityRef(ctxt);
2409 if ((ent != NULL) &&
2410 (ctxt->replaceEntities != 0)) {
2411 xmlChar *rep;
2412
2413 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2414 rep = xmlStringDecodeEntities(ctxt, ent->content,
2415 XML_SUBSTITUTE_REF, 0, 0, 0);
2416 if (rep != NULL) {
2417 current = rep;
2418 while (*current != 0) { /* non input consuming */
2419 buf[len++] = *current++;
2420 if (len > buf_size - 10) {
2421 growBuffer(buf);
2422 }
2423 }
2424 xmlFree(rep);
2425 }
2426 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002427 if (len > buf_size - 10) {
2428 growBuffer(buf);
2429 }
Owen Taylor3473f882001-02-23 17:55:21 +00002430 if (ent->content != NULL)
2431 buf[len++] = ent->content[0];
2432 }
2433 } else if (ent != NULL) {
2434 int i = xmlStrlen(ent->name);
2435 const xmlChar *cur = ent->name;
2436
2437 /*
2438 * This may look absurd but is needed to detect
2439 * entities problems
2440 */
2441 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2442 (ent->content != NULL)) {
2443 xmlChar *rep;
2444 rep = xmlStringDecodeEntities(ctxt, ent->content,
2445 XML_SUBSTITUTE_REF, 0, 0, 0);
2446 if (rep != NULL)
2447 xmlFree(rep);
2448 }
2449
2450 /*
2451 * Just output the reference
2452 */
2453 buf[len++] = '&';
2454 if (len > buf_size - i - 10) {
2455 growBuffer(buf);
2456 }
2457 for (;i > 0;i--)
2458 buf[len++] = *cur++;
2459 buf[len++] = ';';
2460 }
2461 }
2462 } else {
2463 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2464 COPY_BUF(l,buf,len,0x20);
2465 if (len > buf_size - 10) {
2466 growBuffer(buf);
2467 }
2468 } else {
2469 COPY_BUF(l,buf,len,c);
2470 if (len > buf_size - 10) {
2471 growBuffer(buf);
2472 }
2473 }
2474 NEXTL(l);
2475 }
2476 GROW;
2477 c = CUR_CHAR(l);
2478 }
2479 buf[len++] = 0;
2480 if (RAW == '<') {
2481 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2482 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2483 ctxt->sax->error(ctxt->userData,
2484 "Unescaped '<' not allowed in attributes values\n");
2485 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002486 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002487 } else if (RAW != limit) {
2488 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2489 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2490 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2491 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002492 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002493 } else
2494 NEXT;
2495 return(buf);
2496}
2497
2498/**
2499 * xmlParseSystemLiteral:
2500 * @ctxt: an XML parser context
2501 *
2502 * parse an XML Literal
2503 *
2504 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2505 *
2506 * Returns the SystemLiteral parsed or NULL
2507 */
2508
2509xmlChar *
2510xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2511 xmlChar *buf = NULL;
2512 int len = 0;
2513 int size = XML_PARSER_BUFFER_SIZE;
2514 int cur, l;
2515 xmlChar stop;
2516 int state = ctxt->instate;
2517 int count = 0;
2518
2519 SHRINK;
2520 if (RAW == '"') {
2521 NEXT;
2522 stop = '"';
2523 } else if (RAW == '\'') {
2524 NEXT;
2525 stop = '\'';
2526 } else {
2527 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2528 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2529 ctxt->sax->error(ctxt->userData,
2530 "SystemLiteral \" or ' expected\n");
2531 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002532 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002533 return(NULL);
2534 }
2535
2536 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2537 if (buf == NULL) {
2538 xmlGenericError(xmlGenericErrorContext,
2539 "malloc of %d byte failed\n", size);
2540 return(NULL);
2541 }
2542 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2543 cur = CUR_CHAR(l);
2544 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2545 if (len + 5 >= size) {
2546 size *= 2;
2547 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2548 if (buf == NULL) {
2549 xmlGenericError(xmlGenericErrorContext,
2550 "realloc of %d byte failed\n", size);
2551 ctxt->instate = (xmlParserInputState) state;
2552 return(NULL);
2553 }
2554 }
2555 count++;
2556 if (count > 50) {
2557 GROW;
2558 count = 0;
2559 }
2560 COPY_BUF(l,buf,len,cur);
2561 NEXTL(l);
2562 cur = CUR_CHAR(l);
2563 if (cur == 0) {
2564 GROW;
2565 SHRINK;
2566 cur = CUR_CHAR(l);
2567 }
2568 }
2569 buf[len] = 0;
2570 ctxt->instate = (xmlParserInputState) state;
2571 if (!IS_CHAR(cur)) {
2572 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2573 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2574 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2575 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002576 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002577 } else {
2578 NEXT;
2579 }
2580 return(buf);
2581}
2582
2583/**
2584 * xmlParsePubidLiteral:
2585 * @ctxt: an XML parser context
2586 *
2587 * parse an XML public literal
2588 *
2589 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2590 *
2591 * Returns the PubidLiteral parsed or NULL.
2592 */
2593
2594xmlChar *
2595xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2596 xmlChar *buf = NULL;
2597 int len = 0;
2598 int size = XML_PARSER_BUFFER_SIZE;
2599 xmlChar cur;
2600 xmlChar stop;
2601 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002602 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00002603
2604 SHRINK;
2605 if (RAW == '"') {
2606 NEXT;
2607 stop = '"';
2608 } else if (RAW == '\'') {
2609 NEXT;
2610 stop = '\'';
2611 } else {
2612 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2613 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2614 ctxt->sax->error(ctxt->userData,
2615 "SystemLiteral \" or ' expected\n");
2616 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002617 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002618 return(NULL);
2619 }
2620 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2621 if (buf == NULL) {
2622 xmlGenericError(xmlGenericErrorContext,
2623 "malloc of %d byte failed\n", size);
2624 return(NULL);
2625 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002626 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00002627 cur = CUR;
2628 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2629 if (len + 1 >= size) {
2630 size *= 2;
2631 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2632 if (buf == NULL) {
2633 xmlGenericError(xmlGenericErrorContext,
2634 "realloc of %d byte failed\n", size);
2635 return(NULL);
2636 }
2637 }
2638 buf[len++] = cur;
2639 count++;
2640 if (count > 50) {
2641 GROW;
2642 count = 0;
2643 }
2644 NEXT;
2645 cur = CUR;
2646 if (cur == 0) {
2647 GROW;
2648 SHRINK;
2649 cur = CUR;
2650 }
2651 }
2652 buf[len] = 0;
2653 if (cur != stop) {
2654 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2655 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2656 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2657 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002658 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002659 } else {
2660 NEXT;
2661 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002662 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00002663 return(buf);
2664}
2665
Daniel Veillard48b2f892001-02-25 16:11:03 +00002666void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002667/**
2668 * xmlParseCharData:
2669 * @ctxt: an XML parser context
2670 * @cdata: int indicating whether we are within a CDATA section
2671 *
2672 * parse a CharData section.
2673 * if we are within a CDATA section ']]>' marks an end of section.
2674 *
2675 * The right angle bracket (>) may be represented using the string "&gt;",
2676 * and must, for compatibility, be escaped using "&gt;" or a character
2677 * reference when it appears in the string "]]>" in content, when that
2678 * string is not marking the end of a CDATA section.
2679 *
2680 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2681 */
2682
2683void
2684xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00002685 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002686 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002687 int line = ctxt->input->line;
2688 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002689
2690 SHRINK;
2691 GROW;
2692 /*
2693 * Accelerated common case where input don't need to be
2694 * modified before passing it to the handler.
2695 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00002696 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002697 in = ctxt->input->cur;
2698 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002699get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00002700 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
2701 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002702 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002703 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002704 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002705 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002706 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002707 ctxt->input->line++;
2708 in++;
2709 }
2710 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002711 }
2712 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002713 if ((in[1] == ']') && (in[2] == '>')) {
2714 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2715 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2716 ctxt->sax->error(ctxt->userData,
2717 "Sequence ']]>' not allowed in content\n");
2718 ctxt->input->cur = in;
2719 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002720 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002721 return;
2722 }
2723 in++;
2724 goto get_more;
2725 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002726 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002727 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002728 if (IS_BLANK(*ctxt->input->cur)) {
2729 const xmlChar *tmp = ctxt->input->cur;
2730 ctxt->input->cur = in;
2731 if (areBlanks(ctxt, tmp, nbchar)) {
2732 if (ctxt->sax->ignorableWhitespace != NULL)
2733 ctxt->sax->ignorableWhitespace(ctxt->userData,
2734 tmp, nbchar);
2735 } else {
2736 if (ctxt->sax->characters != NULL)
2737 ctxt->sax->characters(ctxt->userData,
2738 tmp, nbchar);
2739 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002740 line = ctxt->input->line;
2741 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002742 } else {
2743 if (ctxt->sax->characters != NULL)
2744 ctxt->sax->characters(ctxt->userData,
2745 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002746 line = ctxt->input->line;
2747 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002748 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002749 }
2750 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002751 if (*in == 0xD) {
2752 in++;
2753 if (*in == 0xA) {
2754 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002755 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002756 ctxt->input->line++;
2757 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002758 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002759 in--;
2760 }
2761 if (*in == '<') {
2762 return;
2763 }
2764 if (*in == '&') {
2765 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002766 }
2767 SHRINK;
2768 GROW;
2769 in = ctxt->input->cur;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002770 } while ((*in >= 0x20) && (*in <= 0x7F));
Daniel Veillard48b2f892001-02-25 16:11:03 +00002771 nbchar = 0;
2772 }
Daniel Veillard50582112001-03-26 22:52:16 +00002773 ctxt->input->line = line;
2774 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002775 xmlParseCharDataComplex(ctxt, cdata);
2776}
2777
2778void
2779xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002780 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2781 int nbchar = 0;
2782 int cur, l;
2783 int count = 0;
2784
2785 SHRINK;
2786 GROW;
2787 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002788 while ((cur != '<') && /* checked */
2789 (cur != '&') &&
2790 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00002791 if ((cur == ']') && (NXT(1) == ']') &&
2792 (NXT(2) == '>')) {
2793 if (cdata) break;
2794 else {
2795 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2796 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2797 ctxt->sax->error(ctxt->userData,
2798 "Sequence ']]>' not allowed in content\n");
2799 /* Should this be relaxed ??? I see a "must here */
2800 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002801 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002802 }
2803 }
2804 COPY_BUF(l,buf,nbchar,cur);
2805 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2806 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002807 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002808 */
2809 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2810 if (areBlanks(ctxt, buf, nbchar)) {
2811 if (ctxt->sax->ignorableWhitespace != NULL)
2812 ctxt->sax->ignorableWhitespace(ctxt->userData,
2813 buf, nbchar);
2814 } else {
2815 if (ctxt->sax->characters != NULL)
2816 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2817 }
2818 }
2819 nbchar = 0;
2820 }
2821 count++;
2822 if (count > 50) {
2823 GROW;
2824 count = 0;
2825 }
2826 NEXTL(l);
2827 cur = CUR_CHAR(l);
2828 }
2829 if (nbchar != 0) {
2830 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002831 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002832 */
2833 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2834 if (areBlanks(ctxt, buf, nbchar)) {
2835 if (ctxt->sax->ignorableWhitespace != NULL)
2836 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2837 } else {
2838 if (ctxt->sax->characters != NULL)
2839 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2840 }
2841 }
2842 }
2843}
2844
2845/**
2846 * xmlParseExternalID:
2847 * @ctxt: an XML parser context
2848 * @publicID: a xmlChar** receiving PubidLiteral
2849 * @strict: indicate whether we should restrict parsing to only
2850 * production [75], see NOTE below
2851 *
2852 * Parse an External ID or a Public ID
2853 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002854 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002855 * 'PUBLIC' S PubidLiteral S SystemLiteral
2856 *
2857 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2858 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2859 *
2860 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2861 *
 * Returns the SystemLiteral; in the second case @publicID also receives
 * the PubidLiteral. If @strict is off it is possible to return NULL and
 * still have @publicID set.
2865 */
2866
2867xmlChar *
2868xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2869 xmlChar *URI = NULL;
2870
2871 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002872
2873 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002874 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2875 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2876 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2877 SKIP(6);
2878 if (!IS_BLANK(CUR)) {
2879 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2880 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2881 ctxt->sax->error(ctxt->userData,
2882 "Space required after 'SYSTEM'\n");
2883 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002884 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002885 }
2886 SKIP_BLANKS;
2887 URI = xmlParseSystemLiteral(ctxt);
2888 if (URI == NULL) {
2889 ctxt->errNo = XML_ERR_URI_REQUIRED;
2890 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2891 ctxt->sax->error(ctxt->userData,
2892 "xmlParseExternalID: SYSTEM, no URI\n");
2893 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002894 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002895 }
2896 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2897 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2898 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2899 SKIP(6);
2900 if (!IS_BLANK(CUR)) {
2901 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2902 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2903 ctxt->sax->error(ctxt->userData,
2904 "Space required after 'PUBLIC'\n");
2905 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002906 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002907 }
2908 SKIP_BLANKS;
2909 *publicID = xmlParsePubidLiteral(ctxt);
2910 if (*publicID == NULL) {
2911 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2912 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2913 ctxt->sax->error(ctxt->userData,
2914 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2915 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002916 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002917 }
2918 if (strict) {
2919 /*
2920 * We don't handle [83] so "S SystemLiteral" is required.
2921 */
2922 if (!IS_BLANK(CUR)) {
2923 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2924 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2925 ctxt->sax->error(ctxt->userData,
2926 "Space required after the Public Identifier\n");
2927 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002928 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002929 }
2930 } else {
2931 /*
2932 * We handle [83] so we return immediately, if
2933 * "S SystemLiteral" is not detected. From a purely parsing
2934 * point of view that's a nice mess.
2935 */
2936 const xmlChar *ptr;
2937 GROW;
2938
2939 ptr = CUR_PTR;
2940 if (!IS_BLANK(*ptr)) return(NULL);
2941
2942 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2943 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2944 }
2945 SKIP_BLANKS;
2946 URI = xmlParseSystemLiteral(ctxt);
2947 if (URI == NULL) {
2948 ctxt->errNo = XML_ERR_URI_REQUIRED;
2949 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2950 ctxt->sax->error(ctxt->userData,
2951 "xmlParseExternalID: PUBLIC, no URI\n");
2952 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002953 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002954 }
2955 }
2956 return(URI);
2957}
2958
2959/**
2960 * xmlParseComment:
2961 * @ctxt: an XML parser context
2962 *
2963 * Skip an XML (SGML) comment <!-- .... -->
2964 * The spec says that "For compatibility, the string "--" (double-hyphen)
2965 * must not occur within comments. "
2966 *
2967 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2968 */
2969void
2970xmlParseComment(xmlParserCtxtPtr ctxt) {
2971 xmlChar *buf = NULL;
2972 int len;
2973 int size = XML_PARSER_BUFFER_SIZE;
2974 int q, ql;
2975 int r, rl;
2976 int cur, l;
2977 xmlParserInputState state;
2978 xmlParserInputPtr input = ctxt->input;
2979 int count = 0;
2980
2981 /*
2982 * Check that there is a comment right here.
2983 */
2984 if ((RAW != '<') || (NXT(1) != '!') ||
2985 (NXT(2) != '-') || (NXT(3) != '-')) return;
2986
2987 state = ctxt->instate;
2988 ctxt->instate = XML_PARSER_COMMENT;
2989 SHRINK;
2990 SKIP(4);
2991 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2992 if (buf == NULL) {
2993 xmlGenericError(xmlGenericErrorContext,
2994 "malloc of %d byte failed\n", size);
2995 ctxt->instate = state;
2996 return;
2997 }
2998 q = CUR_CHAR(ql);
2999 NEXTL(ql);
3000 r = CUR_CHAR(rl);
3001 NEXTL(rl);
3002 cur = CUR_CHAR(l);
3003 len = 0;
3004 while (IS_CHAR(cur) && /* checked */
3005 ((cur != '>') ||
3006 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003007 if ((r == '-') && (q == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003008 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
3009 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3010 ctxt->sax->error(ctxt->userData,
3011 "Comment must not contain '--' (double-hyphen)`\n");
3012 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003013 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003014 }
3015 if (len + 5 >= size) {
3016 size *= 2;
3017 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3018 if (buf == NULL) {
3019 xmlGenericError(xmlGenericErrorContext,
3020 "realloc of %d byte failed\n", size);
3021 ctxt->instate = state;
3022 return;
3023 }
3024 }
3025 COPY_BUF(ql,buf,len,q);
3026 q = r;
3027 ql = rl;
3028 r = cur;
3029 rl = l;
3030
3031 count++;
3032 if (count > 50) {
3033 GROW;
3034 count = 0;
3035 }
3036 NEXTL(l);
3037 cur = CUR_CHAR(l);
3038 if (cur == 0) {
3039 SHRINK;
3040 GROW;
3041 cur = CUR_CHAR(l);
3042 }
3043 }
3044 buf[len] = 0;
3045 if (!IS_CHAR(cur)) {
3046 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
3047 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3048 ctxt->sax->error(ctxt->userData,
3049 "Comment not terminated \n<!--%.50s\n", buf);
3050 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003051 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003052 xmlFree(buf);
3053 } else {
3054 if (input != ctxt->input) {
3055 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3056 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3057 ctxt->sax->error(ctxt->userData,
3058"Comment doesn't start and stop in the same entity\n");
3059 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003060 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003061 }
3062 NEXT;
3063 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3064 (!ctxt->disableSAX))
3065 ctxt->sax->comment(ctxt->userData, buf);
3066 xmlFree(buf);
3067 }
3068 ctxt->instate = state;
3069}
3070
3071/**
3072 * xmlParsePITarget:
3073 * @ctxt: an XML parser context
3074 *
3075 * parse the name of a PI
3076 *
3077 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3078 *
3079 * Returns the PITarget name or NULL
3080 */
3081
3082xmlChar *
3083xmlParsePITarget(xmlParserCtxtPtr ctxt) {
3084 xmlChar *name;
3085
3086 name = xmlParseName(ctxt);
3087 if ((name != NULL) &&
3088 ((name[0] == 'x') || (name[0] == 'X')) &&
3089 ((name[1] == 'm') || (name[1] == 'M')) &&
3090 ((name[2] == 'l') || (name[2] == 'L'))) {
3091 int i;
3092 if ((name[0] == 'x') && (name[1] == 'm') &&
3093 (name[2] == 'l') && (name[3] == 0)) {
3094 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3095 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3096 ctxt->sax->error(ctxt->userData,
3097 "XML declaration allowed only at the start of the document\n");
3098 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003099 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003100 return(name);
3101 } else if (name[3] == 0) {
3102 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3103 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3104 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
3105 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003106 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003107 return(name);
3108 }
3109 for (i = 0;;i++) {
3110 if (xmlW3CPIs[i] == NULL) break;
3111 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3112 return(name);
3113 }
3114 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3115 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3116 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003117 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003118 }
3119 }
3120 return(name);
3121}
3122
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003123#ifdef LIBXML_CATALOG_ENABLED
3124/**
3125 * xmlParseCatalogPI:
3126 * @ctxt: an XML parser context
3127 * @catalog: the PI value string
3128 *
3129 * parse an XML Catalog Processing Instruction.
3130 *
3131 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
3132 *
3133 * Occurs only if allowed by the user and if happening in the Misc
3134 * part of the document before any doctype informations
3135 * This will add the given catalog to the parsing context in order
3136 * to be used if there is a resolution need further down in the document
3137 */
3138
3139static void
3140xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
3141 xmlChar *URL = NULL;
3142 const xmlChar *tmp, *base;
3143 xmlChar marker;
3144
3145 tmp = catalog;
3146 while (IS_BLANK(*tmp)) tmp++;
3147 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
3148 goto error;
3149 tmp += 7;
3150 while (IS_BLANK(*tmp)) tmp++;
3151 if (*tmp != '=') {
3152 return;
3153 }
3154 tmp++;
3155 while (IS_BLANK(*tmp)) tmp++;
3156 marker = *tmp;
3157 if ((marker != '\'') && (marker != '"'))
3158 goto error;
3159 tmp++;
3160 base = tmp;
3161 while ((*tmp != 0) && (*tmp != marker)) tmp++;
3162 if (*tmp == 0)
3163 goto error;
3164 URL = xmlStrndup(base, tmp - base);
3165 tmp++;
3166 while (IS_BLANK(*tmp)) tmp++;
3167 if (*tmp != 0)
3168 goto error;
3169
3170 if (URL != NULL) {
3171 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
3172 xmlFree(URL);
3173 }
3174 return;
3175
3176error:
3177 ctxt->errNo = XML_WAR_CATALOG_PI;
3178 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
3179 ctxt->sax->warning(ctxt->userData,
3180 "Catalog PI syntax error: %s\n", catalog);
3181 if (URL != NULL)
3182 xmlFree(URL);
3183}
3184#endif
3185
Owen Taylor3473f882001-02-23 17:55:21 +00003186/**
3187 * xmlParsePI:
3188 * @ctxt: an XML parser context
3189 *
3190 * parse an XML Processing Instruction.
3191 *
3192 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3193 *
3194 * The processing is transfered to SAX once parsed.
3195 */
3196
3197void
3198xmlParsePI(xmlParserCtxtPtr ctxt) {
3199 xmlChar *buf = NULL;
3200 int len = 0;
3201 int size = XML_PARSER_BUFFER_SIZE;
3202 int cur, l;
3203 xmlChar *target;
3204 xmlParserInputState state;
3205 int count = 0;
3206
3207 if ((RAW == '<') && (NXT(1) == '?')) {
3208 xmlParserInputPtr input = ctxt->input;
3209 state = ctxt->instate;
3210 ctxt->instate = XML_PARSER_PI;
3211 /*
3212 * this is a Processing Instruction.
3213 */
3214 SKIP(2);
3215 SHRINK;
3216
3217 /*
3218 * Parse the target name and check for special support like
3219 * namespace.
3220 */
3221 target = xmlParsePITarget(ctxt);
3222 if (target != NULL) {
3223 if ((RAW == '?') && (NXT(1) == '>')) {
3224 if (input != ctxt->input) {
3225 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3226 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3227 ctxt->sax->error(ctxt->userData,
3228 "PI declaration doesn't start and stop in the same entity\n");
3229 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003230 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003231 }
3232 SKIP(2);
3233
3234 /*
3235 * SAX: PI detected.
3236 */
3237 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3238 (ctxt->sax->processingInstruction != NULL))
3239 ctxt->sax->processingInstruction(ctxt->userData,
3240 target, NULL);
3241 ctxt->instate = state;
3242 xmlFree(target);
3243 return;
3244 }
3245 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3246 if (buf == NULL) {
3247 xmlGenericError(xmlGenericErrorContext,
3248 "malloc of %d byte failed\n", size);
3249 ctxt->instate = state;
3250 return;
3251 }
3252 cur = CUR;
3253 if (!IS_BLANK(cur)) {
3254 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3255 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3256 ctxt->sax->error(ctxt->userData,
3257 "xmlParsePI: PI %s space expected\n", target);
3258 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003259 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003260 }
3261 SKIP_BLANKS;
3262 cur = CUR_CHAR(l);
3263 while (IS_CHAR(cur) && /* checked */
3264 ((cur != '?') || (NXT(1) != '>'))) {
3265 if (len + 5 >= size) {
3266 size *= 2;
3267 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3268 if (buf == NULL) {
3269 xmlGenericError(xmlGenericErrorContext,
3270 "realloc of %d byte failed\n", size);
3271 ctxt->instate = state;
3272 return;
3273 }
3274 }
3275 count++;
3276 if (count > 50) {
3277 GROW;
3278 count = 0;
3279 }
3280 COPY_BUF(l,buf,len,cur);
3281 NEXTL(l);
3282 cur = CUR_CHAR(l);
3283 if (cur == 0) {
3284 SHRINK;
3285 GROW;
3286 cur = CUR_CHAR(l);
3287 }
3288 }
3289 buf[len] = 0;
3290 if (cur != '?') {
3291 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3292 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3293 ctxt->sax->error(ctxt->userData,
3294 "xmlParsePI: PI %s never end ...\n", target);
3295 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003296 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003297 } else {
3298 if (input != ctxt->input) {
3299 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3300 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3301 ctxt->sax->error(ctxt->userData,
3302 "PI declaration doesn't start and stop in the same entity\n");
3303 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003304 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003305 }
3306 SKIP(2);
3307
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003308#ifdef LIBXML_CATALOG_ENABLED
3309 if (((state == XML_PARSER_MISC) ||
3310 (state == XML_PARSER_START)) &&
3311 (xmlStrEqual(target, XML_CATALOG_PI))) {
3312 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3313 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3314 (allow == XML_CATA_ALLOW_ALL))
3315 xmlParseCatalogPI(ctxt, buf);
3316 }
3317#endif
3318
3319
Owen Taylor3473f882001-02-23 17:55:21 +00003320 /*
3321 * SAX: PI detected.
3322 */
3323 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3324 (ctxt->sax->processingInstruction != NULL))
3325 ctxt->sax->processingInstruction(ctxt->userData,
3326 target, buf);
3327 }
3328 xmlFree(buf);
3329 xmlFree(target);
3330 } else {
3331 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3332 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3333 ctxt->sax->error(ctxt->userData,
3334 "xmlParsePI : no target name\n");
3335 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003336 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003337 }
3338 ctxt->instate = state;
3339 }
3340}
3341
3342/**
3343 * xmlParseNotationDecl:
3344 * @ctxt: an XML parser context
3345 *
3346 * parse a notation declaration
3347 *
3348 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3349 *
3350 * Hence there is actually 3 choices:
3351 * 'PUBLIC' S PubidLiteral
3352 * 'PUBLIC' S PubidLiteral S SystemLiteral
3353 * and 'SYSTEM' S SystemLiteral
3354 *
3355 * See the NOTE on xmlParseExternalID().
3356 */
3357
3358void
3359xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3360 xmlChar *name;
3361 xmlChar *Pubid;
3362 xmlChar *Systemid;
3363
3364 if ((RAW == '<') && (NXT(1) == '!') &&
3365 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3366 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3367 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3368 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3369 xmlParserInputPtr input = ctxt->input;
3370 SHRINK;
3371 SKIP(10);
3372 if (!IS_BLANK(CUR)) {
3373 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3374 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3375 ctxt->sax->error(ctxt->userData,
3376 "Space required after '<!NOTATION'\n");
3377 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003378 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003379 return;
3380 }
3381 SKIP_BLANKS;
3382
Daniel Veillard76d66f42001-05-16 21:05:17 +00003383 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003384 if (name == NULL) {
3385 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3386 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3387 ctxt->sax->error(ctxt->userData,
3388 "NOTATION: Name expected here\n");
3389 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003390 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003391 return;
3392 }
3393 if (!IS_BLANK(CUR)) {
3394 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3395 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3396 ctxt->sax->error(ctxt->userData,
3397 "Space required after the NOTATION name'\n");
3398 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003399 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003400 return;
3401 }
3402 SKIP_BLANKS;
3403
3404 /*
3405 * Parse the IDs.
3406 */
3407 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3408 SKIP_BLANKS;
3409
3410 if (RAW == '>') {
3411 if (input != ctxt->input) {
3412 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3413 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3414 ctxt->sax->error(ctxt->userData,
3415"Notation declaration doesn't start and stop in the same entity\n");
3416 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003417 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003418 }
3419 NEXT;
3420 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3421 (ctxt->sax->notationDecl != NULL))
3422 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3423 } else {
3424 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3425 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3426 ctxt->sax->error(ctxt->userData,
3427 "'>' required to close NOTATION declaration\n");
3428 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003429 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003430 }
3431 xmlFree(name);
3432 if (Systemid != NULL) xmlFree(Systemid);
3433 if (Pubid != NULL) xmlFree(Pubid);
3434 }
3435}
3436
3437/**
3438 * xmlParseEntityDecl:
3439 * @ctxt: an XML parser context
3440 *
3441 * parse <!ENTITY declarations
3442 *
3443 * [70] EntityDecl ::= GEDecl | PEDecl
3444 *
3445 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3446 *
3447 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3448 *
3449 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3450 *
3451 * [74] PEDef ::= EntityValue | ExternalID
3452 *
3453 * [76] NDataDecl ::= S 'NDATA' S Name
3454 *
3455 * [ VC: Notation Declared ]
3456 * The Name must match the declared name of a notation.
3457 */
3458
3459void
3460xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3461 xmlChar *name = NULL;
3462 xmlChar *value = NULL;
3463 xmlChar *URI = NULL, *literal = NULL;
3464 xmlChar *ndata = NULL;
3465 int isParameter = 0;
3466 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003467 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003468
3469 GROW;
3470 if ((RAW == '<') && (NXT(1) == '!') &&
3471 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3472 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3473 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3474 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003475 SHRINK;
3476 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003477 skipped = SKIP_BLANKS;
3478 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003479 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3480 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3481 ctxt->sax->error(ctxt->userData,
3482 "Space required after '<!ENTITY'\n");
3483 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003484 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003485 }
Owen Taylor3473f882001-02-23 17:55:21 +00003486
3487 if (RAW == '%') {
3488 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003489 skipped = SKIP_BLANKS;
3490 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003491 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3492 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3493 ctxt->sax->error(ctxt->userData,
3494 "Space required after '%'\n");
3495 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003496 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003497 }
Owen Taylor3473f882001-02-23 17:55:21 +00003498 isParameter = 1;
3499 }
3500
Daniel Veillard76d66f42001-05-16 21:05:17 +00003501 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003502 if (name == NULL) {
3503 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3504 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3505 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3506 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003507 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003508 return;
3509 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003510 skipped = SKIP_BLANKS;
3511 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003512 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3513 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3514 ctxt->sax->error(ctxt->userData,
3515 "Space required after the entity name\n");
3516 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003517 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003518 }
Owen Taylor3473f882001-02-23 17:55:21 +00003519
Daniel Veillardf5582f12002-06-11 10:08:16 +00003520 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003521 /*
3522 * handle the various case of definitions...
3523 */
3524 if (isParameter) {
3525 if ((RAW == '"') || (RAW == '\'')) {
3526 value = xmlParseEntityValue(ctxt, &orig);
3527 if (value) {
3528 if ((ctxt->sax != NULL) &&
3529 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3530 ctxt->sax->entityDecl(ctxt->userData, name,
3531 XML_INTERNAL_PARAMETER_ENTITY,
3532 NULL, NULL, value);
3533 }
3534 } else {
3535 URI = xmlParseExternalID(ctxt, &literal, 1);
3536 if ((URI == NULL) && (literal == NULL)) {
3537 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3538 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3539 ctxt->sax->error(ctxt->userData,
3540 "Entity value required\n");
3541 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003542 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003543 }
3544 if (URI) {
3545 xmlURIPtr uri;
3546
3547 uri = xmlParseURI((const char *) URI);
3548 if (uri == NULL) {
3549 ctxt->errNo = XML_ERR_INVALID_URI;
3550 if ((ctxt->sax != NULL) &&
3551 (!ctxt->disableSAX) &&
3552 (ctxt->sax->error != NULL))
3553 ctxt->sax->error(ctxt->userData,
3554 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003555 /*
3556 * This really ought to be a well formedness error
3557 * but the XML Core WG decided otherwise c.f. issue
3558 * E26 of the XML erratas.
3559 */
Owen Taylor3473f882001-02-23 17:55:21 +00003560 } else {
3561 if (uri->fragment != NULL) {
3562 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3563 if ((ctxt->sax != NULL) &&
3564 (!ctxt->disableSAX) &&
3565 (ctxt->sax->error != NULL))
3566 ctxt->sax->error(ctxt->userData,
3567 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003568 /*
3569 * Okay this is foolish to block those but not
3570 * invalid URIs.
3571 */
Owen Taylor3473f882001-02-23 17:55:21 +00003572 ctxt->wellFormed = 0;
3573 } else {
3574 if ((ctxt->sax != NULL) &&
3575 (!ctxt->disableSAX) &&
3576 (ctxt->sax->entityDecl != NULL))
3577 ctxt->sax->entityDecl(ctxt->userData, name,
3578 XML_EXTERNAL_PARAMETER_ENTITY,
3579 literal, URI, NULL);
3580 }
3581 xmlFreeURI(uri);
3582 }
3583 }
3584 }
3585 } else {
3586 if ((RAW == '"') || (RAW == '\'')) {
3587 value = xmlParseEntityValue(ctxt, &orig);
3588 if ((ctxt->sax != NULL) &&
3589 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3590 ctxt->sax->entityDecl(ctxt->userData, name,
3591 XML_INTERNAL_GENERAL_ENTITY,
3592 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003593 /*
3594 * For expat compatibility in SAX mode.
3595 */
3596 if ((ctxt->myDoc == NULL) ||
3597 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3598 if (ctxt->myDoc == NULL) {
3599 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3600 }
3601 if (ctxt->myDoc->intSubset == NULL)
3602 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3603 BAD_CAST "fake", NULL, NULL);
3604
3605 entityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3606 NULL, NULL, value);
3607 }
Owen Taylor3473f882001-02-23 17:55:21 +00003608 } else {
3609 URI = xmlParseExternalID(ctxt, &literal, 1);
3610 if ((URI == NULL) && (literal == NULL)) {
3611 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3612 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3613 ctxt->sax->error(ctxt->userData,
3614 "Entity value required\n");
3615 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003616 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003617 }
3618 if (URI) {
3619 xmlURIPtr uri;
3620
3621 uri = xmlParseURI((const char *)URI);
3622 if (uri == NULL) {
3623 ctxt->errNo = XML_ERR_INVALID_URI;
3624 if ((ctxt->sax != NULL) &&
3625 (!ctxt->disableSAX) &&
3626 (ctxt->sax->error != NULL))
3627 ctxt->sax->error(ctxt->userData,
3628 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003629 /*
3630 * This really ought to be a well formedness error
3631 * but the XML Core WG decided otherwise c.f. issue
3632 * E26 of the XML erratas.
3633 */
Owen Taylor3473f882001-02-23 17:55:21 +00003634 } else {
3635 if (uri->fragment != NULL) {
3636 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3637 if ((ctxt->sax != NULL) &&
3638 (!ctxt->disableSAX) &&
3639 (ctxt->sax->error != NULL))
3640 ctxt->sax->error(ctxt->userData,
3641 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003642 /*
3643 * Okay this is foolish to block those but not
3644 * invalid URIs.
3645 */
Owen Taylor3473f882001-02-23 17:55:21 +00003646 ctxt->wellFormed = 0;
3647 }
3648 xmlFreeURI(uri);
3649 }
3650 }
3651 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3652 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3653 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3654 ctxt->sax->error(ctxt->userData,
3655 "Space required before 'NDATA'\n");
3656 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003657 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003658 }
3659 SKIP_BLANKS;
3660 if ((RAW == 'N') && (NXT(1) == 'D') &&
3661 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3662 (NXT(4) == 'A')) {
3663 SKIP(5);
3664 if (!IS_BLANK(CUR)) {
3665 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3666 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3667 ctxt->sax->error(ctxt->userData,
3668 "Space required after 'NDATA'\n");
3669 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003670 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003671 }
3672 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003673 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003674 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3675 (ctxt->sax->unparsedEntityDecl != NULL))
3676 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3677 literal, URI, ndata);
3678 } else {
3679 if ((ctxt->sax != NULL) &&
3680 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3681 ctxt->sax->entityDecl(ctxt->userData, name,
3682 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3683 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003684 /*
3685 * For expat compatibility in SAX mode.
3686 * assuming the entity repalcement was asked for
3687 */
3688 if ((ctxt->replaceEntities != 0) &&
3689 ((ctxt->myDoc == NULL) ||
3690 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
3691 if (ctxt->myDoc == NULL) {
3692 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3693 }
3694
3695 if (ctxt->myDoc->intSubset == NULL)
3696 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3697 BAD_CAST "fake", NULL, NULL);
3698 entityDecl(ctxt, name,
3699 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3700 literal, URI, NULL);
3701 }
Owen Taylor3473f882001-02-23 17:55:21 +00003702 }
3703 }
3704 }
3705 SKIP_BLANKS;
3706 if (RAW != '>') {
3707 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3708 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3709 ctxt->sax->error(ctxt->userData,
3710 "xmlParseEntityDecl: entity %s not terminated\n", name);
3711 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003712 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003713 } else {
3714 if (input != ctxt->input) {
3715 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3716 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3717 ctxt->sax->error(ctxt->userData,
3718"Entity declaration doesn't start and stop in the same entity\n");
3719 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003720 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003721 }
3722 NEXT;
3723 }
3724 if (orig != NULL) {
3725 /*
3726 * Ugly mechanism to save the raw entity value.
3727 */
3728 xmlEntityPtr cur = NULL;
3729
3730 if (isParameter) {
3731 if ((ctxt->sax != NULL) &&
3732 (ctxt->sax->getParameterEntity != NULL))
3733 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3734 } else {
3735 if ((ctxt->sax != NULL) &&
3736 (ctxt->sax->getEntity != NULL))
3737 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003738 if ((cur == NULL) && (ctxt->userData==ctxt)) {
3739 cur = getEntity(ctxt, name);
3740 }
Owen Taylor3473f882001-02-23 17:55:21 +00003741 }
3742 if (cur != NULL) {
3743 if (cur->orig != NULL)
3744 xmlFree(orig);
3745 else
3746 cur->orig = orig;
3747 } else
3748 xmlFree(orig);
3749 }
3750 if (name != NULL) xmlFree(name);
3751 if (value != NULL) xmlFree(value);
3752 if (URI != NULL) xmlFree(URI);
3753 if (literal != NULL) xmlFree(literal);
3754 if (ndata != NULL) xmlFree(ndata);
3755 }
3756}
3757
3758/**
3759 * xmlParseDefaultDecl:
3760 * @ctxt: an XML parser context
3761 * @value: Receive a possible fixed default value for the attribute
3762 *
3763 * Parse an attribute default declaration
3764 *
3765 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3766 *
3767 * [ VC: Required Attribute ]
3768 * if the default declaration is the keyword #REQUIRED, then the
3769 * attribute must be specified for all elements of the type in the
3770 * attribute-list declaration.
3771 *
3772 * [ VC: Attribute Default Legal ]
3773 * The declared default value must meet the lexical constraints of
3774 * the declared attribute type c.f. xmlValidateAttributeDecl()
3775 *
3776 * [ VC: Fixed Attribute Default ]
3777 * if an attribute has a default value declared with the #FIXED
3778 * keyword, instances of that attribute must match the default value.
3779 *
3780 * [ WFC: No < in Attribute Values ]
3781 * handled in xmlParseAttValue()
3782 *
3783 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3784 * or XML_ATTRIBUTE_FIXED.
3785 */
3786
3787int
3788xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3789 int val;
3790 xmlChar *ret;
3791
3792 *value = NULL;
3793 if ((RAW == '#') && (NXT(1) == 'R') &&
3794 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3795 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3796 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3797 (NXT(8) == 'D')) {
3798 SKIP(9);
3799 return(XML_ATTRIBUTE_REQUIRED);
3800 }
3801 if ((RAW == '#') && (NXT(1) == 'I') &&
3802 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3803 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3804 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3805 SKIP(8);
3806 return(XML_ATTRIBUTE_IMPLIED);
3807 }
3808 val = XML_ATTRIBUTE_NONE;
3809 if ((RAW == '#') && (NXT(1) == 'F') &&
3810 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3811 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3812 SKIP(6);
3813 val = XML_ATTRIBUTE_FIXED;
3814 if (!IS_BLANK(CUR)) {
3815 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3816 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3817 ctxt->sax->error(ctxt->userData,
3818 "Space required after '#FIXED'\n");
3819 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003820 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003821 }
3822 SKIP_BLANKS;
3823 }
3824 ret = xmlParseAttValue(ctxt);
3825 ctxt->instate = XML_PARSER_DTD;
3826 if (ret == NULL) {
3827 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3828 ctxt->sax->error(ctxt->userData,
3829 "Attribute default value declaration error\n");
3830 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003831 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003832 } else
3833 *value = ret;
3834 return(val);
3835}
3836
3837/**
3838 * xmlParseNotationType:
3839 * @ctxt: an XML parser context
3840 *
3841 * parse an Notation attribute type.
3842 *
3843 * Note: the leading 'NOTATION' S part has already being parsed...
3844 *
3845 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3846 *
3847 * [ VC: Notation Attributes ]
3848 * Values of this type must match one of the notation names included
3849 * in the declaration; all notation names in the declaration must be declared.
3850 *
3851 * Returns: the notation attribute tree built while parsing
3852 */
3853
3854xmlEnumerationPtr
3855xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3856 xmlChar *name;
3857 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3858
3859 if (RAW != '(') {
3860 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3861 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3862 ctxt->sax->error(ctxt->userData,
3863 "'(' required to start 'NOTATION'\n");
3864 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003865 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003866 return(NULL);
3867 }
3868 SHRINK;
3869 do {
3870 NEXT;
3871 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003872 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003873 if (name == NULL) {
3874 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3875 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3876 ctxt->sax->error(ctxt->userData,
3877 "Name expected in NOTATION declaration\n");
3878 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003879 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003880 return(ret);
3881 }
3882 cur = xmlCreateEnumeration(name);
3883 xmlFree(name);
3884 if (cur == NULL) return(ret);
3885 if (last == NULL) ret = last = cur;
3886 else {
3887 last->next = cur;
3888 last = cur;
3889 }
3890 SKIP_BLANKS;
3891 } while (RAW == '|');
3892 if (RAW != ')') {
3893 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3894 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3895 ctxt->sax->error(ctxt->userData,
3896 "')' required to finish NOTATION declaration\n");
3897 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003898 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003899 if ((last != NULL) && (last != ret))
3900 xmlFreeEnumeration(last);
3901 return(ret);
3902 }
3903 NEXT;
3904 return(ret);
3905}
3906
3907/**
3908 * xmlParseEnumerationType:
3909 * @ctxt: an XML parser context
3910 *
3911 * parse an Enumeration attribute type.
3912 *
3913 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3914 *
3915 * [ VC: Enumeration ]
3916 * Values of this type must match one of the Nmtoken tokens in
3917 * the declaration
3918 *
3919 * Returns: the enumeration attribute tree built while parsing
3920 */
3921
3922xmlEnumerationPtr
3923xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3924 xmlChar *name;
3925 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3926
3927 if (RAW != '(') {
3928 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3929 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3930 ctxt->sax->error(ctxt->userData,
3931 "'(' required to start ATTLIST enumeration\n");
3932 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003933 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003934 return(NULL);
3935 }
3936 SHRINK;
3937 do {
3938 NEXT;
3939 SKIP_BLANKS;
3940 name = xmlParseNmtoken(ctxt);
3941 if (name == NULL) {
3942 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3943 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3944 ctxt->sax->error(ctxt->userData,
3945 "NmToken expected in ATTLIST enumeration\n");
3946 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003947 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003948 return(ret);
3949 }
3950 cur = xmlCreateEnumeration(name);
3951 xmlFree(name);
3952 if (cur == NULL) return(ret);
3953 if (last == NULL) ret = last = cur;
3954 else {
3955 last->next = cur;
3956 last = cur;
3957 }
3958 SKIP_BLANKS;
3959 } while (RAW == '|');
3960 if (RAW != ')') {
3961 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3962 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3963 ctxt->sax->error(ctxt->userData,
3964 "')' required to finish ATTLIST enumeration\n");
3965 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003966 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003967 return(ret);
3968 }
3969 NEXT;
3970 return(ret);
3971}
3972
3973/**
3974 * xmlParseEnumeratedType:
3975 * @ctxt: an XML parser context
3976 * @tree: the enumeration tree built while parsing
3977 *
3978 * parse an Enumerated attribute type.
3979 *
3980 * [57] EnumeratedType ::= NotationType | Enumeration
3981 *
3982 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3983 *
3984 *
3985 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3986 */
3987
3988int
3989xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3990 if ((RAW == 'N') && (NXT(1) == 'O') &&
3991 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3992 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3993 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3994 SKIP(8);
3995 if (!IS_BLANK(CUR)) {
3996 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3997 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3998 ctxt->sax->error(ctxt->userData,
3999 "Space required after 'NOTATION'\n");
4000 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004001 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004002 return(0);
4003 }
4004 SKIP_BLANKS;
4005 *tree = xmlParseNotationType(ctxt);
4006 if (*tree == NULL) return(0);
4007 return(XML_ATTRIBUTE_NOTATION);
4008 }
4009 *tree = xmlParseEnumerationType(ctxt);
4010 if (*tree == NULL) return(0);
4011 return(XML_ATTRIBUTE_ENUMERATION);
4012}
4013
4014/**
4015 * xmlParseAttributeType:
4016 * @ctxt: an XML parser context
4017 * @tree: the enumeration tree built while parsing
4018 *
4019 * parse the Attribute list def for an element
4020 *
4021 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4022 *
4023 * [55] StringType ::= 'CDATA'
4024 *
4025 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4026 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4027 *
4028 * Validity constraints for attribute values syntax are checked in
4029 * xmlValidateAttributeValue()
4030 *
4031 * [ VC: ID ]
4032 * Values of type ID must match the Name production. A name must not
4033 * appear more than once in an XML document as a value of this type;
4034 * i.e., ID values must uniquely identify the elements which bear them.
4035 *
4036 * [ VC: One ID per Element Type ]
4037 * No element type may have more than one ID attribute specified.
4038 *
4039 * [ VC: ID Attribute Default ]
4040 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4041 *
4042 * [ VC: IDREF ]
4043 * Values of type IDREF must match the Name production, and values
4044 * of type IDREFS must match Names; each IDREF Name must match the value
4045 * of an ID attribute on some element in the XML document; i.e. IDREF
4046 * values must match the value of some ID attribute.
4047 *
4048 * [ VC: Entity Name ]
4049 * Values of type ENTITY must match the Name production, values
4050 * of type ENTITIES must match Names; each Entity Name must match the
4051 * name of an unparsed entity declared in the DTD.
4052 *
4053 * [ VC: Name Token ]
4054 * Values of type NMTOKEN must match the Nmtoken production; values
4055 * of type NMTOKENS must match Nmtokens.
4056 *
4057 * Returns the attribute type
4058 */
4059int
4060xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4061 SHRINK;
4062 if ((RAW == 'C') && (NXT(1) == 'D') &&
4063 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4064 (NXT(4) == 'A')) {
4065 SKIP(5);
4066 return(XML_ATTRIBUTE_CDATA);
4067 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4068 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4069 (NXT(4) == 'F') && (NXT(5) == 'S')) {
4070 SKIP(6);
4071 return(XML_ATTRIBUTE_IDREFS);
4072 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4073 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4074 (NXT(4) == 'F')) {
4075 SKIP(5);
4076 return(XML_ATTRIBUTE_IDREF);
4077 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4078 SKIP(2);
4079 return(XML_ATTRIBUTE_ID);
4080 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4081 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4082 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4083 SKIP(6);
4084 return(XML_ATTRIBUTE_ENTITY);
4085 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4086 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4087 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4088 (NXT(6) == 'E') && (NXT(7) == 'S')) {
4089 SKIP(8);
4090 return(XML_ATTRIBUTE_ENTITIES);
4091 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4092 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4093 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4094 (NXT(6) == 'N') && (NXT(7) == 'S')) {
4095 SKIP(8);
4096 return(XML_ATTRIBUTE_NMTOKENS);
4097 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4098 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4099 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4100 (NXT(6) == 'N')) {
4101 SKIP(7);
4102 return(XML_ATTRIBUTE_NMTOKEN);
4103 }
4104 return(xmlParseEnumeratedType(ctxt, tree));
4105}
4106
4107/**
4108 * xmlParseAttributeListDecl:
4109 * @ctxt: an XML parser context
4110 *
4111 * : parse the Attribute list def for an element
4112 *
4113 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4114 *
4115 * [53] AttDef ::= S Name S AttType S DefaultDecl
4116 *
4117 */
4118void
4119xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
4120 xmlChar *elemName;
4121 xmlChar *attrName;
4122 xmlEnumerationPtr tree;
4123
4124 if ((RAW == '<') && (NXT(1) == '!') &&
4125 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4126 (NXT(4) == 'T') && (NXT(5) == 'L') &&
4127 (NXT(6) == 'I') && (NXT(7) == 'S') &&
4128 (NXT(8) == 'T')) {
4129 xmlParserInputPtr input = ctxt->input;
4130
4131 SKIP(9);
4132 if (!IS_BLANK(CUR)) {
4133 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4134 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4135 ctxt->sax->error(ctxt->userData,
4136 "Space required after '<!ATTLIST'\n");
4137 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004138 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004139 }
4140 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004141 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004142 if (elemName == NULL) {
4143 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4144 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4145 ctxt->sax->error(ctxt->userData,
4146 "ATTLIST: no name for Element\n");
4147 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004148 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004149 return;
4150 }
4151 SKIP_BLANKS;
4152 GROW;
4153 while (RAW != '>') {
4154 const xmlChar *check = CUR_PTR;
4155 int type;
4156 int def;
4157 xmlChar *defaultValue = NULL;
4158
4159 GROW;
4160 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004161 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004162 if (attrName == NULL) {
4163 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4164 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4165 ctxt->sax->error(ctxt->userData,
4166 "ATTLIST: no name for Attribute\n");
4167 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004168 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004169 break;
4170 }
4171 GROW;
4172 if (!IS_BLANK(CUR)) {
4173 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4174 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4175 ctxt->sax->error(ctxt->userData,
4176 "Space required after the attribute name\n");
4177 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004178 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004179 if (attrName != NULL)
4180 xmlFree(attrName);
4181 if (defaultValue != NULL)
4182 xmlFree(defaultValue);
4183 break;
4184 }
4185 SKIP_BLANKS;
4186
4187 type = xmlParseAttributeType(ctxt, &tree);
4188 if (type <= 0) {
4189 if (attrName != NULL)
4190 xmlFree(attrName);
4191 if (defaultValue != NULL)
4192 xmlFree(defaultValue);
4193 break;
4194 }
4195
4196 GROW;
4197 if (!IS_BLANK(CUR)) {
4198 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4199 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4200 ctxt->sax->error(ctxt->userData,
4201 "Space required after the attribute type\n");
4202 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004203 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004204 if (attrName != NULL)
4205 xmlFree(attrName);
4206 if (defaultValue != NULL)
4207 xmlFree(defaultValue);
4208 if (tree != NULL)
4209 xmlFreeEnumeration(tree);
4210 break;
4211 }
4212 SKIP_BLANKS;
4213
4214 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4215 if (def <= 0) {
4216 if (attrName != NULL)
4217 xmlFree(attrName);
4218 if (defaultValue != NULL)
4219 xmlFree(defaultValue);
4220 if (tree != NULL)
4221 xmlFreeEnumeration(tree);
4222 break;
4223 }
4224
4225 GROW;
4226 if (RAW != '>') {
4227 if (!IS_BLANK(CUR)) {
4228 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4229 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4230 ctxt->sax->error(ctxt->userData,
4231 "Space required after the attribute default value\n");
4232 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004233 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004234 if (attrName != NULL)
4235 xmlFree(attrName);
4236 if (defaultValue != NULL)
4237 xmlFree(defaultValue);
4238 if (tree != NULL)
4239 xmlFreeEnumeration(tree);
4240 break;
4241 }
4242 SKIP_BLANKS;
4243 }
4244 if (check == CUR_PTR) {
4245 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4246 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4247 ctxt->sax->error(ctxt->userData,
4248 "xmlParseAttributeListDecl: detected internal error\n");
4249 if (attrName != NULL)
4250 xmlFree(attrName);
4251 if (defaultValue != NULL)
4252 xmlFree(defaultValue);
4253 if (tree != NULL)
4254 xmlFreeEnumeration(tree);
4255 break;
4256 }
4257 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4258 (ctxt->sax->attributeDecl != NULL))
4259 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4260 type, def, defaultValue, tree);
4261 if (attrName != NULL)
4262 xmlFree(attrName);
4263 if (defaultValue != NULL)
4264 xmlFree(defaultValue);
4265 GROW;
4266 }
4267 if (RAW == '>') {
4268 if (input != ctxt->input) {
4269 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4270 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4271 ctxt->sax->error(ctxt->userData,
4272"Attribute list declaration doesn't start and stop in the same entity\n");
4273 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004274 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004275 }
4276 NEXT;
4277 }
4278
4279 xmlFree(elemName);
4280 }
4281}
4282
4283/**
4284 * xmlParseElementMixedContentDecl:
4285 * @ctxt: an XML parser context
4286 *
4287 * parse the declaration for a Mixed Element content
4288 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4289 *
4290 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4291 * '(' S? '#PCDATA' S? ')'
4292 *
4293 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4294 *
4295 * [ VC: No Duplicate Types ]
4296 * The same name must not appear more than once in a single
4297 * mixed-content declaration.
4298 *
4299 * returns: the list of the xmlElementContentPtr describing the element choices
4300 */
4301xmlElementContentPtr
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004302xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004303 xmlElementContentPtr ret = NULL, cur = NULL, n;
4304 xmlChar *elem = NULL;
4305
4306 GROW;
4307 if ((RAW == '#') && (NXT(1) == 'P') &&
4308 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4309 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4310 (NXT(6) == 'A')) {
4311 SKIP(7);
4312 SKIP_BLANKS;
4313 SHRINK;
4314 if (RAW == ')') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004315 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4316 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4317 if (ctxt->vctxt.error != NULL)
4318 ctxt->vctxt.error(ctxt->vctxt.userData,
4319"Element content declaration doesn't start and stop in the same entity\n");
4320 ctxt->valid = 0;
4321 }
Owen Taylor3473f882001-02-23 17:55:21 +00004322 NEXT;
4323 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4324 if (RAW == '*') {
4325 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4326 NEXT;
4327 }
4328 return(ret);
4329 }
4330 if ((RAW == '(') || (RAW == '|')) {
4331 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4332 if (ret == NULL) return(NULL);
4333 }
4334 while (RAW == '|') {
4335 NEXT;
4336 if (elem == NULL) {
4337 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4338 if (ret == NULL) return(NULL);
4339 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004340 if (cur != NULL)
4341 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004342 cur = ret;
4343 } else {
4344 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4345 if (n == NULL) return(NULL);
4346 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004347 if (n->c1 != NULL)
4348 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004349 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004350 if (n != NULL)
4351 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004352 cur = n;
4353 xmlFree(elem);
4354 }
4355 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004356 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004357 if (elem == NULL) {
4358 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4359 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4360 ctxt->sax->error(ctxt->userData,
4361 "xmlParseElementMixedContentDecl : Name expected\n");
4362 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004363 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004364 xmlFreeElementContent(cur);
4365 return(NULL);
4366 }
4367 SKIP_BLANKS;
4368 GROW;
4369 }
4370 if ((RAW == ')') && (NXT(1) == '*')) {
4371 if (elem != NULL) {
4372 cur->c2 = xmlNewElementContent(elem,
4373 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004374 if (cur->c2 != NULL)
4375 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004376 xmlFree(elem);
4377 }
4378 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004379 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4380 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4381 if (ctxt->vctxt.error != NULL)
4382 ctxt->vctxt.error(ctxt->vctxt.userData,
4383"Element content declaration doesn't start and stop in the same entity\n");
4384 ctxt->valid = 0;
4385 }
Owen Taylor3473f882001-02-23 17:55:21 +00004386 SKIP(2);
4387 } else {
4388 if (elem != NULL) xmlFree(elem);
4389 xmlFreeElementContent(ret);
4390 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4391 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4392 ctxt->sax->error(ctxt->userData,
4393 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4394 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004395 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004396 return(NULL);
4397 }
4398
4399 } else {
4400 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4401 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4402 ctxt->sax->error(ctxt->userData,
4403 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4404 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004405 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004406 }
4407 return(ret);
4408}
4409
4410/**
4411 * xmlParseElementChildrenContentDecl:
4412 * @ctxt: an XML parser context
4413 *
4414 * parse the declaration for a Mixed Element content
4415 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4416 *
4417 *
4418 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4419 *
4420 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4421 *
4422 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4423 *
4424 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4425 *
4426 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4427 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004428 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004429 * opening or closing parentheses in a choice, seq, or Mixed
4430 * construct is contained in the replacement text for a parameter
4431 * entity, both must be contained in the same replacement text. For
4432 * interoperability, if a parameter-entity reference appears in a
4433 * choice, seq, or Mixed construct, its replacement text should not
4434 * be empty, and neither the first nor last non-blank character of
4435 * the replacement text should be a connector (| or ,).
4436 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004437 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004438 * hierarchy.
4439 */
4440xmlElementContentPtr
Owen Taylor3473f882001-02-23 17:55:21 +00004441xmlParseElementChildrenContentDecl
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004442(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004443 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4444 xmlChar *elem;
4445 xmlChar type = 0;
4446
4447 SKIP_BLANKS;
4448 GROW;
4449 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004450 xmlParserInputPtr input = ctxt->input;
4451
Owen Taylor3473f882001-02-23 17:55:21 +00004452 /* Recurse on first child */
4453 NEXT;
4454 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004455 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004456 SKIP_BLANKS;
4457 GROW;
4458 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004459 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004460 if (elem == NULL) {
4461 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4462 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4463 ctxt->sax->error(ctxt->userData,
4464 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4465 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004466 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004467 return(NULL);
4468 }
4469 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4470 GROW;
4471 if (RAW == '?') {
4472 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4473 NEXT;
4474 } else if (RAW == '*') {
4475 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4476 NEXT;
4477 } else if (RAW == '+') {
4478 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4479 NEXT;
4480 } else {
4481 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4482 }
4483 xmlFree(elem);
4484 GROW;
4485 }
4486 SKIP_BLANKS;
4487 SHRINK;
4488 while (RAW != ')') {
4489 /*
4490 * Each loop we parse one separator and one element.
4491 */
4492 if (RAW == ',') {
4493 if (type == 0) type = CUR;
4494
4495 /*
4496 * Detect "Name | Name , Name" error
4497 */
4498 else if (type != CUR) {
4499 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4500 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4501 ctxt->sax->error(ctxt->userData,
4502 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4503 type);
4504 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004505 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004506 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004507 xmlFreeElementContent(last);
4508 if (ret != NULL)
4509 xmlFreeElementContent(ret);
4510 return(NULL);
4511 }
4512 NEXT;
4513
4514 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4515 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004516 if ((last != NULL) && (last != ret))
4517 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004518 xmlFreeElementContent(ret);
4519 return(NULL);
4520 }
4521 if (last == NULL) {
4522 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004523 if (ret != NULL)
4524 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004525 ret = cur = op;
4526 } else {
4527 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004528 if (op != NULL)
4529 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004530 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004531 if (last != NULL)
4532 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004533 cur =op;
4534 last = NULL;
4535 }
4536 } else if (RAW == '|') {
4537 if (type == 0) type = CUR;
4538
4539 /*
4540 * Detect "Name , Name | Name" error
4541 */
4542 else if (type != CUR) {
4543 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4544 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4545 ctxt->sax->error(ctxt->userData,
4546 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4547 type);
4548 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004549 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004550 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004551 xmlFreeElementContent(last);
4552 if (ret != NULL)
4553 xmlFreeElementContent(ret);
4554 return(NULL);
4555 }
4556 NEXT;
4557
4558 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4559 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004560 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004561 xmlFreeElementContent(last);
4562 if (ret != NULL)
4563 xmlFreeElementContent(ret);
4564 return(NULL);
4565 }
4566 if (last == NULL) {
4567 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004568 if (ret != NULL)
4569 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004570 ret = cur = op;
4571 } else {
4572 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004573 if (op != NULL)
4574 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004575 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004576 if (last != NULL)
4577 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004578 cur =op;
4579 last = NULL;
4580 }
4581 } else {
4582 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4583 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4584 ctxt->sax->error(ctxt->userData,
4585 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4586 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004587 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004588 if (ret != NULL)
4589 xmlFreeElementContent(ret);
4590 return(NULL);
4591 }
4592 GROW;
4593 SKIP_BLANKS;
4594 GROW;
4595 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004596 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004597 /* Recurse on second child */
4598 NEXT;
4599 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004600 last = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004601 SKIP_BLANKS;
4602 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004603 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004604 if (elem == NULL) {
4605 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4606 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4607 ctxt->sax->error(ctxt->userData,
4608 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4609 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004610 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004611 if (ret != NULL)
4612 xmlFreeElementContent(ret);
4613 return(NULL);
4614 }
4615 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4616 xmlFree(elem);
4617 if (RAW == '?') {
4618 last->ocur = XML_ELEMENT_CONTENT_OPT;
4619 NEXT;
4620 } else if (RAW == '*') {
4621 last->ocur = XML_ELEMENT_CONTENT_MULT;
4622 NEXT;
4623 } else if (RAW == '+') {
4624 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4625 NEXT;
4626 } else {
4627 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4628 }
4629 }
4630 SKIP_BLANKS;
4631 GROW;
4632 }
4633 if ((cur != NULL) && (last != NULL)) {
4634 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004635 if (last != NULL)
4636 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004637 }
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004638 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4639 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4640 if (ctxt->vctxt.error != NULL)
4641 ctxt->vctxt.error(ctxt->vctxt.userData,
4642"Element content declaration doesn't start and stop in the same entity\n");
4643 ctxt->valid = 0;
4644 }
Owen Taylor3473f882001-02-23 17:55:21 +00004645 NEXT;
4646 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004647 if (ret != NULL)
4648 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004649 NEXT;
4650 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004651 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004652 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004653 cur = ret;
4654 /*
4655 * Some normalization:
4656 * (a | b* | c?)* == (a | b | c)*
4657 */
4658 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4659 if ((cur->c1 != NULL) &&
4660 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4661 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4662 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4663 if ((cur->c2 != NULL) &&
4664 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4665 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4666 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4667 cur = cur->c2;
4668 }
4669 }
Owen Taylor3473f882001-02-23 17:55:21 +00004670 NEXT;
4671 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004672 if (ret != NULL) {
4673 int found = 0;
4674
Daniel Veillarde470df72001-04-18 21:41:07 +00004675 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004676 /*
4677 * Some normalization:
4678 * (a | b*)+ == (a | b)*
4679 * (a | b?)+ == (a | b)*
4680 */
4681 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4682 if ((cur->c1 != NULL) &&
4683 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4684 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4685 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4686 found = 1;
4687 }
4688 if ((cur->c2 != NULL) &&
4689 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4690 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4691 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4692 found = 1;
4693 }
4694 cur = cur->c2;
4695 }
4696 if (found)
4697 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4698 }
Owen Taylor3473f882001-02-23 17:55:21 +00004699 NEXT;
4700 }
4701 return(ret);
4702}
4703
4704/**
4705 * xmlParseElementContentDecl:
4706 * @ctxt: an XML parser context
4707 * @name: the name of the element being defined.
4708 * @result: the Element Content pointer will be stored here if any
4709 *
4710 * parse the declaration for an Element content either Mixed or Children,
4711 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4712 *
4713 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4714 *
4715 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4716 */
4717
4718int
4719xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4720 xmlElementContentPtr *result) {
4721
4722 xmlElementContentPtr tree = NULL;
4723 xmlParserInputPtr input = ctxt->input;
4724 int res;
4725
4726 *result = NULL;
4727
4728 if (RAW != '(') {
4729 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4730 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4731 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004732 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004733 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004734 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004735 return(-1);
4736 }
4737 NEXT;
4738 GROW;
4739 SKIP_BLANKS;
4740 if ((RAW == '#') && (NXT(1) == 'P') &&
4741 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4742 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4743 (NXT(6) == 'A')) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004744 tree = xmlParseElementMixedContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004745 res = XML_ELEMENT_TYPE_MIXED;
4746 } else {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004747 tree = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004748 res = XML_ELEMENT_TYPE_ELEMENT;
4749 }
Owen Taylor3473f882001-02-23 17:55:21 +00004750 SKIP_BLANKS;
4751 *result = tree;
4752 return(res);
4753}
4754
4755/**
4756 * xmlParseElementDecl:
4757 * @ctxt: an XML parser context
4758 *
4759 * parse an Element declaration.
4760 *
4761 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4762 *
4763 * [ VC: Unique Element Type Declaration ]
4764 * No element type may be declared more than once
4765 *
4766 * Returns the type of the element, or -1 in case of error
4767 */
4768int
4769xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4770 xmlChar *name;
4771 int ret = -1;
4772 xmlElementContentPtr content = NULL;
4773
4774 GROW;
4775 if ((RAW == '<') && (NXT(1) == '!') &&
4776 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4777 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4778 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4779 (NXT(8) == 'T')) {
4780 xmlParserInputPtr input = ctxt->input;
4781
4782 SKIP(9);
4783 if (!IS_BLANK(CUR)) {
4784 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4785 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4786 ctxt->sax->error(ctxt->userData,
4787 "Space required after 'ELEMENT'\n");
4788 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004789 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004790 }
4791 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004792 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004793 if (name == NULL) {
4794 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4795 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4796 ctxt->sax->error(ctxt->userData,
4797 "xmlParseElementDecl: no name for Element\n");
4798 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004799 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004800 return(-1);
4801 }
4802 while ((RAW == 0) && (ctxt->inputNr > 1))
4803 xmlPopInput(ctxt);
4804 if (!IS_BLANK(CUR)) {
4805 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4806 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4807 ctxt->sax->error(ctxt->userData,
4808 "Space required after the element name\n");
4809 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004810 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004811 }
4812 SKIP_BLANKS;
4813 if ((RAW == 'E') && (NXT(1) == 'M') &&
4814 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4815 (NXT(4) == 'Y')) {
4816 SKIP(5);
4817 /*
4818 * Element must always be empty.
4819 */
4820 ret = XML_ELEMENT_TYPE_EMPTY;
4821 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4822 (NXT(2) == 'Y')) {
4823 SKIP(3);
4824 /*
4825 * Element is a generic container.
4826 */
4827 ret = XML_ELEMENT_TYPE_ANY;
4828 } else if (RAW == '(') {
4829 ret = xmlParseElementContentDecl(ctxt, name, &content);
4830 } else {
4831 /*
4832 * [ WFC: PEs in Internal Subset ] error handling.
4833 */
4834 if ((RAW == '%') && (ctxt->external == 0) &&
4835 (ctxt->inputNr == 1)) {
4836 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4837 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4838 ctxt->sax->error(ctxt->userData,
4839 "PEReference: forbidden within markup decl in internal subset\n");
4840 } else {
4841 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4842 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4843 ctxt->sax->error(ctxt->userData,
4844 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4845 }
4846 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004847 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004848 if (name != NULL) xmlFree(name);
4849 return(-1);
4850 }
4851
4852 SKIP_BLANKS;
4853 /*
4854 * Pop-up of finished entities.
4855 */
4856 while ((RAW == 0) && (ctxt->inputNr > 1))
4857 xmlPopInput(ctxt);
4858 SKIP_BLANKS;
4859
4860 if (RAW != '>') {
4861 ctxt->errNo = XML_ERR_GT_REQUIRED;
4862 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4863 ctxt->sax->error(ctxt->userData,
4864 "xmlParseElementDecl: expected '>' at the end\n");
4865 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004866 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004867 } else {
4868 if (input != ctxt->input) {
4869 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4870 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4871 ctxt->sax->error(ctxt->userData,
4872"Element declaration doesn't start and stop in the same entity\n");
4873 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004874 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004875 }
4876
4877 NEXT;
4878 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4879 (ctxt->sax->elementDecl != NULL))
4880 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4881 content);
4882 }
4883 if (content != NULL) {
4884 xmlFreeElementContent(content);
4885 }
4886 if (name != NULL) {
4887 xmlFree(name);
4888 }
4889 }
4890 return(ret);
4891}
4892
4893/**
Owen Taylor3473f882001-02-23 17:55:21 +00004894 * xmlParseConditionalSections
4895 * @ctxt: an XML parser context
4896 *
4897 * [61] conditionalSect ::= includeSect | ignoreSect
4898 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4899 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4900 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4901 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4902 */
4903
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004904static void
Owen Taylor3473f882001-02-23 17:55:21 +00004905xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4906 SKIP(3);
4907 SKIP_BLANKS;
4908 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4909 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4910 (NXT(6) == 'E')) {
4911 SKIP(7);
4912 SKIP_BLANKS;
4913 if (RAW != '[') {
4914 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4915 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4916 ctxt->sax->error(ctxt->userData,
4917 "XML conditional section '[' expected\n");
4918 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004919 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004920 } else {
4921 NEXT;
4922 }
4923 if (xmlParserDebugEntities) {
4924 if ((ctxt->input != NULL) && (ctxt->input->filename))
4925 xmlGenericError(xmlGenericErrorContext,
4926 "%s(%d): ", ctxt->input->filename,
4927 ctxt->input->line);
4928 xmlGenericError(xmlGenericErrorContext,
4929 "Entering INCLUDE Conditional Section\n");
4930 }
4931
4932 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4933 (NXT(2) != '>'))) {
4934 const xmlChar *check = CUR_PTR;
4935 int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00004936
4937 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4938 xmlParseConditionalSections(ctxt);
4939 } else if (IS_BLANK(CUR)) {
4940 NEXT;
4941 } else if (RAW == '%') {
4942 xmlParsePEReference(ctxt);
4943 } else
4944 xmlParseMarkupDecl(ctxt);
4945
4946 /*
4947 * Pop-up of finished entities.
4948 */
4949 while ((RAW == 0) && (ctxt->inputNr > 1))
4950 xmlPopInput(ctxt);
4951
Daniel Veillardfdc91562002-07-01 21:52:03 +00004952 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004953 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4954 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4955 ctxt->sax->error(ctxt->userData,
4956 "Content error in the external subset\n");
4957 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004958 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004959 break;
4960 }
4961 }
4962 if (xmlParserDebugEntities) {
4963 if ((ctxt->input != NULL) && (ctxt->input->filename))
4964 xmlGenericError(xmlGenericErrorContext,
4965 "%s(%d): ", ctxt->input->filename,
4966 ctxt->input->line);
4967 xmlGenericError(xmlGenericErrorContext,
4968 "Leaving INCLUDE Conditional Section\n");
4969 }
4970
4971 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4972 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4973 int state;
4974 int instate;
4975 int depth = 0;
4976
4977 SKIP(6);
4978 SKIP_BLANKS;
4979 if (RAW != '[') {
4980 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4981 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4982 ctxt->sax->error(ctxt->userData,
4983 "XML conditional section '[' expected\n");
4984 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004985 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004986 } else {
4987 NEXT;
4988 }
4989 if (xmlParserDebugEntities) {
4990 if ((ctxt->input != NULL) && (ctxt->input->filename))
4991 xmlGenericError(xmlGenericErrorContext,
4992 "%s(%d): ", ctxt->input->filename,
4993 ctxt->input->line);
4994 xmlGenericError(xmlGenericErrorContext,
4995 "Entering IGNORE Conditional Section\n");
4996 }
4997
4998 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004999 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005000 * But disable SAX event generating DTD building in the meantime
5001 */
5002 state = ctxt->disableSAX;
5003 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005004 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005005 ctxt->instate = XML_PARSER_IGNORE;
5006
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005007 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005008 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5009 depth++;
5010 SKIP(3);
5011 continue;
5012 }
5013 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5014 if (--depth >= 0) SKIP(3);
5015 continue;
5016 }
5017 NEXT;
5018 continue;
5019 }
5020
5021 ctxt->disableSAX = state;
5022 ctxt->instate = instate;
5023
5024 if (xmlParserDebugEntities) {
5025 if ((ctxt->input != NULL) && (ctxt->input->filename))
5026 xmlGenericError(xmlGenericErrorContext,
5027 "%s(%d): ", ctxt->input->filename,
5028 ctxt->input->line);
5029 xmlGenericError(xmlGenericErrorContext,
5030 "Leaving IGNORE Conditional Section\n");
5031 }
5032
5033 } else {
5034 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5035 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5036 ctxt->sax->error(ctxt->userData,
5037 "XML conditional section INCLUDE or IGNORE keyword expected\n");
5038 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005039 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005040 }
5041
5042 if (RAW == 0)
5043 SHRINK;
5044
5045 if (RAW == 0) {
5046 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
5047 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5048 ctxt->sax->error(ctxt->userData,
5049 "XML conditional section not closed\n");
5050 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005051 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005052 } else {
5053 SKIP(3);
5054 }
5055}
5056
5057/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005058 * xmlParseMarkupDecl:
5059 * @ctxt: an XML parser context
5060 *
5061 * parse Markup declarations
5062 *
5063 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5064 * NotationDecl | PI | Comment
5065 *
5066 * [ VC: Proper Declaration/PE Nesting ]
5067 * Parameter-entity replacement text must be properly nested with
5068 * markup declarations. That is to say, if either the first character
5069 * or the last character of a markup declaration (markupdecl above) is
5070 * contained in the replacement text for a parameter-entity reference,
5071 * both must be contained in the same replacement text.
5072 *
5073 * [ WFC: PEs in Internal Subset ]
5074 * In the internal DTD subset, parameter-entity references can occur
5075 * only where markup declarations can occur, not within markup declarations.
5076 * (This does not apply to references that occur in external parameter
5077 * entities or to the external subset.)
5078 */
5079void
5080xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5081 GROW;
5082 xmlParseElementDecl(ctxt);
5083 xmlParseAttributeListDecl(ctxt);
5084 xmlParseEntityDecl(ctxt);
5085 xmlParseNotationDecl(ctxt);
5086 xmlParsePI(ctxt);
5087 xmlParseComment(ctxt);
5088 /*
5089 * This is only for internal subset. On external entities,
5090 * the replacement is done before parsing stage
5091 */
5092 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5093 xmlParsePEReference(ctxt);
5094
5095 /*
5096 * Conditional sections are allowed from entities included
5097 * by PE References in the internal subset.
5098 */
5099 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5100 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5101 xmlParseConditionalSections(ctxt);
5102 }
5103 }
5104
5105 ctxt->instate = XML_PARSER_DTD;
5106}
5107
5108/**
5109 * xmlParseTextDecl:
5110 * @ctxt: an XML parser context
5111 *
5112 * parse an XML declaration header for external entities
5113 *
5114 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5115 *
5116 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5117 */
5118
5119void
5120xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5121 xmlChar *version;
5122
5123 /*
5124 * We know that '<?xml' is here.
5125 */
5126 if ((RAW == '<') && (NXT(1) == '?') &&
5127 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5128 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5129 SKIP(5);
5130 } else {
5131 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
5132 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5133 ctxt->sax->error(ctxt->userData,
5134 "Text declaration '<?xml' required\n");
5135 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005136 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005137
5138 return;
5139 }
5140
5141 if (!IS_BLANK(CUR)) {
5142 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5143 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5144 ctxt->sax->error(ctxt->userData,
5145 "Space needed after '<?xml'\n");
5146 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005147 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005148 }
5149 SKIP_BLANKS;
5150
5151 /*
5152 * We may have the VersionInfo here.
5153 */
5154 version = xmlParseVersionInfo(ctxt);
5155 if (version == NULL)
5156 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005157 else {
5158 if (!IS_BLANK(CUR)) {
5159 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5160 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5161 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5162 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005163 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard401c2112002-01-07 16:54:10 +00005164 }
5165 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005166 ctxt->input->version = version;
5167
5168 /*
5169 * We must have the encoding declaration
5170 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005171 xmlParseEncodingDecl(ctxt);
5172 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5173 /*
5174 * The XML REC instructs us to stop parsing right here
5175 */
5176 return;
5177 }
5178
5179 SKIP_BLANKS;
5180 if ((RAW == '?') && (NXT(1) == '>')) {
5181 SKIP(2);
5182 } else if (RAW == '>') {
5183 /* Deprecated old WD ... */
5184 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5185 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5186 ctxt->sax->error(ctxt->userData,
5187 "XML declaration must end-up with '?>'\n");
5188 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005189 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005190 NEXT;
5191 } else {
5192 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5193 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5194 ctxt->sax->error(ctxt->userData,
5195 "parsing XML declaration: '?>' expected\n");
5196 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005197 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005198 MOVETO_ENDTAG(CUR_PTR);
5199 NEXT;
5200 }
5201}
5202
5203/**
Owen Taylor3473f882001-02-23 17:55:21 +00005204 * xmlParseExternalSubset:
5205 * @ctxt: an XML parser context
5206 * @ExternalID: the external identifier
5207 * @SystemID: the system identifier (or URL)
5208 *
5209 * parse Markup declarations from an external subset
5210 *
5211 * [30] extSubset ::= textDecl? extSubsetDecl
5212 *
5213 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5214 */
5215void
5216xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5217 const xmlChar *SystemID) {
5218 GROW;
5219 if ((RAW == '<') && (NXT(1) == '?') &&
5220 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5221 (NXT(4) == 'l')) {
5222 xmlParseTextDecl(ctxt);
5223 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5224 /*
5225 * The XML REC instructs us to stop parsing right here
5226 */
5227 ctxt->instate = XML_PARSER_EOF;
5228 return;
5229 }
5230 }
5231 if (ctxt->myDoc == NULL) {
5232 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5233 }
5234 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5235 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5236
5237 ctxt->instate = XML_PARSER_DTD;
5238 ctxt->external = 1;
5239 while (((RAW == '<') && (NXT(1) == '?')) ||
5240 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005241 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005242 const xmlChar *check = CUR_PTR;
5243 int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005244
5245 GROW;
5246 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5247 xmlParseConditionalSections(ctxt);
5248 } else if (IS_BLANK(CUR)) {
5249 NEXT;
5250 } else if (RAW == '%') {
5251 xmlParsePEReference(ctxt);
5252 } else
5253 xmlParseMarkupDecl(ctxt);
5254
5255 /*
5256 * Pop-up of finished entities.
5257 */
5258 while ((RAW == 0) && (ctxt->inputNr > 1))
5259 xmlPopInput(ctxt);
5260
Daniel Veillardfdc91562002-07-01 21:52:03 +00005261 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005262 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5263 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5264 ctxt->sax->error(ctxt->userData,
5265 "Content error in the external subset\n");
5266 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005267 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005268 break;
5269 }
5270 }
5271
5272 if (RAW != 0) {
5273 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5274 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5275 ctxt->sax->error(ctxt->userData,
5276 "Extra content at the end of the document\n");
5277 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005278 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005279 }
5280
5281}
5282
5283/**
5284 * xmlParseReference:
5285 * @ctxt: an XML parser context
5286 *
5287 * parse and handle entity references in content, depending on the SAX
5288 * interface, this may end-up in a call to character() if this is a
5289 * CharRef, a predefined entity, if there is no reference() callback.
5290 * or if the parser was asked to switch to that mode.
5291 *
5292 * [67] Reference ::= EntityRef | CharRef
5293 */
5294void
5295xmlParseReference(xmlParserCtxtPtr ctxt) {
5296 xmlEntityPtr ent;
5297 xmlChar *val;
5298 if (RAW != '&') return;
5299
5300 if (NXT(1) == '#') {
5301 int i = 0;
5302 xmlChar out[10];
5303 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005304 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005305
5306 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5307 /*
5308 * So we are using non-UTF-8 buffers
5309 * Check that the char fit on 8bits, if not
5310 * generate a CharRef.
5311 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005312 if (value <= 0xFF) {
5313 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005314 out[1] = 0;
5315 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5316 (!ctxt->disableSAX))
5317 ctxt->sax->characters(ctxt->userData, out, 1);
5318 } else {
5319 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005320 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005321 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005322 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005323 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5324 (!ctxt->disableSAX))
5325 ctxt->sax->reference(ctxt->userData, out);
5326 }
5327 } else {
5328 /*
5329 * Just encode the value in UTF-8
5330 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005331 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005332 out[i] = 0;
5333 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5334 (!ctxt->disableSAX))
5335 ctxt->sax->characters(ctxt->userData, out, i);
5336 }
5337 } else {
5338 ent = xmlParseEntityRef(ctxt);
5339 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005340 if (!ctxt->wellFormed)
5341 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005342 if ((ent->name != NULL) &&
5343 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5344 xmlNodePtr list = NULL;
5345 int ret;
5346
5347
5348 /*
5349 * The first reference to the entity trigger a parsing phase
5350 * where the ent->children is filled with the result from
5351 * the parsing.
5352 */
5353 if (ent->children == NULL) {
5354 xmlChar *value;
5355 value = ent->content;
5356
5357 /*
5358 * Check that this entity is well formed
5359 */
5360 if ((value != NULL) &&
5361 (value[1] == 0) && (value[0] == '<') &&
5362 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5363 /*
5364 * DONE: get definite answer on this !!!
5365 * Lots of entity decls are used to declare a single
5366 * char
5367 * <!ENTITY lt "<">
5368 * Which seems to be valid since
5369 * 2.4: The ampersand character (&) and the left angle
5370 * bracket (<) may appear in their literal form only
5371 * when used ... They are also legal within the literal
5372 * entity value of an internal entity declaration;i
5373 * see "4.3.2 Well-Formed Parsed Entities".
5374 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5375 * Looking at the OASIS test suite and James Clark
5376 * tests, this is broken. However the XML REC uses
5377 * it. Is the XML REC not well-formed ????
5378 * This is a hack to avoid this problem
5379 *
5380 * ANSWER: since lt gt amp .. are already defined,
5381 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005382 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005383 * is lousy but acceptable.
5384 */
5385 list = xmlNewDocText(ctxt->myDoc, value);
5386 if (list != NULL) {
5387 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5388 (ent->children == NULL)) {
5389 ent->children = list;
5390 ent->last = list;
5391 list->parent = (xmlNodePtr) ent;
5392 } else {
5393 xmlFreeNodeList(list);
5394 }
5395 } else if (list != NULL) {
5396 xmlFreeNodeList(list);
5397 }
5398 } else {
5399 /*
5400 * 4.3.2: An internal general parsed entity is well-formed
5401 * if its replacement text matches the production labeled
5402 * content.
5403 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005404
5405 void *user_data;
5406 /*
5407 * This is a bit hackish but this seems the best
5408 * way to make sure both SAX and DOM entity support
5409 * behaves okay.
5410 */
5411 if (ctxt->userData == ctxt)
5412 user_data = NULL;
5413 else
5414 user_data = ctxt->userData;
5415
Owen Taylor3473f882001-02-23 17:55:21 +00005416 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5417 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005418 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5419 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005420 ctxt->depth--;
5421 } else if (ent->etype ==
5422 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5423 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005424 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005425 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005426 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005427 ctxt->depth--;
5428 } else {
5429 ret = -1;
5430 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5431 ctxt->sax->error(ctxt->userData,
5432 "Internal: invalid entity type\n");
5433 }
5434 if (ret == XML_ERR_ENTITY_LOOP) {
5435 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5436 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5437 ctxt->sax->error(ctxt->userData,
5438 "Detected entity reference loop\n");
5439 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005440 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005441 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005442 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005443 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5444 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005445 (ent->children == NULL)) {
5446 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005447 if (ctxt->replaceEntities) {
5448 /*
5449 * Prune it directly in the generated document
5450 * except for single text nodes.
5451 */
5452 if ((list->type == XML_TEXT_NODE) &&
5453 (list->next == NULL)) {
5454 list->parent = (xmlNodePtr) ent;
5455 list = NULL;
5456 } else {
5457 while (list != NULL) {
5458 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005459 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005460 if (list->next == NULL)
5461 ent->last = list;
5462 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005463 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005464 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005465 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5466 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005467 }
5468 } else {
5469 while (list != NULL) {
5470 list->parent = (xmlNodePtr) ent;
5471 if (list->next == NULL)
5472 ent->last = list;
5473 list = list->next;
5474 }
Owen Taylor3473f882001-02-23 17:55:21 +00005475 }
5476 } else {
5477 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005478 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005479 }
5480 } else if (ret > 0) {
5481 ctxt->errNo = ret;
5482 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5483 ctxt->sax->error(ctxt->userData,
5484 "Entity value required\n");
5485 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005486 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005487 } else if (list != NULL) {
5488 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005489 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005490 }
5491 }
5492 }
5493 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5494 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5495 /*
5496 * Create a node.
5497 */
5498 ctxt->sax->reference(ctxt->userData, ent->name);
5499 return;
5500 } else if (ctxt->replaceEntities) {
5501 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5502 /*
5503 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005504 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005505 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005506 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005507 if (list == NULL) {
Daniel Veillard8107a222002-01-13 14:10:10 +00005508 xmlNodePtr new = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005509 cur = ent->children;
5510 while (cur != NULL) {
5511 new = xmlCopyNode(cur, 1);
Daniel Veillard8107a222002-01-13 14:10:10 +00005512 if (firstChild == NULL){
5513 firstChild = new;
5514 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005515 xmlAddChild(ctxt->node, new);
5516 if (cur == ent->last)
5517 break;
5518 cur = cur->next;
5519 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005520 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5521 xmlAddEntityReference(ent, firstChild, new);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005522 } else {
5523 /*
5524 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005525 * node with a possible previous text one which
5526 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005527 */
5528 if (ent->children->type == XML_TEXT_NODE)
5529 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5530 if ((ent->last != ent->children) &&
5531 (ent->last->type == XML_TEXT_NODE))
5532 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5533 xmlAddChildList(ctxt->node, ent->children);
5534 }
5535
Owen Taylor3473f882001-02-23 17:55:21 +00005536 /*
5537 * This is to avoid a nasty side effect, see
5538 * characters() in SAX.c
5539 */
5540 ctxt->nodemem = 0;
5541 ctxt->nodelen = 0;
5542 return;
5543 } else {
5544 /*
5545 * Probably running in SAX mode
5546 */
5547 xmlParserInputPtr input;
5548
5549 input = xmlNewEntityInputStream(ctxt, ent);
5550 xmlPushInput(ctxt, input);
5551 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5552 (RAW == '<') && (NXT(1) == '?') &&
5553 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5554 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5555 xmlParseTextDecl(ctxt);
5556 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5557 /*
5558 * The XML REC instructs us to stop parsing right here
5559 */
5560 ctxt->instate = XML_PARSER_EOF;
5561 return;
5562 }
5563 if (input->standalone == 1) {
5564 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5565 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5566 ctxt->sax->error(ctxt->userData,
5567 "external parsed entities cannot be standalone\n");
5568 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005569 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005570 }
5571 }
5572 return;
5573 }
5574 }
5575 } else {
5576 val = ent->content;
5577 if (val == NULL) return;
5578 /*
5579 * inline the entity.
5580 */
5581 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5582 (!ctxt->disableSAX))
5583 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5584 }
5585 }
5586}
5587
5588/**
5589 * xmlParseEntityRef:
5590 * @ctxt: an XML parser context
5591 *
5592 * parse ENTITY references declarations
5593 *
5594 * [68] EntityRef ::= '&' Name ';'
5595 *
5596 * [ WFC: Entity Declared ]
5597 * In a document without any DTD, a document with only an internal DTD
5598 * subset which contains no parameter entity references, or a document
5599 * with "standalone='yes'", the Name given in the entity reference
5600 * must match that in an entity declaration, except that well-formed
5601 * documents need not declare any of the following entities: amp, lt,
5602 * gt, apos, quot. The declaration of a parameter entity must precede
5603 * any reference to it. Similarly, the declaration of a general entity
5604 * must precede any reference to it which appears in a default value in an
5605 * attribute-list declaration. Note that if entities are declared in the
5606 * external subset or in external parameter entities, a non-validating
5607 * processor is not obligated to read and process their declarations;
5608 * for such documents, the rule that an entity must be declared is a
5609 * well-formedness constraint only if standalone='yes'.
5610 *
5611 * [ WFC: Parsed Entity ]
5612 * An entity reference must not contain the name of an unparsed entity
5613 *
5614 * Returns the xmlEntityPtr if found, or NULL otherwise.
5615 */
5616xmlEntityPtr
5617xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5618 xmlChar *name;
5619 xmlEntityPtr ent = NULL;
5620
5621 GROW;
5622
5623 if (RAW == '&') {
5624 NEXT;
5625 name = xmlParseName(ctxt);
5626 if (name == NULL) {
5627 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5628 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5629 ctxt->sax->error(ctxt->userData,
5630 "xmlParseEntityRef: no name\n");
5631 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005632 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005633 } else {
5634 if (RAW == ';') {
5635 NEXT;
5636 /*
5637 * Ask first SAX for entity resolution, otherwise try the
5638 * predefined set.
5639 */
5640 if (ctxt->sax != NULL) {
5641 if (ctxt->sax->getEntity != NULL)
5642 ent = ctxt->sax->getEntity(ctxt->userData, name);
5643 if (ent == NULL)
5644 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005645 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5646 ent = getEntity(ctxt, name);
5647 }
Owen Taylor3473f882001-02-23 17:55:21 +00005648 }
5649 /*
5650 * [ WFC: Entity Declared ]
5651 * In a document without any DTD, a document with only an
5652 * internal DTD subset which contains no parameter entity
5653 * references, or a document with "standalone='yes'", the
5654 * Name given in the entity reference must match that in an
5655 * entity declaration, except that well-formed documents
5656 * need not declare any of the following entities: amp, lt,
5657 * gt, apos, quot.
5658 * The declaration of a parameter entity must precede any
5659 * reference to it.
5660 * Similarly, the declaration of a general entity must
5661 * precede any reference to it which appears in a default
5662 * value in an attribute-list declaration. Note that if
5663 * entities are declared in the external subset or in
5664 * external parameter entities, a non-validating processor
5665 * is not obligated to read and process their declarations;
5666 * for such documents, the rule that an entity must be
5667 * declared is a well-formedness constraint only if
5668 * standalone='yes'.
5669 */
5670 if (ent == NULL) {
5671 if ((ctxt->standalone == 1) ||
5672 ((ctxt->hasExternalSubset == 0) &&
5673 (ctxt->hasPErefs == 0))) {
5674 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5675 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5676 ctxt->sax->error(ctxt->userData,
5677 "Entity '%s' not defined\n", name);
5678 ctxt->wellFormed = 0;
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005679 ctxt->valid = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005680 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005681 } else {
5682 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005683 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005684 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005685 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005686 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005687 }
5688 }
5689
5690 /*
5691 * [ WFC: Parsed Entity ]
5692 * An entity reference must not contain the name of an
5693 * unparsed entity
5694 */
5695 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5696 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5697 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5698 ctxt->sax->error(ctxt->userData,
5699 "Entity reference to unparsed entity %s\n", name);
5700 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005701 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005702 }
5703
5704 /*
5705 * [ WFC: No External Entity References ]
5706 * Attribute values cannot contain direct or indirect
5707 * entity references to external entities.
5708 */
5709 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5710 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5711 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5712 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5713 ctxt->sax->error(ctxt->userData,
5714 "Attribute references external entity '%s'\n", name);
5715 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005716 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005717 }
5718 /*
5719 * [ WFC: No < in Attribute Values ]
5720 * The replacement text of any entity referred to directly or
5721 * indirectly in an attribute value (other than "&lt;") must
5722 * not contain a <.
5723 */
5724 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5725 (ent != NULL) &&
5726 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5727 (ent->content != NULL) &&
5728 (xmlStrchr(ent->content, '<'))) {
5729 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5730 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5731 ctxt->sax->error(ctxt->userData,
5732 "'<' in entity '%s' is not allowed in attributes values\n", name);
5733 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005734 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005735 }
5736
5737 /*
5738 * Internal check, no parameter entities here ...
5739 */
5740 else {
5741 switch (ent->etype) {
5742 case XML_INTERNAL_PARAMETER_ENTITY:
5743 case XML_EXTERNAL_PARAMETER_ENTITY:
5744 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5745 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5746 ctxt->sax->error(ctxt->userData,
5747 "Attempt to reference the parameter entity '%s'\n", name);
5748 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005749 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005750 break;
5751 default:
5752 break;
5753 }
5754 }
5755
5756 /*
5757 * [ WFC: No Recursion ]
5758 * A parsed entity must not contain a recursive reference
5759 * to itself, either directly or indirectly.
5760 * Done somewhere else
5761 */
5762
5763 } else {
5764 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5765 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5766 ctxt->sax->error(ctxt->userData,
5767 "xmlParseEntityRef: expecting ';'\n");
5768 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005769 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005770 }
5771 xmlFree(name);
5772 }
5773 }
5774 return(ent);
5775}
5776
5777/**
5778 * xmlParseStringEntityRef:
5779 * @ctxt: an XML parser context
5780 * @str: a pointer to an index in the string
5781 *
5782 * parse ENTITY references declarations, but this version parses it from
5783 * a string value.
5784 *
5785 * [68] EntityRef ::= '&' Name ';'
5786 *
5787 * [ WFC: Entity Declared ]
5788 * In a document without any DTD, a document with only an internal DTD
5789 * subset which contains no parameter entity references, or a document
5790 * with "standalone='yes'", the Name given in the entity reference
5791 * must match that in an entity declaration, except that well-formed
5792 * documents need not declare any of the following entities: amp, lt,
5793 * gt, apos, quot. The declaration of a parameter entity must precede
5794 * any reference to it. Similarly, the declaration of a general entity
5795 * must precede any reference to it which appears in a default value in an
5796 * attribute-list declaration. Note that if entities are declared in the
5797 * external subset or in external parameter entities, a non-validating
5798 * processor is not obligated to read and process their declarations;
5799 * for such documents, the rule that an entity must be declared is a
5800 * well-formedness constraint only if standalone='yes'.
5801 *
5802 * [ WFC: Parsed Entity ]
5803 * An entity reference must not contain the name of an unparsed entity
5804 *
5805 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5806 * is updated to the current location in the string.
5807 */
5808xmlEntityPtr
5809xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5810 xmlChar *name;
5811 const xmlChar *ptr;
5812 xmlChar cur;
5813 xmlEntityPtr ent = NULL;
5814
5815 if ((str == NULL) || (*str == NULL))
5816 return(NULL);
5817 ptr = *str;
5818 cur = *ptr;
5819 if (cur == '&') {
5820 ptr++;
5821 cur = *ptr;
5822 name = xmlParseStringName(ctxt, &ptr);
5823 if (name == NULL) {
5824 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5825 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5826 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005827 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005828 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005829 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005830 } else {
5831 if (*ptr == ';') {
5832 ptr++;
5833 /*
5834 * Ask first SAX for entity resolution, otherwise try the
5835 * predefined set.
5836 */
5837 if (ctxt->sax != NULL) {
5838 if (ctxt->sax->getEntity != NULL)
5839 ent = ctxt->sax->getEntity(ctxt->userData, name);
5840 if (ent == NULL)
5841 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005842 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5843 ent = getEntity(ctxt, name);
5844 }
Owen Taylor3473f882001-02-23 17:55:21 +00005845 }
5846 /*
5847 * [ WFC: Entity Declared ]
5848 * In a document without any DTD, a document with only an
5849 * internal DTD subset which contains no parameter entity
5850 * references, or a document with "standalone='yes'", the
5851 * Name given in the entity reference must match that in an
5852 * entity declaration, except that well-formed documents
5853 * need not declare any of the following entities: amp, lt,
5854 * gt, apos, quot.
5855 * The declaration of a parameter entity must precede any
5856 * reference to it.
5857 * Similarly, the declaration of a general entity must
5858 * precede any reference to it which appears in a default
5859 * value in an attribute-list declaration. Note that if
5860 * entities are declared in the external subset or in
5861 * external parameter entities, a non-validating processor
5862 * is not obligated to read and process their declarations;
5863 * for such documents, the rule that an entity must be
5864 * declared is a well-formedness constraint only if
5865 * standalone='yes'.
5866 */
5867 if (ent == NULL) {
5868 if ((ctxt->standalone == 1) ||
5869 ((ctxt->hasExternalSubset == 0) &&
5870 (ctxt->hasPErefs == 0))) {
5871 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5872 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5873 ctxt->sax->error(ctxt->userData,
5874 "Entity '%s' not defined\n", name);
5875 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005876 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005877 } else {
5878 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5879 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5880 ctxt->sax->warning(ctxt->userData,
5881 "Entity '%s' not defined\n", name);
5882 }
5883 }
5884
5885 /*
5886 * [ WFC: Parsed Entity ]
5887 * An entity reference must not contain the name of an
5888 * unparsed entity
5889 */
5890 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5891 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5892 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5893 ctxt->sax->error(ctxt->userData,
5894 "Entity reference to unparsed entity %s\n", name);
5895 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005896 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005897 }
5898
5899 /*
5900 * [ WFC: No External Entity References ]
5901 * Attribute values cannot contain direct or indirect
5902 * entity references to external entities.
5903 */
5904 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5905 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5906 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5907 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5908 ctxt->sax->error(ctxt->userData,
5909 "Attribute references external entity '%s'\n", name);
5910 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005911 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005912 }
5913 /*
5914 * [ WFC: No < in Attribute Values ]
5915 * The replacement text of any entity referred to directly or
5916 * indirectly in an attribute value (other than "&lt;") must
5917 * not contain a <.
5918 */
5919 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5920 (ent != NULL) &&
5921 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5922 (ent->content != NULL) &&
5923 (xmlStrchr(ent->content, '<'))) {
5924 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5925 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5926 ctxt->sax->error(ctxt->userData,
5927 "'<' in entity '%s' is not allowed in attributes values\n", name);
5928 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005929 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005930 }
5931
5932 /*
5933 * Internal check, no parameter entities here ...
5934 */
5935 else {
5936 switch (ent->etype) {
5937 case XML_INTERNAL_PARAMETER_ENTITY:
5938 case XML_EXTERNAL_PARAMETER_ENTITY:
5939 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5940 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5941 ctxt->sax->error(ctxt->userData,
5942 "Attempt to reference the parameter entity '%s'\n", name);
5943 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005944 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005945 break;
5946 default:
5947 break;
5948 }
5949 }
5950
5951 /*
5952 * [ WFC: No Recursion ]
5953 * A parsed entity must not contain a recursive reference
5954 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005955 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00005956 */
5957
5958 } else {
5959 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5960 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5961 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005962 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005963 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005964 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005965 }
5966 xmlFree(name);
5967 }
5968 }
5969 *str = ptr;
5970 return(ent);
5971}
5972
5973/**
5974 * xmlParsePEReference:
5975 * @ctxt: an XML parser context
5976 *
5977 * parse PEReference declarations
5978 * The entity content is handled directly by pushing it's content as
5979 * a new input stream.
5980 *
5981 * [69] PEReference ::= '%' Name ';'
5982 *
5983 * [ WFC: No Recursion ]
5984 * A parsed entity must not contain a recursive
5985 * reference to itself, either directly or indirectly.
5986 *
5987 * [ WFC: Entity Declared ]
5988 * In a document without any DTD, a document with only an internal DTD
5989 * subset which contains no parameter entity references, or a document
5990 * with "standalone='yes'", ... ... The declaration of a parameter
5991 * entity must precede any reference to it...
5992 *
5993 * [ VC: Entity Declared ]
5994 * In a document with an external subset or external parameter entities
5995 * with "standalone='no'", ... ... The declaration of a parameter entity
5996 * must precede any reference to it...
5997 *
5998 * [ WFC: In DTD ]
5999 * Parameter-entity references may only appear in the DTD.
6000 * NOTE: misleading but this is handled.
6001 */
6002void
6003xmlParsePEReference(xmlParserCtxtPtr ctxt) {
6004 xmlChar *name;
6005 xmlEntityPtr entity = NULL;
6006 xmlParserInputPtr input;
6007
6008 if (RAW == '%') {
6009 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006010 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006011 if (name == NULL) {
6012 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6013 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6014 ctxt->sax->error(ctxt->userData,
6015 "xmlParsePEReference: no name\n");
6016 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006017 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006018 } else {
6019 if (RAW == ';') {
6020 NEXT;
6021 if ((ctxt->sax != NULL) &&
6022 (ctxt->sax->getParameterEntity != NULL))
6023 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6024 name);
6025 if (entity == NULL) {
6026 /*
6027 * [ WFC: Entity Declared ]
6028 * In a document without any DTD, a document with only an
6029 * internal DTD subset which contains no parameter entity
6030 * references, or a document with "standalone='yes'", ...
6031 * ... The declaration of a parameter entity must precede
6032 * any reference to it...
6033 */
6034 if ((ctxt->standalone == 1) ||
6035 ((ctxt->hasExternalSubset == 0) &&
6036 (ctxt->hasPErefs == 0))) {
6037 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6038 if ((!ctxt->disableSAX) &&
6039 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6040 ctxt->sax->error(ctxt->userData,
6041 "PEReference: %%%s; not found\n", name);
6042 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006043 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006044 } else {
6045 /*
6046 * [ VC: Entity Declared ]
6047 * In a document with an external subset or external
6048 * parameter entities with "standalone='no'", ...
6049 * ... The declaration of a parameter entity must precede
6050 * any reference to it...
6051 */
6052 if ((!ctxt->disableSAX) &&
6053 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6054 ctxt->sax->warning(ctxt->userData,
6055 "PEReference: %%%s; not found\n", name);
6056 ctxt->valid = 0;
6057 }
6058 } else {
6059 /*
6060 * Internal checking in case the entity quest barfed
6061 */
6062 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6063 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6064 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6065 ctxt->sax->warning(ctxt->userData,
6066 "Internal: %%%s; is not a parameter entity\n", name);
Daniel Veillardf5582f12002-06-11 10:08:16 +00006067 } else if (ctxt->input->free != deallocblankswrapper) {
6068 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
6069 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00006070 } else {
6071 /*
6072 * TODO !!!
6073 * handle the extra spaces added before and after
6074 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6075 */
6076 input = xmlNewEntityInputStream(ctxt, entity);
6077 xmlPushInput(ctxt, input);
6078 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6079 (RAW == '<') && (NXT(1) == '?') &&
6080 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6081 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6082 xmlParseTextDecl(ctxt);
6083 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6084 /*
6085 * The XML REC instructs us to stop parsing
6086 * right here
6087 */
6088 ctxt->instate = XML_PARSER_EOF;
6089 xmlFree(name);
6090 return;
6091 }
6092 }
Owen Taylor3473f882001-02-23 17:55:21 +00006093 }
6094 }
6095 ctxt->hasPErefs = 1;
6096 } else {
6097 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6098 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6099 ctxt->sax->error(ctxt->userData,
6100 "xmlParsePEReference: expecting ';'\n");
6101 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006102 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006103 }
6104 xmlFree(name);
6105 }
6106 }
6107}
6108
6109/**
6110 * xmlParseStringPEReference:
6111 * @ctxt: an XML parser context
6112 * @str: a pointer to an index in the string
6113 *
6114 * parse PEReference declarations
6115 *
6116 * [69] PEReference ::= '%' Name ';'
6117 *
6118 * [ WFC: No Recursion ]
6119 * A parsed entity must not contain a recursive
6120 * reference to itself, either directly or indirectly.
6121 *
6122 * [ WFC: Entity Declared ]
6123 * In a document without any DTD, a document with only an internal DTD
6124 * subset which contains no parameter entity references, or a document
6125 * with "standalone='yes'", ... ... The declaration of a parameter
6126 * entity must precede any reference to it...
6127 *
6128 * [ VC: Entity Declared ]
6129 * In a document with an external subset or external parameter entities
6130 * with "standalone='no'", ... ... The declaration of a parameter entity
6131 * must precede any reference to it...
6132 *
6133 * [ WFC: In DTD ]
6134 * Parameter-entity references may only appear in the DTD.
6135 * NOTE: misleading but this is handled.
6136 *
6137 * Returns the string of the entity content.
6138 * str is updated to the current value of the index
6139 */
6140xmlEntityPtr
6141xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6142 const xmlChar *ptr;
6143 xmlChar cur;
6144 xmlChar *name;
6145 xmlEntityPtr entity = NULL;
6146
6147 if ((str == NULL) || (*str == NULL)) return(NULL);
6148 ptr = *str;
6149 cur = *ptr;
6150 if (cur == '%') {
6151 ptr++;
6152 cur = *ptr;
6153 name = xmlParseStringName(ctxt, &ptr);
6154 if (name == NULL) {
6155 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6156 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6157 ctxt->sax->error(ctxt->userData,
6158 "xmlParseStringPEReference: no name\n");
6159 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006160 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006161 } else {
6162 cur = *ptr;
6163 if (cur == ';') {
6164 ptr++;
6165 cur = *ptr;
6166 if ((ctxt->sax != NULL) &&
6167 (ctxt->sax->getParameterEntity != NULL))
6168 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6169 name);
6170 if (entity == NULL) {
6171 /*
6172 * [ WFC: Entity Declared ]
6173 * In a document without any DTD, a document with only an
6174 * internal DTD subset which contains no parameter entity
6175 * references, or a document with "standalone='yes'", ...
6176 * ... The declaration of a parameter entity must precede
6177 * any reference to it...
6178 */
6179 if ((ctxt->standalone == 1) ||
6180 ((ctxt->hasExternalSubset == 0) &&
6181 (ctxt->hasPErefs == 0))) {
6182 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6183 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6184 ctxt->sax->error(ctxt->userData,
6185 "PEReference: %%%s; not found\n", name);
6186 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006187 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006188 } else {
6189 /*
6190 * [ VC: Entity Declared ]
6191 * In a document with an external subset or external
6192 * parameter entities with "standalone='no'", ...
6193 * ... The declaration of a parameter entity must
6194 * precede any reference to it...
6195 */
6196 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6197 ctxt->sax->warning(ctxt->userData,
6198 "PEReference: %%%s; not found\n", name);
6199 ctxt->valid = 0;
6200 }
6201 } else {
6202 /*
6203 * Internal checking in case the entity quest barfed
6204 */
6205 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6206 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6207 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6208 ctxt->sax->warning(ctxt->userData,
6209 "Internal: %%%s; is not a parameter entity\n", name);
6210 }
6211 }
6212 ctxt->hasPErefs = 1;
6213 } else {
6214 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6215 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6216 ctxt->sax->error(ctxt->userData,
6217 "xmlParseStringPEReference: expecting ';'\n");
6218 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006219 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006220 }
6221 xmlFree(name);
6222 }
6223 }
6224 *str = ptr;
6225 return(entity);
6226}
6227
6228/**
6229 * xmlParseDocTypeDecl:
6230 * @ctxt: an XML parser context
6231 *
6232 * parse a DOCTYPE declaration
6233 *
6234 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6235 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6236 *
6237 * [ VC: Root Element Type ]
6238 * The Name in the document type declaration must match the element
6239 * type of the root element.
6240 */
6241
6242void
6243xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6244 xmlChar *name = NULL;
6245 xmlChar *ExternalID = NULL;
6246 xmlChar *URI = NULL;
6247
6248 /*
6249 * We know that '<!DOCTYPE' has been detected.
6250 */
6251 SKIP(9);
6252
6253 SKIP_BLANKS;
6254
6255 /*
6256 * Parse the DOCTYPE name.
6257 */
6258 name = xmlParseName(ctxt);
6259 if (name == NULL) {
6260 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6261 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6262 ctxt->sax->error(ctxt->userData,
6263 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6264 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006265 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006266 }
6267 ctxt->intSubName = name;
6268
6269 SKIP_BLANKS;
6270
6271 /*
6272 * Check for SystemID and ExternalID
6273 */
6274 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6275
6276 if ((URI != NULL) || (ExternalID != NULL)) {
6277 ctxt->hasExternalSubset = 1;
6278 }
6279 ctxt->extSubURI = URI;
6280 ctxt->extSubSystem = ExternalID;
6281
6282 SKIP_BLANKS;
6283
6284 /*
6285 * Create and update the internal subset.
6286 */
6287 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6288 (!ctxt->disableSAX))
6289 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6290
6291 /*
6292 * Is there any internal subset declarations ?
6293 * they are handled separately in xmlParseInternalSubset()
6294 */
6295 if (RAW == '[')
6296 return;
6297
6298 /*
6299 * We should be at the end of the DOCTYPE declaration.
6300 */
6301 if (RAW != '>') {
6302 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6303 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006304 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006305 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006306 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006307 }
6308 NEXT;
6309}
6310
6311/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006312 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006313 * @ctxt: an XML parser context
6314 *
6315 * parse the internal subset declaration
6316 *
6317 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6318 */
6319
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006320static void
Owen Taylor3473f882001-02-23 17:55:21 +00006321xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6322 /*
6323 * Is there any DTD definition ?
6324 */
6325 if (RAW == '[') {
6326 ctxt->instate = XML_PARSER_DTD;
6327 NEXT;
6328 /*
6329 * Parse the succession of Markup declarations and
6330 * PEReferences.
6331 * Subsequence (markupdecl | PEReference | S)*
6332 */
6333 while (RAW != ']') {
6334 const xmlChar *check = CUR_PTR;
6335 int cons = ctxt->input->consumed;
6336
6337 SKIP_BLANKS;
6338 xmlParseMarkupDecl(ctxt);
6339 xmlParsePEReference(ctxt);
6340
6341 /*
6342 * Pop-up of finished entities.
6343 */
6344 while ((RAW == 0) && (ctxt->inputNr > 1))
6345 xmlPopInput(ctxt);
6346
6347 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6348 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6350 ctxt->sax->error(ctxt->userData,
6351 "xmlParseInternalSubset: error detected in Markup declaration\n");
6352 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006353 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006354 break;
6355 }
6356 }
6357 if (RAW == ']') {
6358 NEXT;
6359 SKIP_BLANKS;
6360 }
6361 }
6362
6363 /*
6364 * We should be at the end of the DOCTYPE declaration.
6365 */
6366 if (RAW != '>') {
6367 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6368 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006369 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006370 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006371 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006372 }
6373 NEXT;
6374}
6375
6376/**
6377 * xmlParseAttribute:
6378 * @ctxt: an XML parser context
6379 * @value: a xmlChar ** used to store the value of the attribute
6380 *
6381 * parse an attribute
6382 *
6383 * [41] Attribute ::= Name Eq AttValue
6384 *
6385 * [ WFC: No External Entity References ]
6386 * Attribute values cannot contain direct or indirect entity references
6387 * to external entities.
6388 *
6389 * [ WFC: No < in Attribute Values ]
6390 * The replacement text of any entity referred to directly or indirectly in
6391 * an attribute value (other than "&lt;") must not contain a <.
6392 *
6393 * [ VC: Attribute Value Type ]
6394 * The attribute must have been declared; the value must be of the type
6395 * declared for it.
6396 *
6397 * [25] Eq ::= S? '=' S?
6398 *
6399 * With namespace:
6400 *
6401 * [NS 11] Attribute ::= QName Eq AttValue
6402 *
6403 * Also the case QName == xmlns:??? is handled independently as a namespace
6404 * definition.
6405 *
6406 * Returns the attribute name, and the value in *value.
6407 */
6408
6409xmlChar *
6410xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6411 xmlChar *name, *val;
6412
6413 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006414 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006415 name = xmlParseName(ctxt);
6416 if (name == NULL) {
6417 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6418 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6419 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6420 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006421 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006422 return(NULL);
6423 }
6424
6425 /*
6426 * read the value
6427 */
6428 SKIP_BLANKS;
6429 if (RAW == '=') {
6430 NEXT;
6431 SKIP_BLANKS;
6432 val = xmlParseAttValue(ctxt);
6433 ctxt->instate = XML_PARSER_CONTENT;
6434 } else {
6435 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6436 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6437 ctxt->sax->error(ctxt->userData,
6438 "Specification mandate value for attribute %s\n", name);
6439 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006440 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006441 xmlFree(name);
6442 return(NULL);
6443 }
6444
6445 /*
6446 * Check that xml:lang conforms to the specification
6447 * No more registered as an error, just generate a warning now
6448 * since this was deprecated in XML second edition
6449 */
6450 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6451 if (!xmlCheckLanguageID(val)) {
6452 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6453 ctxt->sax->warning(ctxt->userData,
6454 "Malformed value for xml:lang : %s\n", val);
6455 }
6456 }
6457
6458 /*
6459 * Check that xml:space conforms to the specification
6460 */
6461 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6462 if (xmlStrEqual(val, BAD_CAST "default"))
6463 *(ctxt->space) = 0;
6464 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6465 *(ctxt->space) = 1;
6466 else {
6467 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6468 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6469 ctxt->sax->error(ctxt->userData,
6470"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6471 val);
6472 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006473 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006474 }
6475 }
6476
6477 *value = val;
6478 return(name);
6479}
6480
6481/**
6482 * xmlParseStartTag:
6483 * @ctxt: an XML parser context
6484 *
6485 * parse a start of tag either for rule element or
6486 * EmptyElement. In both case we don't parse the tag closing chars.
6487 *
6488 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6489 *
6490 * [ WFC: Unique Att Spec ]
6491 * No attribute name may appear more than once in the same start-tag or
6492 * empty-element tag.
6493 *
6494 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6495 *
6496 * [ WFC: Unique Att Spec ]
6497 * No attribute name may appear more than once in the same start-tag or
6498 * empty-element tag.
6499 *
6500 * With namespace:
6501 *
6502 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6503 *
6504 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6505 *
6506 * Returns the element name parsed
6507 */
6508
6509xmlChar *
6510xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6511 xmlChar *name;
6512 xmlChar *attname;
6513 xmlChar *attvalue;
6514 const xmlChar **atts = NULL;
6515 int nbatts = 0;
6516 int maxatts = 0;
6517 int i;
6518
6519 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006520 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006521
6522 name = xmlParseName(ctxt);
6523 if (name == NULL) {
6524 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6525 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6526 ctxt->sax->error(ctxt->userData,
6527 "xmlParseStartTag: invalid element name\n");
6528 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006529 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006530 return(NULL);
6531 }
6532
6533 /*
6534 * Now parse the attributes, it ends up with the ending
6535 *
6536 * (S Attribute)* S?
6537 */
6538 SKIP_BLANKS;
6539 GROW;
6540
Daniel Veillard21a0f912001-02-25 19:54:14 +00006541 while ((RAW != '>') &&
6542 ((RAW != '/') || (NXT(1) != '>')) &&
6543 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006544 const xmlChar *q = CUR_PTR;
6545 int cons = ctxt->input->consumed;
6546
6547 attname = xmlParseAttribute(ctxt, &attvalue);
6548 if ((attname != NULL) && (attvalue != NULL)) {
6549 /*
6550 * [ WFC: Unique Att Spec ]
6551 * No attribute name may appear more than once in the same
6552 * start-tag or empty-element tag.
6553 */
6554 for (i = 0; i < nbatts;i += 2) {
6555 if (xmlStrEqual(atts[i], attname)) {
6556 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6557 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6558 ctxt->sax->error(ctxt->userData,
6559 "Attribute %s redefined\n",
6560 attname);
6561 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006562 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006563 xmlFree(attname);
6564 xmlFree(attvalue);
6565 goto failed;
6566 }
6567 }
6568
6569 /*
6570 * Add the pair to atts
6571 */
6572 if (atts == NULL) {
6573 maxatts = 10;
6574 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6575 if (atts == NULL) {
6576 xmlGenericError(xmlGenericErrorContext,
6577 "malloc of %ld byte failed\n",
6578 maxatts * (long)sizeof(xmlChar *));
6579 return(NULL);
6580 }
6581 } else if (nbatts + 4 > maxatts) {
6582 maxatts *= 2;
6583 atts = (const xmlChar **) xmlRealloc((void *) atts,
6584 maxatts * sizeof(xmlChar *));
6585 if (atts == NULL) {
6586 xmlGenericError(xmlGenericErrorContext,
6587 "realloc of %ld byte failed\n",
6588 maxatts * (long)sizeof(xmlChar *));
6589 return(NULL);
6590 }
6591 }
6592 atts[nbatts++] = attname;
6593 atts[nbatts++] = attvalue;
6594 atts[nbatts] = NULL;
6595 atts[nbatts + 1] = NULL;
6596 } else {
6597 if (attname != NULL)
6598 xmlFree(attname);
6599 if (attvalue != NULL)
6600 xmlFree(attvalue);
6601 }
6602
6603failed:
6604
6605 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6606 break;
6607 if (!IS_BLANK(RAW)) {
6608 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6609 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6610 ctxt->sax->error(ctxt->userData,
6611 "attributes construct error\n");
6612 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006613 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006614 }
6615 SKIP_BLANKS;
6616 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6617 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6618 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6619 ctxt->sax->error(ctxt->userData,
6620 "xmlParseStartTag: problem parsing attributes\n");
6621 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006622 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006623 break;
6624 }
6625 GROW;
6626 }
6627
6628 /*
6629 * SAX: Start of Element !
6630 */
6631 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6632 (!ctxt->disableSAX))
6633 ctxt->sax->startElement(ctxt->userData, name, atts);
6634
6635 if (atts != NULL) {
6636 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6637 xmlFree((void *) atts);
6638 }
6639 return(name);
6640}
6641
6642/**
6643 * xmlParseEndTag:
6644 * @ctxt: an XML parser context
6645 *
6646 * parse an end of tag
6647 *
6648 * [42] ETag ::= '</' Name S? '>'
6649 *
6650 * With namespace
6651 *
6652 * [NS 9] ETag ::= '</' QName S? '>'
6653 */
6654
6655void
6656xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6657 xmlChar *name;
6658 xmlChar *oldname;
6659
6660 GROW;
6661 if ((RAW != '<') || (NXT(1) != '/')) {
6662 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6663 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6664 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6665 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006666 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006667 return;
6668 }
6669 SKIP(2);
6670
Daniel Veillard46de64e2002-05-29 08:21:33 +00006671 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006672
6673 /*
6674 * We should definitely be at the ending "S? '>'" part
6675 */
6676 GROW;
6677 SKIP_BLANKS;
6678 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6679 ctxt->errNo = XML_ERR_GT_REQUIRED;
6680 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6681 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6682 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006683 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006684 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006685 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006686
6687 /*
6688 * [ WFC: Element Type Match ]
6689 * The Name in an element's end-tag must match the element type in the
6690 * start-tag.
6691 *
6692 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006693 if (name != (xmlChar*)1) {
Owen Taylor3473f882001-02-23 17:55:21 +00006694 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6695 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00006696 if (name != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00006697 ctxt->sax->error(ctxt->userData,
6698 "Opening and ending tag mismatch: %s and %s\n",
6699 ctxt->name, name);
Daniel Veillard46de64e2002-05-29 08:21:33 +00006700 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006701 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006702 "Ending tag error for: %s\n", ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006703 }
6704
6705 }
6706 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006707 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6708#if 0
6709 else {
6710 /*
6711 * Recover in case of one missing close
6712 */
6713 if ((ctxt->nameNr > 2) &&
6714 (xmlStrEqual(ctxt->nameTab[ctxt->nameNr -2], name))) {
6715 namePop(ctxt);
6716 spacePop(ctxt);
6717 }
6718 }
6719#endif
6720 if (name != NULL)
6721 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00006722 }
6723
6724 /*
6725 * SAX: End of Tag
6726 */
6727 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6728 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006729 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006730
Owen Taylor3473f882001-02-23 17:55:21 +00006731 oldname = namePop(ctxt);
6732 spacePop(ctxt);
6733 if (oldname != NULL) {
6734#ifdef DEBUG_STACK
6735 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6736#endif
6737 xmlFree(oldname);
6738 }
6739 return;
6740}
6741
6742/**
6743 * xmlParseCDSect:
6744 * @ctxt: an XML parser context
6745 *
6746 * Parse escaped pure raw content.
6747 *
6748 * [18] CDSect ::= CDStart CData CDEnd
6749 *
6750 * [19] CDStart ::= '<![CDATA['
6751 *
6752 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6753 *
6754 * [21] CDEnd ::= ']]>'
6755 */
6756void
6757xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6758 xmlChar *buf = NULL;
6759 int len = 0;
6760 int size = XML_PARSER_BUFFER_SIZE;
6761 int r, rl;
6762 int s, sl;
6763 int cur, l;
6764 int count = 0;
6765
6766 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6767 (NXT(2) == '[') && (NXT(3) == 'C') &&
6768 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6769 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6770 (NXT(8) == '[')) {
6771 SKIP(9);
6772 } else
6773 return;
6774
6775 ctxt->instate = XML_PARSER_CDATA_SECTION;
6776 r = CUR_CHAR(rl);
6777 if (!IS_CHAR(r)) {
6778 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6779 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6780 ctxt->sax->error(ctxt->userData,
6781 "CData section not finished\n");
6782 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006783 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006784 ctxt->instate = XML_PARSER_CONTENT;
6785 return;
6786 }
6787 NEXTL(rl);
6788 s = CUR_CHAR(sl);
6789 if (!IS_CHAR(s)) {
6790 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6791 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6792 ctxt->sax->error(ctxt->userData,
6793 "CData section not finished\n");
6794 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006795 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006796 ctxt->instate = XML_PARSER_CONTENT;
6797 return;
6798 }
6799 NEXTL(sl);
6800 cur = CUR_CHAR(l);
6801 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6802 if (buf == NULL) {
6803 xmlGenericError(xmlGenericErrorContext,
6804 "malloc of %d byte failed\n", size);
6805 return;
6806 }
6807 while (IS_CHAR(cur) &&
6808 ((r != ']') || (s != ']') || (cur != '>'))) {
6809 if (len + 5 >= size) {
6810 size *= 2;
6811 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6812 if (buf == NULL) {
6813 xmlGenericError(xmlGenericErrorContext,
6814 "realloc of %d byte failed\n", size);
6815 return;
6816 }
6817 }
6818 COPY_BUF(rl,buf,len,r);
6819 r = s;
6820 rl = sl;
6821 s = cur;
6822 sl = l;
6823 count++;
6824 if (count > 50) {
6825 GROW;
6826 count = 0;
6827 }
6828 NEXTL(l);
6829 cur = CUR_CHAR(l);
6830 }
6831 buf[len] = 0;
6832 ctxt->instate = XML_PARSER_CONTENT;
6833 if (cur != '>') {
6834 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6835 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6836 ctxt->sax->error(ctxt->userData,
6837 "CData section not finished\n%.50s\n", buf);
6838 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006839 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006840 xmlFree(buf);
6841 return;
6842 }
6843 NEXTL(l);
6844
6845 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006846 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00006847 */
6848 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6849 if (ctxt->sax->cdataBlock != NULL)
6850 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006851 else if (ctxt->sax->characters != NULL)
6852 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006853 }
6854 xmlFree(buf);
6855}
6856
6857/**
6858 * xmlParseContent:
6859 * @ctxt: an XML parser context
6860 *
6861 * Parse a content:
6862 *
6863 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6864 */
6865
6866void
6867xmlParseContent(xmlParserCtxtPtr ctxt) {
6868 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00006869 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00006870 ((RAW != '<') || (NXT(1) != '/'))) {
6871 const xmlChar *test = CUR_PTR;
6872 int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006873 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006874
6875 /*
Owen Taylor3473f882001-02-23 17:55:21 +00006876 * First case : a Processing Instruction.
6877 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00006878 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006879 xmlParsePI(ctxt);
6880 }
6881
6882 /*
6883 * Second case : a CDSection
6884 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006885 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006886 (NXT(2) == '[') && (NXT(3) == 'C') &&
6887 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6888 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6889 (NXT(8) == '[')) {
6890 xmlParseCDSect(ctxt);
6891 }
6892
6893 /*
6894 * Third case : a comment
6895 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006896 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006897 (NXT(2) == '-') && (NXT(3) == '-')) {
6898 xmlParseComment(ctxt);
6899 ctxt->instate = XML_PARSER_CONTENT;
6900 }
6901
6902 /*
6903 * Fourth case : a sub-element.
6904 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006905 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006906 xmlParseElement(ctxt);
6907 }
6908
6909 /*
6910 * Fifth case : a reference. If if has not been resolved,
6911 * parsing returns it's Name, create the node
6912 */
6913
Daniel Veillard21a0f912001-02-25 19:54:14 +00006914 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006915 xmlParseReference(ctxt);
6916 }
6917
6918 /*
6919 * Last case, text. Note that References are handled directly.
6920 */
6921 else {
6922 xmlParseCharData(ctxt, 0);
6923 }
6924
6925 GROW;
6926 /*
6927 * Pop-up of finished entities.
6928 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00006929 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00006930 xmlPopInput(ctxt);
6931 SHRINK;
6932
Daniel Veillardfdc91562002-07-01 21:52:03 +00006933 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006934 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6935 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6936 ctxt->sax->error(ctxt->userData,
6937 "detected an error in element content\n");
6938 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006939 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006940 ctxt->instate = XML_PARSER_EOF;
6941 break;
6942 }
6943 }
6944}
6945
6946/**
6947 * xmlParseElement:
6948 * @ctxt: an XML parser context
6949 *
6950 * parse an XML element, this is highly recursive
6951 *
6952 * [39] element ::= EmptyElemTag | STag content ETag
6953 *
6954 * [ WFC: Element Type Match ]
6955 * The Name in an element's end-tag must match the element type in the
6956 * start-tag.
6957 *
6958 * [ VC: Element Valid ]
6959 * An element is valid if there is a declaration matching elementdecl
6960 * where the Name matches the element type and one of the following holds:
6961 * - The declaration matches EMPTY and the element has no content.
6962 * - The declaration matches children and the sequence of child elements
6963 * belongs to the language generated by the regular expression in the
6964 * content model, with optional white space (characters matching the
6965 * nonterminal S) between each pair of child elements.
6966 * - The declaration matches Mixed and the content consists of character
6967 * data and child elements whose types match names in the content model.
6968 * - The declaration matches ANY, and the types of any child elements have
6969 * been declared.
6970 */
6971
6972void
6973xmlParseElement(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00006974 xmlChar *name;
6975 xmlChar *oldname;
6976 xmlParserNodeInfo node_info;
6977 xmlNodePtr ret;
6978
6979 /* Capture start position */
6980 if (ctxt->record_info) {
6981 node_info.begin_pos = ctxt->input->consumed +
6982 (CUR_PTR - ctxt->input->base);
6983 node_info.begin_line = ctxt->input->line;
6984 }
6985
6986 if (ctxt->spaceNr == 0)
6987 spacePush(ctxt, -1);
6988 else
6989 spacePush(ctxt, *ctxt->space);
6990
6991 name = xmlParseStartTag(ctxt);
6992 if (name == NULL) {
6993 spacePop(ctxt);
6994 return;
6995 }
6996 namePush(ctxt, name);
6997 ret = ctxt->node;
6998
6999 /*
7000 * [ VC: Root Element Type ]
7001 * The Name in the document type declaration must match the element
7002 * type of the root element.
7003 */
7004 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7005 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7006 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7007
7008 /*
7009 * Check for an Empty Element.
7010 */
7011 if ((RAW == '/') && (NXT(1) == '>')) {
7012 SKIP(2);
7013 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7014 (!ctxt->disableSAX))
7015 ctxt->sax->endElement(ctxt->userData, name);
7016 oldname = namePop(ctxt);
7017 spacePop(ctxt);
7018 if (oldname != NULL) {
7019#ifdef DEBUG_STACK
7020 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7021#endif
7022 xmlFree(oldname);
7023 }
7024 if ( ret != NULL && ctxt->record_info ) {
7025 node_info.end_pos = ctxt->input->consumed +
7026 (CUR_PTR - ctxt->input->base);
7027 node_info.end_line = ctxt->input->line;
7028 node_info.node = ret;
7029 xmlParserAddNodeInfo(ctxt, &node_info);
7030 }
7031 return;
7032 }
7033 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00007034 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007035 } else {
7036 ctxt->errNo = XML_ERR_GT_REQUIRED;
7037 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7038 ctxt->sax->error(ctxt->userData,
Daniel Veillard38d80e22002-06-11 07:24:56 +00007039 "Couldn't find end of Start Tag %s\n",
7040 name);
Owen Taylor3473f882001-02-23 17:55:21 +00007041 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007042 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007043
7044 /*
7045 * end of parsing of this node.
7046 */
7047 nodePop(ctxt);
7048 oldname = namePop(ctxt);
7049 spacePop(ctxt);
7050 if (oldname != NULL) {
7051#ifdef DEBUG_STACK
7052 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7053#endif
7054 xmlFree(oldname);
7055 }
7056
7057 /*
7058 * Capture end position and add node
7059 */
7060 if ( ret != NULL && ctxt->record_info ) {
7061 node_info.end_pos = ctxt->input->consumed +
7062 (CUR_PTR - ctxt->input->base);
7063 node_info.end_line = ctxt->input->line;
7064 node_info.node = ret;
7065 xmlParserAddNodeInfo(ctxt, &node_info);
7066 }
7067 return;
7068 }
7069
7070 /*
7071 * Parse the content of the element:
7072 */
7073 xmlParseContent(ctxt);
7074 if (!IS_CHAR(RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00007075 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00007076 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7077 ctxt->sax->error(ctxt->userData,
Daniel Veillard38d80e22002-06-11 07:24:56 +00007078 "Premature end of data in tag %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007079 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007080 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007081
7082 /*
7083 * end of parsing of this node.
7084 */
7085 nodePop(ctxt);
7086 oldname = namePop(ctxt);
7087 spacePop(ctxt);
7088 if (oldname != NULL) {
7089#ifdef DEBUG_STACK
7090 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7091#endif
7092 xmlFree(oldname);
7093 }
7094 return;
7095 }
7096
7097 /*
7098 * parse the end of tag: '</' should be here.
7099 */
7100 xmlParseEndTag(ctxt);
7101
7102 /*
7103 * Capture end position and add node
7104 */
7105 if ( ret != NULL && ctxt->record_info ) {
7106 node_info.end_pos = ctxt->input->consumed +
7107 (CUR_PTR - ctxt->input->base);
7108 node_info.end_line = ctxt->input->line;
7109 node_info.node = ret;
7110 xmlParserAddNodeInfo(ctxt, &node_info);
7111 }
7112}
7113
7114/**
7115 * xmlParseVersionNum:
7116 * @ctxt: an XML parser context
7117 *
7118 * parse the XML version value.
7119 *
7120 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7121 *
7122 * Returns the string giving the XML version number, or NULL
7123 */
7124xmlChar *
7125xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7126 xmlChar *buf = NULL;
7127 int len = 0;
7128 int size = 10;
7129 xmlChar cur;
7130
7131 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7132 if (buf == NULL) {
7133 xmlGenericError(xmlGenericErrorContext,
7134 "malloc of %d byte failed\n", size);
7135 return(NULL);
7136 }
7137 cur = CUR;
7138 while (((cur >= 'a') && (cur <= 'z')) ||
7139 ((cur >= 'A') && (cur <= 'Z')) ||
7140 ((cur >= '0') && (cur <= '9')) ||
7141 (cur == '_') || (cur == '.') ||
7142 (cur == ':') || (cur == '-')) {
7143 if (len + 1 >= size) {
7144 size *= 2;
7145 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7146 if (buf == NULL) {
7147 xmlGenericError(xmlGenericErrorContext,
7148 "realloc of %d byte failed\n", size);
7149 return(NULL);
7150 }
7151 }
7152 buf[len++] = cur;
7153 NEXT;
7154 cur=CUR;
7155 }
7156 buf[len] = 0;
7157 return(buf);
7158}
7159
7160/**
7161 * xmlParseVersionInfo:
7162 * @ctxt: an XML parser context
7163 *
7164 * parse the XML version.
7165 *
7166 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7167 *
7168 * [25] Eq ::= S? '=' S?
7169 *
7170 * Returns the version string, e.g. "1.0"
7171 */
7172
7173xmlChar *
7174xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7175 xmlChar *version = NULL;
7176 const xmlChar *q;
7177
7178 if ((RAW == 'v') && (NXT(1) == 'e') &&
7179 (NXT(2) == 'r') && (NXT(3) == 's') &&
7180 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7181 (NXT(6) == 'n')) {
7182 SKIP(7);
7183 SKIP_BLANKS;
7184 if (RAW != '=') {
7185 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7186 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7187 ctxt->sax->error(ctxt->userData,
7188 "xmlParseVersionInfo : expected '='\n");
7189 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007190 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007191 return(NULL);
7192 }
7193 NEXT;
7194 SKIP_BLANKS;
7195 if (RAW == '"') {
7196 NEXT;
7197 q = CUR_PTR;
7198 version = xmlParseVersionNum(ctxt);
7199 if (RAW != '"') {
7200 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7201 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7202 ctxt->sax->error(ctxt->userData,
7203 "String not closed\n%.50s\n", q);
7204 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007205 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007206 } else
7207 NEXT;
7208 } else if (RAW == '\''){
7209 NEXT;
7210 q = CUR_PTR;
7211 version = xmlParseVersionNum(ctxt);
7212 if (RAW != '\'') {
7213 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7214 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7215 ctxt->sax->error(ctxt->userData,
7216 "String not closed\n%.50s\n", q);
7217 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007218 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007219 } else
7220 NEXT;
7221 } else {
7222 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7223 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7224 ctxt->sax->error(ctxt->userData,
7225 "xmlParseVersionInfo : expected ' or \"\n");
7226 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007227 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007228 }
7229 }
7230 return(version);
7231}
7232
7233/**
7234 * xmlParseEncName:
7235 * @ctxt: an XML parser context
7236 *
7237 * parse the XML encoding name
7238 *
7239 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7240 *
7241 * Returns the encoding name value or NULL
7242 */
7243xmlChar *
7244xmlParseEncName(xmlParserCtxtPtr ctxt) {
7245 xmlChar *buf = NULL;
7246 int len = 0;
7247 int size = 10;
7248 xmlChar cur;
7249
7250 cur = CUR;
7251 if (((cur >= 'a') && (cur <= 'z')) ||
7252 ((cur >= 'A') && (cur <= 'Z'))) {
7253 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7254 if (buf == NULL) {
7255 xmlGenericError(xmlGenericErrorContext,
7256 "malloc of %d byte failed\n", size);
7257 return(NULL);
7258 }
7259
7260 buf[len++] = cur;
7261 NEXT;
7262 cur = CUR;
7263 while (((cur >= 'a') && (cur <= 'z')) ||
7264 ((cur >= 'A') && (cur <= 'Z')) ||
7265 ((cur >= '0') && (cur <= '9')) ||
7266 (cur == '.') || (cur == '_') ||
7267 (cur == '-')) {
7268 if (len + 1 >= size) {
7269 size *= 2;
7270 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7271 if (buf == NULL) {
7272 xmlGenericError(xmlGenericErrorContext,
7273 "realloc of %d byte failed\n", size);
7274 return(NULL);
7275 }
7276 }
7277 buf[len++] = cur;
7278 NEXT;
7279 cur = CUR;
7280 if (cur == 0) {
7281 SHRINK;
7282 GROW;
7283 cur = CUR;
7284 }
7285 }
7286 buf[len] = 0;
7287 } else {
7288 ctxt->errNo = XML_ERR_ENCODING_NAME;
7289 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7290 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7291 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007292 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007293 }
7294 return(buf);
7295}
7296
7297/**
7298 * xmlParseEncodingDecl:
7299 * @ctxt: an XML parser context
7300 *
7301 * parse the XML encoding declaration
7302 *
7303 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7304 *
7305 * this setups the conversion filters.
7306 *
7307 * Returns the encoding value or NULL
7308 */
7309
7310xmlChar *
7311xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7312 xmlChar *encoding = NULL;
7313 const xmlChar *q;
7314
7315 SKIP_BLANKS;
7316 if ((RAW == 'e') && (NXT(1) == 'n') &&
7317 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7318 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7319 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7320 SKIP(8);
7321 SKIP_BLANKS;
7322 if (RAW != '=') {
7323 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7324 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7325 ctxt->sax->error(ctxt->userData,
7326 "xmlParseEncodingDecl : expected '='\n");
7327 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007328 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007329 return(NULL);
7330 }
7331 NEXT;
7332 SKIP_BLANKS;
7333 if (RAW == '"') {
7334 NEXT;
7335 q = CUR_PTR;
7336 encoding = xmlParseEncName(ctxt);
7337 if (RAW != '"') {
7338 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7339 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7340 ctxt->sax->error(ctxt->userData,
7341 "String not closed\n%.50s\n", q);
7342 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007343 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007344 } else
7345 NEXT;
7346 } else if (RAW == '\''){
7347 NEXT;
7348 q = CUR_PTR;
7349 encoding = xmlParseEncName(ctxt);
7350 if (RAW != '\'') {
7351 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7352 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7353 ctxt->sax->error(ctxt->userData,
7354 "String not closed\n%.50s\n", q);
7355 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007356 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007357 } else
7358 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00007359 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007360 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7361 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7362 ctxt->sax->error(ctxt->userData,
7363 "xmlParseEncodingDecl : expected ' or \"\n");
7364 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007365 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007366 }
7367 if (encoding != NULL) {
7368 xmlCharEncoding enc;
7369 xmlCharEncodingHandlerPtr handler;
7370
7371 if (ctxt->input->encoding != NULL)
7372 xmlFree((xmlChar *) ctxt->input->encoding);
7373 ctxt->input->encoding = encoding;
7374
7375 enc = xmlParseCharEncoding((const char *) encoding);
7376 /*
7377 * registered set of known encodings
7378 */
7379 if (enc != XML_CHAR_ENCODING_ERROR) {
7380 xmlSwitchEncoding(ctxt, enc);
7381 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
Daniel Veillard46d6c442002-04-09 16:10:39 +00007382 ctxt->input->encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007383 xmlFree(encoding);
7384 return(NULL);
7385 }
7386 } else {
7387 /*
7388 * fallback for unknown encodings
7389 */
7390 handler = xmlFindCharEncodingHandler((const char *) encoding);
7391 if (handler != NULL) {
7392 xmlSwitchToEncoding(ctxt, handler);
7393 } else {
7394 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7395 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7396 ctxt->sax->error(ctxt->userData,
7397 "Unsupported encoding %s\n", encoding);
7398 return(NULL);
7399 }
7400 }
7401 }
7402 }
7403 return(encoding);
7404}
7405
7406/**
7407 * xmlParseSDDecl:
7408 * @ctxt: an XML parser context
7409 *
7410 * parse the XML standalone declaration
7411 *
7412 * [32] SDDecl ::= S 'standalone' Eq
7413 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7414 *
7415 * [ VC: Standalone Document Declaration ]
7416 * TODO The standalone document declaration must have the value "no"
7417 * if any external markup declarations contain declarations of:
7418 * - attributes with default values, if elements to which these
7419 * attributes apply appear in the document without specifications
7420 * of values for these attributes, or
7421 * - entities (other than amp, lt, gt, apos, quot), if references
7422 * to those entities appear in the document, or
7423 * - attributes with values subject to normalization, where the
7424 * attribute appears in the document with a value which will change
7425 * as a result of normalization, or
7426 * - element types with element content, if white space occurs directly
7427 * within any instance of those types.
7428 *
7429 * Returns 1 if standalone, 0 otherwise
7430 */
7431
7432int
7433xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7434 int standalone = -1;
7435
7436 SKIP_BLANKS;
7437 if ((RAW == 's') && (NXT(1) == 't') &&
7438 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7439 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7440 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7441 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7442 SKIP(10);
7443 SKIP_BLANKS;
7444 if (RAW != '=') {
7445 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7446 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7447 ctxt->sax->error(ctxt->userData,
7448 "XML standalone declaration : expected '='\n");
7449 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007450 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007451 return(standalone);
7452 }
7453 NEXT;
7454 SKIP_BLANKS;
7455 if (RAW == '\''){
7456 NEXT;
7457 if ((RAW == 'n') && (NXT(1) == 'o')) {
7458 standalone = 0;
7459 SKIP(2);
7460 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7461 (NXT(2) == 's')) {
7462 standalone = 1;
7463 SKIP(3);
7464 } else {
7465 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7467 ctxt->sax->error(ctxt->userData,
7468 "standalone accepts only 'yes' or 'no'\n");
7469 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007470 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007471 }
7472 if (RAW != '\'') {
7473 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7474 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7475 ctxt->sax->error(ctxt->userData, "String not closed\n");
7476 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007477 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007478 } else
7479 NEXT;
7480 } else if (RAW == '"'){
7481 NEXT;
7482 if ((RAW == 'n') && (NXT(1) == 'o')) {
7483 standalone = 0;
7484 SKIP(2);
7485 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7486 (NXT(2) == 's')) {
7487 standalone = 1;
7488 SKIP(3);
7489 } else {
7490 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7491 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7492 ctxt->sax->error(ctxt->userData,
7493 "standalone accepts only 'yes' or 'no'\n");
7494 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007495 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007496 }
7497 if (RAW != '"') {
7498 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7499 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7500 ctxt->sax->error(ctxt->userData, "String not closed\n");
7501 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007502 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007503 } else
7504 NEXT;
7505 } else {
7506 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7507 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7508 ctxt->sax->error(ctxt->userData,
7509 "Standalone value not found\n");
7510 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007511 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007512 }
7513 }
7514 return(standalone);
7515}
7516
7517/**
7518 * xmlParseXMLDecl:
7519 * @ctxt: an XML parser context
7520 *
7521 * parse an XML declaration header
7522 *
7523 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7524 */
7525
7526void
7527xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7528 xmlChar *version;
7529
7530 /*
7531 * We know that '<?xml' is here.
7532 */
7533 SKIP(5);
7534
7535 if (!IS_BLANK(RAW)) {
7536 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7537 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7538 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7539 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007540 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007541 }
7542 SKIP_BLANKS;
7543
7544 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007545 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007546 */
7547 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007548 if (version == NULL) {
7549 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7550 ctxt->sax->error(ctxt->userData,
7551 "Malformed declaration expecting version\n");
7552 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007553 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard19840942001-11-29 16:11:38 +00007554 } else {
7555 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7556 /*
7557 * TODO: Blueberry should be detected here
7558 */
7559 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7560 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7561 version);
7562 }
7563 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007564 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007565 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007566 }
Owen Taylor3473f882001-02-23 17:55:21 +00007567
7568 /*
7569 * We may have the encoding declaration
7570 */
7571 if (!IS_BLANK(RAW)) {
7572 if ((RAW == '?') && (NXT(1) == '>')) {
7573 SKIP(2);
7574 return;
7575 }
7576 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7577 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7578 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7579 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007580 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007581 }
7582 xmlParseEncodingDecl(ctxt);
7583 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7584 /*
7585 * The XML REC instructs us to stop parsing right here
7586 */
7587 return;
7588 }
7589
7590 /*
7591 * We may have the standalone status.
7592 */
7593 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7594 if ((RAW == '?') && (NXT(1) == '>')) {
7595 SKIP(2);
7596 return;
7597 }
7598 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7599 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7600 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7601 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007602 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007603 }
7604 SKIP_BLANKS;
7605 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7606
7607 SKIP_BLANKS;
7608 if ((RAW == '?') && (NXT(1) == '>')) {
7609 SKIP(2);
7610 } else if (RAW == '>') {
7611 /* Deprecated old WD ... */
7612 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7613 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7614 ctxt->sax->error(ctxt->userData,
7615 "XML declaration must end-up with '?>'\n");
7616 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007617 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007618 NEXT;
7619 } else {
7620 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7621 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7622 ctxt->sax->error(ctxt->userData,
7623 "parsing XML declaration: '?>' expected\n");
7624 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007625 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007626 MOVETO_ENDTAG(CUR_PTR);
7627 NEXT;
7628 }
7629}
7630
7631/**
7632 * xmlParseMisc:
7633 * @ctxt: an XML parser context
7634 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007635 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007636 *
7637 * [27] Misc ::= Comment | PI | S
7638 */
7639
7640void
7641xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00007642 while (((RAW == '<') && (NXT(1) == '?')) ||
7643 ((RAW == '<') && (NXT(1) == '!') &&
7644 (NXT(2) == '-') && (NXT(3) == '-')) ||
7645 IS_BLANK(CUR)) {
7646 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007647 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00007648 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007649 NEXT;
7650 } else
7651 xmlParseComment(ctxt);
7652 }
7653}
7654
7655/**
7656 * xmlParseDocument:
7657 * @ctxt: an XML parser context
7658 *
7659 * parse an XML document (and build a tree if using the standard SAX
7660 * interface).
7661 *
7662 * [1] document ::= prolog element Misc*
7663 *
7664 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7665 *
7666 * Returns 0, -1 in case of error. the parser context is augmented
7667 * as a result of the parsing.
7668 */
7669
7670int
7671xmlParseDocument(xmlParserCtxtPtr ctxt) {
7672 xmlChar start[4];
7673 xmlCharEncoding enc;
7674
7675 xmlInitParser();
7676
7677 GROW;
7678
7679 /*
7680 * SAX: beginning of the document processing.
7681 */
7682 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7683 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7684
Daniel Veillard50f34372001-08-03 12:06:36 +00007685 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007686 /*
7687 * Get the 4 first bytes and decode the charset
7688 * if enc != XML_CHAR_ENCODING_NONE
7689 * plug some encoding conversion routines.
7690 */
7691 start[0] = RAW;
7692 start[1] = NXT(1);
7693 start[2] = NXT(2);
7694 start[3] = NXT(3);
7695 enc = xmlDetectCharEncoding(start, 4);
7696 if (enc != XML_CHAR_ENCODING_NONE) {
7697 xmlSwitchEncoding(ctxt, enc);
7698 }
Owen Taylor3473f882001-02-23 17:55:21 +00007699 }
7700
7701
7702 if (CUR == 0) {
7703 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7704 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7705 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7706 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007707 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007708 }
7709
7710 /*
7711 * Check for the XMLDecl in the Prolog.
7712 */
7713 GROW;
7714 if ((RAW == '<') && (NXT(1) == '?') &&
7715 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7716 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7717
7718 /*
7719 * Note that we will switch encoding on the fly.
7720 */
7721 xmlParseXMLDecl(ctxt);
7722 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7723 /*
7724 * The XML REC instructs us to stop parsing right here
7725 */
7726 return(-1);
7727 }
7728 ctxt->standalone = ctxt->input->standalone;
7729 SKIP_BLANKS;
7730 } else {
7731 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7732 }
7733 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7734 ctxt->sax->startDocument(ctxt->userData);
7735
7736 /*
7737 * The Misc part of the Prolog
7738 */
7739 GROW;
7740 xmlParseMisc(ctxt);
7741
7742 /*
7743 * Then possibly doc type declaration(s) and more Misc
7744 * (doctypedecl Misc*)?
7745 */
7746 GROW;
7747 if ((RAW == '<') && (NXT(1) == '!') &&
7748 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7749 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7750 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7751 (NXT(8) == 'E')) {
7752
7753 ctxt->inSubset = 1;
7754 xmlParseDocTypeDecl(ctxt);
7755 if (RAW == '[') {
7756 ctxt->instate = XML_PARSER_DTD;
7757 xmlParseInternalSubset(ctxt);
7758 }
7759
7760 /*
7761 * Create and update the external subset.
7762 */
7763 ctxt->inSubset = 2;
7764 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7765 (!ctxt->disableSAX))
7766 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7767 ctxt->extSubSystem, ctxt->extSubURI);
7768 ctxt->inSubset = 0;
7769
7770
7771 ctxt->instate = XML_PARSER_PROLOG;
7772 xmlParseMisc(ctxt);
7773 }
7774
7775 /*
7776 * Time to start parsing the tree itself
7777 */
7778 GROW;
7779 if (RAW != '<') {
7780 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7781 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7782 ctxt->sax->error(ctxt->userData,
7783 "Start tag expected, '<' not found\n");
7784 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007785 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007786 ctxt->instate = XML_PARSER_EOF;
7787 } else {
7788 ctxt->instate = XML_PARSER_CONTENT;
7789 xmlParseElement(ctxt);
7790 ctxt->instate = XML_PARSER_EPILOG;
7791
7792
7793 /*
7794 * The Misc part at the end
7795 */
7796 xmlParseMisc(ctxt);
7797
Daniel Veillard561b7f82002-03-20 21:55:57 +00007798 if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00007799 ctxt->errNo = XML_ERR_DOCUMENT_END;
7800 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7801 ctxt->sax->error(ctxt->userData,
7802 "Extra content at the end of the document\n");
7803 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007804 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007805 }
7806 ctxt->instate = XML_PARSER_EOF;
7807 }
7808
7809 /*
7810 * SAX: end of the document processing.
7811 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007812 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007813 ctxt->sax->endDocument(ctxt->userData);
7814
Daniel Veillard5997aca2002-03-18 18:36:20 +00007815 /*
7816 * Remove locally kept entity definitions if the tree was not built
7817 */
7818 if ((ctxt->myDoc != NULL) &&
7819 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
7820 xmlFreeDoc(ctxt->myDoc);
7821 ctxt->myDoc = NULL;
7822 }
7823
Daniel Veillardc7612992002-02-17 22:47:37 +00007824 if (! ctxt->wellFormed) {
7825 ctxt->valid = 0;
7826 return(-1);
7827 }
Owen Taylor3473f882001-02-23 17:55:21 +00007828 return(0);
7829}
7830
7831/**
7832 * xmlParseExtParsedEnt:
7833 * @ctxt: an XML parser context
7834 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007835 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00007836 * An external general parsed entity is well-formed if it matches the
7837 * production labeled extParsedEnt.
7838 *
7839 * [78] extParsedEnt ::= TextDecl? content
7840 *
7841 * Returns 0, -1 in case of error. the parser context is augmented
7842 * as a result of the parsing.
7843 */
7844
7845int
7846xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7847 xmlChar start[4];
7848 xmlCharEncoding enc;
7849
7850 xmlDefaultSAXHandlerInit();
7851
7852 GROW;
7853
7854 /*
7855 * SAX: beginning of the document processing.
7856 */
7857 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7858 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7859
7860 /*
7861 * Get the 4 first bytes and decode the charset
7862 * if enc != XML_CHAR_ENCODING_NONE
7863 * plug some encoding conversion routines.
7864 */
7865 start[0] = RAW;
7866 start[1] = NXT(1);
7867 start[2] = NXT(2);
7868 start[3] = NXT(3);
7869 enc = xmlDetectCharEncoding(start, 4);
7870 if (enc != XML_CHAR_ENCODING_NONE) {
7871 xmlSwitchEncoding(ctxt, enc);
7872 }
7873
7874
7875 if (CUR == 0) {
7876 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7877 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7878 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7879 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007880 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007881 }
7882
7883 /*
7884 * Check for the XMLDecl in the Prolog.
7885 */
7886 GROW;
7887 if ((RAW == '<') && (NXT(1) == '?') &&
7888 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7889 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7890
7891 /*
7892 * Note that we will switch encoding on the fly.
7893 */
7894 xmlParseXMLDecl(ctxt);
7895 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7896 /*
7897 * The XML REC instructs us to stop parsing right here
7898 */
7899 return(-1);
7900 }
7901 SKIP_BLANKS;
7902 } else {
7903 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7904 }
7905 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7906 ctxt->sax->startDocument(ctxt->userData);
7907
7908 /*
7909 * Doing validity checking on chunk doesn't make sense
7910 */
7911 ctxt->instate = XML_PARSER_CONTENT;
7912 ctxt->validate = 0;
7913 ctxt->loadsubset = 0;
7914 ctxt->depth = 0;
7915
7916 xmlParseContent(ctxt);
7917
7918 if ((RAW == '<') && (NXT(1) == '/')) {
7919 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7920 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7921 ctxt->sax->error(ctxt->userData,
7922 "chunk is not well balanced\n");
7923 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007924 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007925 } else if (RAW != 0) {
7926 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7927 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7928 ctxt->sax->error(ctxt->userData,
7929 "extra content at the end of well balanced chunk\n");
7930 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007931 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007932 }
7933
7934 /*
7935 * SAX: end of the document processing.
7936 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007937 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007938 ctxt->sax->endDocument(ctxt->userData);
7939
7940 if (! ctxt->wellFormed) return(-1);
7941 return(0);
7942}
7943
7944/************************************************************************
7945 * *
7946 * Progressive parsing interfaces *
7947 * *
7948 ************************************************************************/
7949
7950/**
7951 * xmlParseLookupSequence:
7952 * @ctxt: an XML parser context
7953 * @first: the first char to lookup
7954 * @next: the next char to lookup or zero
7955 * @third: the next char to lookup or zero
7956 *
7957 * Try to find if a sequence (first, next, third) or just (first next) or
7958 * (first) is available in the input stream.
7959 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7960 * to avoid rescanning sequences of bytes, it DOES change the state of the
7961 * parser, do not use liberally.
7962 *
7963 * Returns the index to the current parsing point if the full sequence
7964 * is available, -1 otherwise.
7965 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007966static int
Owen Taylor3473f882001-02-23 17:55:21 +00007967xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7968 xmlChar next, xmlChar third) {
7969 int base, len;
7970 xmlParserInputPtr in;
7971 const xmlChar *buf;
7972
7973 in = ctxt->input;
7974 if (in == NULL) return(-1);
7975 base = in->cur - in->base;
7976 if (base < 0) return(-1);
7977 if (ctxt->checkIndex > base)
7978 base = ctxt->checkIndex;
7979 if (in->buf == NULL) {
7980 buf = in->base;
7981 len = in->length;
7982 } else {
7983 buf = in->buf->buffer->content;
7984 len = in->buf->buffer->use;
7985 }
7986 /* take into account the sequence length */
7987 if (third) len -= 2;
7988 else if (next) len --;
7989 for (;base < len;base++) {
7990 if (buf[base] == first) {
7991 if (third != 0) {
7992 if ((buf[base + 1] != next) ||
7993 (buf[base + 2] != third)) continue;
7994 } else if (next != 0) {
7995 if (buf[base + 1] != next) continue;
7996 }
7997 ctxt->checkIndex = 0;
7998#ifdef DEBUG_PUSH
7999 if (next == 0)
8000 xmlGenericError(xmlGenericErrorContext,
8001 "PP: lookup '%c' found at %d\n",
8002 first, base);
8003 else if (third == 0)
8004 xmlGenericError(xmlGenericErrorContext,
8005 "PP: lookup '%c%c' found at %d\n",
8006 first, next, base);
8007 else
8008 xmlGenericError(xmlGenericErrorContext,
8009 "PP: lookup '%c%c%c' found at %d\n",
8010 first, next, third, base);
8011#endif
8012 return(base - (in->cur - in->base));
8013 }
8014 }
8015 ctxt->checkIndex = base;
8016#ifdef DEBUG_PUSH
8017 if (next == 0)
8018 xmlGenericError(xmlGenericErrorContext,
8019 "PP: lookup '%c' failed\n", first);
8020 else if (third == 0)
8021 xmlGenericError(xmlGenericErrorContext,
8022 "PP: lookup '%c%c' failed\n", first, next);
8023 else
8024 xmlGenericError(xmlGenericErrorContext,
8025 "PP: lookup '%c%c%c' failed\n", first, next, third);
8026#endif
8027 return(-1);
8028}
8029
8030/**
8031 * xmlParseTryOrFinish:
8032 * @ctxt: an XML parser context
8033 * @terminate: last chunk indicator
8034 *
8035 * Try to progress on parsing
8036 *
8037 * Returns zero if no parsing was possible
8038 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008039static int
Owen Taylor3473f882001-02-23 17:55:21 +00008040xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8041 int ret = 0;
8042 int avail;
8043 xmlChar cur, next;
8044
8045#ifdef DEBUG_PUSH
8046 switch (ctxt->instate) {
8047 case XML_PARSER_EOF:
8048 xmlGenericError(xmlGenericErrorContext,
8049 "PP: try EOF\n"); break;
8050 case XML_PARSER_START:
8051 xmlGenericError(xmlGenericErrorContext,
8052 "PP: try START\n"); break;
8053 case XML_PARSER_MISC:
8054 xmlGenericError(xmlGenericErrorContext,
8055 "PP: try MISC\n");break;
8056 case XML_PARSER_COMMENT:
8057 xmlGenericError(xmlGenericErrorContext,
8058 "PP: try COMMENT\n");break;
8059 case XML_PARSER_PROLOG:
8060 xmlGenericError(xmlGenericErrorContext,
8061 "PP: try PROLOG\n");break;
8062 case XML_PARSER_START_TAG:
8063 xmlGenericError(xmlGenericErrorContext,
8064 "PP: try START_TAG\n");break;
8065 case XML_PARSER_CONTENT:
8066 xmlGenericError(xmlGenericErrorContext,
8067 "PP: try CONTENT\n");break;
8068 case XML_PARSER_CDATA_SECTION:
8069 xmlGenericError(xmlGenericErrorContext,
8070 "PP: try CDATA_SECTION\n");break;
8071 case XML_PARSER_END_TAG:
8072 xmlGenericError(xmlGenericErrorContext,
8073 "PP: try END_TAG\n");break;
8074 case XML_PARSER_ENTITY_DECL:
8075 xmlGenericError(xmlGenericErrorContext,
8076 "PP: try ENTITY_DECL\n");break;
8077 case XML_PARSER_ENTITY_VALUE:
8078 xmlGenericError(xmlGenericErrorContext,
8079 "PP: try ENTITY_VALUE\n");break;
8080 case XML_PARSER_ATTRIBUTE_VALUE:
8081 xmlGenericError(xmlGenericErrorContext,
8082 "PP: try ATTRIBUTE_VALUE\n");break;
8083 case XML_PARSER_DTD:
8084 xmlGenericError(xmlGenericErrorContext,
8085 "PP: try DTD\n");break;
8086 case XML_PARSER_EPILOG:
8087 xmlGenericError(xmlGenericErrorContext,
8088 "PP: try EPILOG\n");break;
8089 case XML_PARSER_PI:
8090 xmlGenericError(xmlGenericErrorContext,
8091 "PP: try PI\n");break;
8092 case XML_PARSER_IGNORE:
8093 xmlGenericError(xmlGenericErrorContext,
8094 "PP: try IGNORE\n");break;
8095 }
8096#endif
8097
8098 while (1) {
Aleksey Sanine48a3182002-05-09 18:20:01 +00008099 SHRINK;
8100
Owen Taylor3473f882001-02-23 17:55:21 +00008101 /*
8102 * Pop-up of finished entities.
8103 */
8104 while ((RAW == 0) && (ctxt->inputNr > 1))
8105 xmlPopInput(ctxt);
8106
8107 if (ctxt->input ==NULL) break;
8108 if (ctxt->input->buf == NULL)
8109 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00008110 else {
8111 /*
8112 * If we are operating on converted input, try to flush
8113 * remainng chars to avoid them stalling in the non-converted
8114 * buffer.
8115 */
8116 if ((ctxt->input->buf->raw != NULL) &&
8117 (ctxt->input->buf->raw->use > 0)) {
8118 int base = ctxt->input->base -
8119 ctxt->input->buf->buffer->content;
8120 int current = ctxt->input->cur - ctxt->input->base;
8121
8122 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8123 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8124 ctxt->input->cur = ctxt->input->base + current;
8125 ctxt->input->end =
8126 &ctxt->input->buf->buffer->content[
8127 ctxt->input->buf->buffer->use];
8128 }
8129 avail = ctxt->input->buf->buffer->use -
8130 (ctxt->input->cur - ctxt->input->base);
8131 }
Owen Taylor3473f882001-02-23 17:55:21 +00008132 if (avail < 1)
8133 goto done;
8134 switch (ctxt->instate) {
8135 case XML_PARSER_EOF:
8136 /*
8137 * Document parsing is done !
8138 */
8139 goto done;
8140 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008141 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8142 xmlChar start[4];
8143 xmlCharEncoding enc;
8144
8145 /*
8146 * Very first chars read from the document flow.
8147 */
8148 if (avail < 4)
8149 goto done;
8150
8151 /*
8152 * Get the 4 first bytes and decode the charset
8153 * if enc != XML_CHAR_ENCODING_NONE
8154 * plug some encoding conversion routines.
8155 */
8156 start[0] = RAW;
8157 start[1] = NXT(1);
8158 start[2] = NXT(2);
8159 start[3] = NXT(3);
8160 enc = xmlDetectCharEncoding(start, 4);
8161 if (enc != XML_CHAR_ENCODING_NONE) {
8162 xmlSwitchEncoding(ctxt, enc);
8163 }
8164 break;
8165 }
Owen Taylor3473f882001-02-23 17:55:21 +00008166
8167 cur = ctxt->input->cur[0];
8168 next = ctxt->input->cur[1];
8169 if (cur == 0) {
8170 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8171 ctxt->sax->setDocumentLocator(ctxt->userData,
8172 &xmlDefaultSAXLocator);
8173 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8174 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8175 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8176 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008177 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008178 ctxt->instate = XML_PARSER_EOF;
8179#ifdef DEBUG_PUSH
8180 xmlGenericError(xmlGenericErrorContext,
8181 "PP: entering EOF\n");
8182#endif
8183 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8184 ctxt->sax->endDocument(ctxt->userData);
8185 goto done;
8186 }
8187 if ((cur == '<') && (next == '?')) {
8188 /* PI or XML decl */
8189 if (avail < 5) return(ret);
8190 if ((!terminate) &&
8191 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8192 return(ret);
8193 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8194 ctxt->sax->setDocumentLocator(ctxt->userData,
8195 &xmlDefaultSAXLocator);
8196 if ((ctxt->input->cur[2] == 'x') &&
8197 (ctxt->input->cur[3] == 'm') &&
8198 (ctxt->input->cur[4] == 'l') &&
8199 (IS_BLANK(ctxt->input->cur[5]))) {
8200 ret += 5;
8201#ifdef DEBUG_PUSH
8202 xmlGenericError(xmlGenericErrorContext,
8203 "PP: Parsing XML Decl\n");
8204#endif
8205 xmlParseXMLDecl(ctxt);
8206 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8207 /*
8208 * The XML REC instructs us to stop parsing right
8209 * here
8210 */
8211 ctxt->instate = XML_PARSER_EOF;
8212 return(0);
8213 }
8214 ctxt->standalone = ctxt->input->standalone;
8215 if ((ctxt->encoding == NULL) &&
8216 (ctxt->input->encoding != NULL))
8217 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8218 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8219 (!ctxt->disableSAX))
8220 ctxt->sax->startDocument(ctxt->userData);
8221 ctxt->instate = XML_PARSER_MISC;
8222#ifdef DEBUG_PUSH
8223 xmlGenericError(xmlGenericErrorContext,
8224 "PP: entering MISC\n");
8225#endif
8226 } else {
8227 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8228 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8229 (!ctxt->disableSAX))
8230 ctxt->sax->startDocument(ctxt->userData);
8231 ctxt->instate = XML_PARSER_MISC;
8232#ifdef DEBUG_PUSH
8233 xmlGenericError(xmlGenericErrorContext,
8234 "PP: entering MISC\n");
8235#endif
8236 }
8237 } else {
8238 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8239 ctxt->sax->setDocumentLocator(ctxt->userData,
8240 &xmlDefaultSAXLocator);
8241 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8242 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8243 (!ctxt->disableSAX))
8244 ctxt->sax->startDocument(ctxt->userData);
8245 ctxt->instate = XML_PARSER_MISC;
8246#ifdef DEBUG_PUSH
8247 xmlGenericError(xmlGenericErrorContext,
8248 "PP: entering MISC\n");
8249#endif
8250 }
8251 break;
8252 case XML_PARSER_MISC:
8253 SKIP_BLANKS;
8254 if (ctxt->input->buf == NULL)
8255 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8256 else
8257 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8258 if (avail < 2)
8259 goto done;
8260 cur = ctxt->input->cur[0];
8261 next = ctxt->input->cur[1];
8262 if ((cur == '<') && (next == '?')) {
8263 if ((!terminate) &&
8264 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8265 goto done;
8266#ifdef DEBUG_PUSH
8267 xmlGenericError(xmlGenericErrorContext,
8268 "PP: Parsing PI\n");
8269#endif
8270 xmlParsePI(ctxt);
8271 } else if ((cur == '<') && (next == '!') &&
8272 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8273 if ((!terminate) &&
8274 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8275 goto done;
8276#ifdef DEBUG_PUSH
8277 xmlGenericError(xmlGenericErrorContext,
8278 "PP: Parsing Comment\n");
8279#endif
8280 xmlParseComment(ctxt);
8281 ctxt->instate = XML_PARSER_MISC;
8282 } else if ((cur == '<') && (next == '!') &&
8283 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8284 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8285 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8286 (ctxt->input->cur[8] == 'E')) {
8287 if ((!terminate) &&
8288 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8289 goto done;
8290#ifdef DEBUG_PUSH
8291 xmlGenericError(xmlGenericErrorContext,
8292 "PP: Parsing internal subset\n");
8293#endif
8294 ctxt->inSubset = 1;
8295 xmlParseDocTypeDecl(ctxt);
8296 if (RAW == '[') {
8297 ctxt->instate = XML_PARSER_DTD;
8298#ifdef DEBUG_PUSH
8299 xmlGenericError(xmlGenericErrorContext,
8300 "PP: entering DTD\n");
8301#endif
8302 } else {
8303 /*
8304 * Create and update the external subset.
8305 */
8306 ctxt->inSubset = 2;
8307 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8308 (ctxt->sax->externalSubset != NULL))
8309 ctxt->sax->externalSubset(ctxt->userData,
8310 ctxt->intSubName, ctxt->extSubSystem,
8311 ctxt->extSubURI);
8312 ctxt->inSubset = 0;
8313 ctxt->instate = XML_PARSER_PROLOG;
8314#ifdef DEBUG_PUSH
8315 xmlGenericError(xmlGenericErrorContext,
8316 "PP: entering PROLOG\n");
8317#endif
8318 }
8319 } else if ((cur == '<') && (next == '!') &&
8320 (avail < 9)) {
8321 goto done;
8322 } else {
8323 ctxt->instate = XML_PARSER_START_TAG;
8324#ifdef DEBUG_PUSH
8325 xmlGenericError(xmlGenericErrorContext,
8326 "PP: entering START_TAG\n");
8327#endif
8328 }
8329 break;
8330 case XML_PARSER_IGNORE:
8331 xmlGenericError(xmlGenericErrorContext,
8332 "PP: internal error, state == IGNORE");
8333 ctxt->instate = XML_PARSER_DTD;
8334#ifdef DEBUG_PUSH
8335 xmlGenericError(xmlGenericErrorContext,
8336 "PP: entering DTD\n");
8337#endif
8338 break;
8339 case XML_PARSER_PROLOG:
8340 SKIP_BLANKS;
8341 if (ctxt->input->buf == NULL)
8342 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8343 else
8344 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8345 if (avail < 2)
8346 goto done;
8347 cur = ctxt->input->cur[0];
8348 next = ctxt->input->cur[1];
8349 if ((cur == '<') && (next == '?')) {
8350 if ((!terminate) &&
8351 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8352 goto done;
8353#ifdef DEBUG_PUSH
8354 xmlGenericError(xmlGenericErrorContext,
8355 "PP: Parsing PI\n");
8356#endif
8357 xmlParsePI(ctxt);
8358 } else if ((cur == '<') && (next == '!') &&
8359 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8360 if ((!terminate) &&
8361 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8362 goto done;
8363#ifdef DEBUG_PUSH
8364 xmlGenericError(xmlGenericErrorContext,
8365 "PP: Parsing Comment\n");
8366#endif
8367 xmlParseComment(ctxt);
8368 ctxt->instate = XML_PARSER_PROLOG;
8369 } else if ((cur == '<') && (next == '!') &&
8370 (avail < 4)) {
8371 goto done;
8372 } else {
8373 ctxt->instate = XML_PARSER_START_TAG;
8374#ifdef DEBUG_PUSH
8375 xmlGenericError(xmlGenericErrorContext,
8376 "PP: entering START_TAG\n");
8377#endif
8378 }
8379 break;
8380 case XML_PARSER_EPILOG:
8381 SKIP_BLANKS;
8382 if (ctxt->input->buf == NULL)
8383 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8384 else
8385 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8386 if (avail < 2)
8387 goto done;
8388 cur = ctxt->input->cur[0];
8389 next = ctxt->input->cur[1];
8390 if ((cur == '<') && (next == '?')) {
8391 if ((!terminate) &&
8392 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8393 goto done;
8394#ifdef DEBUG_PUSH
8395 xmlGenericError(xmlGenericErrorContext,
8396 "PP: Parsing PI\n");
8397#endif
8398 xmlParsePI(ctxt);
8399 ctxt->instate = XML_PARSER_EPILOG;
8400 } else if ((cur == '<') && (next == '!') &&
8401 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8402 if ((!terminate) &&
8403 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8404 goto done;
8405#ifdef DEBUG_PUSH
8406 xmlGenericError(xmlGenericErrorContext,
8407 "PP: Parsing Comment\n");
8408#endif
8409 xmlParseComment(ctxt);
8410 ctxt->instate = XML_PARSER_EPILOG;
8411 } else if ((cur == '<') && (next == '!') &&
8412 (avail < 4)) {
8413 goto done;
8414 } else {
8415 ctxt->errNo = XML_ERR_DOCUMENT_END;
8416 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8417 ctxt->sax->error(ctxt->userData,
8418 "Extra content at the end of the document\n");
8419 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008420 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008421 ctxt->instate = XML_PARSER_EOF;
8422#ifdef DEBUG_PUSH
8423 xmlGenericError(xmlGenericErrorContext,
8424 "PP: entering EOF\n");
8425#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008426 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008427 ctxt->sax->endDocument(ctxt->userData);
8428 goto done;
8429 }
8430 break;
8431 case XML_PARSER_START_TAG: {
8432 xmlChar *name, *oldname;
8433
8434 if ((avail < 2) && (ctxt->inputNr == 1))
8435 goto done;
8436 cur = ctxt->input->cur[0];
8437 if (cur != '<') {
8438 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8439 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8440 ctxt->sax->error(ctxt->userData,
8441 "Start tag expect, '<' not found\n");
8442 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008443 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008444 ctxt->instate = XML_PARSER_EOF;
8445#ifdef DEBUG_PUSH
8446 xmlGenericError(xmlGenericErrorContext,
8447 "PP: entering EOF\n");
8448#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008449 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008450 ctxt->sax->endDocument(ctxt->userData);
8451 goto done;
8452 }
8453 if ((!terminate) &&
8454 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8455 goto done;
8456 if (ctxt->spaceNr == 0)
8457 spacePush(ctxt, -1);
8458 else
8459 spacePush(ctxt, *ctxt->space);
8460 name = xmlParseStartTag(ctxt);
8461 if (name == NULL) {
8462 spacePop(ctxt);
8463 ctxt->instate = XML_PARSER_EOF;
8464#ifdef DEBUG_PUSH
8465 xmlGenericError(xmlGenericErrorContext,
8466 "PP: entering EOF\n");
8467#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008468 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008469 ctxt->sax->endDocument(ctxt->userData);
8470 goto done;
8471 }
8472 namePush(ctxt, xmlStrdup(name));
8473
8474 /*
8475 * [ VC: Root Element Type ]
8476 * The Name in the document type declaration must match
8477 * the element type of the root element.
8478 */
8479 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8480 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8481 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8482
8483 /*
8484 * Check for an Empty Element.
8485 */
8486 if ((RAW == '/') && (NXT(1) == '>')) {
8487 SKIP(2);
8488 if ((ctxt->sax != NULL) &&
8489 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8490 ctxt->sax->endElement(ctxt->userData, name);
8491 xmlFree(name);
8492 oldname = namePop(ctxt);
8493 spacePop(ctxt);
8494 if (oldname != NULL) {
8495#ifdef DEBUG_STACK
8496 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8497#endif
8498 xmlFree(oldname);
8499 }
8500 if (ctxt->name == NULL) {
8501 ctxt->instate = XML_PARSER_EPILOG;
8502#ifdef DEBUG_PUSH
8503 xmlGenericError(xmlGenericErrorContext,
8504 "PP: entering EPILOG\n");
8505#endif
8506 } else {
8507 ctxt->instate = XML_PARSER_CONTENT;
8508#ifdef DEBUG_PUSH
8509 xmlGenericError(xmlGenericErrorContext,
8510 "PP: entering CONTENT\n");
8511#endif
8512 }
8513 break;
8514 }
8515 if (RAW == '>') {
8516 NEXT;
8517 } else {
8518 ctxt->errNo = XML_ERR_GT_REQUIRED;
8519 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8520 ctxt->sax->error(ctxt->userData,
8521 "Couldn't find end of Start Tag %s\n",
8522 name);
8523 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008524 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008525
8526 /*
8527 * end of parsing of this node.
8528 */
8529 nodePop(ctxt);
8530 oldname = namePop(ctxt);
8531 spacePop(ctxt);
8532 if (oldname != NULL) {
8533#ifdef DEBUG_STACK
8534 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8535#endif
8536 xmlFree(oldname);
8537 }
8538 }
8539 xmlFree(name);
8540 ctxt->instate = XML_PARSER_CONTENT;
8541#ifdef DEBUG_PUSH
8542 xmlGenericError(xmlGenericErrorContext,
8543 "PP: entering CONTENT\n");
8544#endif
8545 break;
8546 }
8547 case XML_PARSER_CONTENT: {
8548 const xmlChar *test;
8549 int cons;
Owen Taylor3473f882001-02-23 17:55:21 +00008550 if ((avail < 2) && (ctxt->inputNr == 1))
8551 goto done;
8552 cur = ctxt->input->cur[0];
8553 next = ctxt->input->cur[1];
8554
8555 test = CUR_PTR;
8556 cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008557 if ((cur == '<') && (next == '?')) {
8558 if ((!terminate) &&
8559 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8560 goto done;
8561#ifdef DEBUG_PUSH
8562 xmlGenericError(xmlGenericErrorContext,
8563 "PP: Parsing PI\n");
8564#endif
8565 xmlParsePI(ctxt);
8566 } else if ((cur == '<') && (next == '!') &&
8567 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8568 if ((!terminate) &&
8569 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8570 goto done;
8571#ifdef DEBUG_PUSH
8572 xmlGenericError(xmlGenericErrorContext,
8573 "PP: Parsing Comment\n");
8574#endif
8575 xmlParseComment(ctxt);
8576 ctxt->instate = XML_PARSER_CONTENT;
8577 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8578 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8579 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8580 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8581 (ctxt->input->cur[8] == '[')) {
8582 SKIP(9);
8583 ctxt->instate = XML_PARSER_CDATA_SECTION;
8584#ifdef DEBUG_PUSH
8585 xmlGenericError(xmlGenericErrorContext,
8586 "PP: entering CDATA_SECTION\n");
8587#endif
8588 break;
8589 } else if ((cur == '<') && (next == '!') &&
8590 (avail < 9)) {
8591 goto done;
8592 } else if ((cur == '<') && (next == '/')) {
8593 ctxt->instate = XML_PARSER_END_TAG;
8594#ifdef DEBUG_PUSH
8595 xmlGenericError(xmlGenericErrorContext,
8596 "PP: entering END_TAG\n");
8597#endif
8598 break;
8599 } else if (cur == '<') {
8600 ctxt->instate = XML_PARSER_START_TAG;
8601#ifdef DEBUG_PUSH
8602 xmlGenericError(xmlGenericErrorContext,
8603 "PP: entering START_TAG\n");
8604#endif
8605 break;
8606 } else if (cur == '&') {
8607 if ((!terminate) &&
8608 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8609 goto done;
8610#ifdef DEBUG_PUSH
8611 xmlGenericError(xmlGenericErrorContext,
8612 "PP: Parsing Reference\n");
8613#endif
8614 xmlParseReference(ctxt);
8615 } else {
8616 /* TODO Avoid the extra copy, handle directly !!! */
8617 /*
8618 * Goal of the following test is:
8619 * - minimize calls to the SAX 'character' callback
8620 * when they are mergeable
8621 * - handle an problem for isBlank when we only parse
8622 * a sequence of blank chars and the next one is
8623 * not available to check against '<' presence.
8624 * - tries to homogenize the differences in SAX
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008625 * callbacks between the push and pull versions
Owen Taylor3473f882001-02-23 17:55:21 +00008626 * of the parser.
8627 */
8628 if ((ctxt->inputNr == 1) &&
8629 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8630 if ((!terminate) &&
8631 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8632 goto done;
8633 }
8634 ctxt->checkIndex = 0;
8635#ifdef DEBUG_PUSH
8636 xmlGenericError(xmlGenericErrorContext,
8637 "PP: Parsing char data\n");
8638#endif
8639 xmlParseCharData(ctxt, 0);
8640 }
8641 /*
8642 * Pop-up of finished entities.
8643 */
8644 while ((RAW == 0) && (ctxt->inputNr > 1))
8645 xmlPopInput(ctxt);
Daniel Veillardfdc91562002-07-01 21:52:03 +00008646 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008647 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8648 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8649 ctxt->sax->error(ctxt->userData,
8650 "detected an error in element content\n");
8651 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008652 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008653 ctxt->instate = XML_PARSER_EOF;
8654 break;
8655 }
8656 break;
8657 }
8658 case XML_PARSER_CDATA_SECTION: {
8659 /*
8660 * The Push mode need to have the SAX callback for
8661 * cdataBlock merge back contiguous callbacks.
8662 */
8663 int base;
8664
8665 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8666 if (base < 0) {
8667 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8668 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8669 if (ctxt->sax->cdataBlock != NULL)
8670 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8671 XML_PARSER_BIG_BUFFER_SIZE);
8672 }
8673 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8674 ctxt->checkIndex = 0;
8675 }
8676 goto done;
8677 } else {
8678 if ((ctxt->sax != NULL) && (base > 0) &&
8679 (!ctxt->disableSAX)) {
8680 if (ctxt->sax->cdataBlock != NULL)
8681 ctxt->sax->cdataBlock(ctxt->userData,
8682 ctxt->input->cur, base);
8683 }
8684 SKIP(base + 3);
8685 ctxt->checkIndex = 0;
8686 ctxt->instate = XML_PARSER_CONTENT;
8687#ifdef DEBUG_PUSH
8688 xmlGenericError(xmlGenericErrorContext,
8689 "PP: entering CONTENT\n");
8690#endif
8691 }
8692 break;
8693 }
8694 case XML_PARSER_END_TAG:
8695 if (avail < 2)
8696 goto done;
8697 if ((!terminate) &&
8698 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8699 goto done;
8700 xmlParseEndTag(ctxt);
8701 if (ctxt->name == NULL) {
8702 ctxt->instate = XML_PARSER_EPILOG;
8703#ifdef DEBUG_PUSH
8704 xmlGenericError(xmlGenericErrorContext,
8705 "PP: entering EPILOG\n");
8706#endif
8707 } else {
8708 ctxt->instate = XML_PARSER_CONTENT;
8709#ifdef DEBUG_PUSH
8710 xmlGenericError(xmlGenericErrorContext,
8711 "PP: entering CONTENT\n");
8712#endif
8713 }
8714 break;
8715 case XML_PARSER_DTD: {
8716 /*
8717 * Sorry but progressive parsing of the internal subset
8718 * is not expected to be supported. We first check that
8719 * the full content of the internal subset is available and
8720 * the parsing is launched only at that point.
8721 * Internal subset ends up with "']' S? '>'" in an unescaped
8722 * section and not in a ']]>' sequence which are conditional
8723 * sections (whoever argued to keep that crap in XML deserve
8724 * a place in hell !).
8725 */
8726 int base, i;
8727 xmlChar *buf;
8728 xmlChar quote = 0;
8729
8730 base = ctxt->input->cur - ctxt->input->base;
8731 if (base < 0) return(0);
8732 if (ctxt->checkIndex > base)
8733 base = ctxt->checkIndex;
8734 buf = ctxt->input->buf->buffer->content;
8735 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8736 base++) {
8737 if (quote != 0) {
8738 if (buf[base] == quote)
8739 quote = 0;
8740 continue;
8741 }
8742 if (buf[base] == '"') {
8743 quote = '"';
8744 continue;
8745 }
8746 if (buf[base] == '\'') {
8747 quote = '\'';
8748 continue;
8749 }
8750 if (buf[base] == ']') {
8751 if ((unsigned int) base +1 >=
8752 ctxt->input->buf->buffer->use)
8753 break;
8754 if (buf[base + 1] == ']') {
8755 /* conditional crap, skip both ']' ! */
8756 base++;
8757 continue;
8758 }
8759 for (i = 0;
8760 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8761 i++) {
8762 if (buf[base + i] == '>')
8763 goto found_end_int_subset;
8764 }
8765 break;
8766 }
8767 }
8768 /*
8769 * We didn't found the end of the Internal subset
8770 */
8771 if (quote == 0)
8772 ctxt->checkIndex = base;
8773#ifdef DEBUG_PUSH
8774 if (next == 0)
8775 xmlGenericError(xmlGenericErrorContext,
8776 "PP: lookup of int subset end filed\n");
8777#endif
8778 goto done;
8779
8780found_end_int_subset:
8781 xmlParseInternalSubset(ctxt);
8782 ctxt->inSubset = 2;
8783 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8784 (ctxt->sax->externalSubset != NULL))
8785 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8786 ctxt->extSubSystem, ctxt->extSubURI);
8787 ctxt->inSubset = 0;
8788 ctxt->instate = XML_PARSER_PROLOG;
8789 ctxt->checkIndex = 0;
8790#ifdef DEBUG_PUSH
8791 xmlGenericError(xmlGenericErrorContext,
8792 "PP: entering PROLOG\n");
8793#endif
8794 break;
8795 }
8796 case XML_PARSER_COMMENT:
8797 xmlGenericError(xmlGenericErrorContext,
8798 "PP: internal error, state == COMMENT\n");
8799 ctxt->instate = XML_PARSER_CONTENT;
8800#ifdef DEBUG_PUSH
8801 xmlGenericError(xmlGenericErrorContext,
8802 "PP: entering CONTENT\n");
8803#endif
8804 break;
8805 case XML_PARSER_PI:
8806 xmlGenericError(xmlGenericErrorContext,
8807 "PP: internal error, state == PI\n");
8808 ctxt->instate = XML_PARSER_CONTENT;
8809#ifdef DEBUG_PUSH
8810 xmlGenericError(xmlGenericErrorContext,
8811 "PP: entering CONTENT\n");
8812#endif
8813 break;
8814 case XML_PARSER_ENTITY_DECL:
8815 xmlGenericError(xmlGenericErrorContext,
8816 "PP: internal error, state == ENTITY_DECL\n");
8817 ctxt->instate = XML_PARSER_DTD;
8818#ifdef DEBUG_PUSH
8819 xmlGenericError(xmlGenericErrorContext,
8820 "PP: entering DTD\n");
8821#endif
8822 break;
8823 case XML_PARSER_ENTITY_VALUE:
8824 xmlGenericError(xmlGenericErrorContext,
8825 "PP: internal error, state == ENTITY_VALUE\n");
8826 ctxt->instate = XML_PARSER_CONTENT;
8827#ifdef DEBUG_PUSH
8828 xmlGenericError(xmlGenericErrorContext,
8829 "PP: entering DTD\n");
8830#endif
8831 break;
8832 case XML_PARSER_ATTRIBUTE_VALUE:
8833 xmlGenericError(xmlGenericErrorContext,
8834 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8835 ctxt->instate = XML_PARSER_START_TAG;
8836#ifdef DEBUG_PUSH
8837 xmlGenericError(xmlGenericErrorContext,
8838 "PP: entering START_TAG\n");
8839#endif
8840 break;
8841 case XML_PARSER_SYSTEM_LITERAL:
8842 xmlGenericError(xmlGenericErrorContext,
8843 "PP: internal error, state == SYSTEM_LITERAL\n");
8844 ctxt->instate = XML_PARSER_START_TAG;
8845#ifdef DEBUG_PUSH
8846 xmlGenericError(xmlGenericErrorContext,
8847 "PP: entering START_TAG\n");
8848#endif
8849 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00008850 case XML_PARSER_PUBLIC_LITERAL:
8851 xmlGenericError(xmlGenericErrorContext,
8852 "PP: internal error, state == PUBLIC_LITERAL\n");
8853 ctxt->instate = XML_PARSER_START_TAG;
8854#ifdef DEBUG_PUSH
8855 xmlGenericError(xmlGenericErrorContext,
8856 "PP: entering START_TAG\n");
8857#endif
8858 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008859 }
8860 }
8861done:
8862#ifdef DEBUG_PUSH
8863 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8864#endif
8865 return(ret);
8866}
8867
8868/**
Owen Taylor3473f882001-02-23 17:55:21 +00008869 * xmlParseChunk:
8870 * @ctxt: an XML parser context
8871 * @chunk: an char array
8872 * @size: the size in byte of the chunk
8873 * @terminate: last chunk indicator
8874 *
8875 * Parse a Chunk of memory
8876 *
8877 * Returns zero if no error, the xmlParserErrors otherwise.
8878 */
8879int
8880xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8881 int terminate) {
8882 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8883 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8884 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8885 int cur = ctxt->input->cur - ctxt->input->base;
8886
8887 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8888 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8889 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008890 ctxt->input->end =
8891 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008892#ifdef DEBUG_PUSH
8893 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8894#endif
8895
8896 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8897 xmlParseTryOrFinish(ctxt, terminate);
8898 } else if (ctxt->instate != XML_PARSER_EOF) {
8899 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8900 xmlParserInputBufferPtr in = ctxt->input->buf;
8901 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8902 (in->raw != NULL)) {
8903 int nbchars;
8904
8905 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8906 if (nbchars < 0) {
8907 xmlGenericError(xmlGenericErrorContext,
8908 "xmlParseChunk: encoder error\n");
8909 return(XML_ERR_INVALID_ENCODING);
8910 }
8911 }
8912 }
8913 }
8914 xmlParseTryOrFinish(ctxt, terminate);
8915 if (terminate) {
8916 /*
8917 * Check for termination
8918 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +00008919 int avail = 0;
8920 if (ctxt->input->buf == NULL)
8921 avail = ctxt->input->length -
8922 (ctxt->input->cur - ctxt->input->base);
8923 else
8924 avail = ctxt->input->buf->buffer->use -
8925 (ctxt->input->cur - ctxt->input->base);
8926
Owen Taylor3473f882001-02-23 17:55:21 +00008927 if ((ctxt->instate != XML_PARSER_EOF) &&
8928 (ctxt->instate != XML_PARSER_EPILOG)) {
8929 ctxt->errNo = XML_ERR_DOCUMENT_END;
8930 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8931 ctxt->sax->error(ctxt->userData,
8932 "Extra content at the end of the document\n");
8933 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008934 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008935 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00008936 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
8937 ctxt->errNo = XML_ERR_DOCUMENT_END;
8938 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8939 ctxt->sax->error(ctxt->userData,
8940 "Extra content at the end of the document\n");
8941 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008942 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard819d5cb2002-10-14 11:15:18 +00008943
8944 }
Owen Taylor3473f882001-02-23 17:55:21 +00008945 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008946 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008947 ctxt->sax->endDocument(ctxt->userData);
8948 }
8949 ctxt->instate = XML_PARSER_EOF;
8950 }
8951 return((xmlParserErrors) ctxt->errNo);
8952}
8953
8954/************************************************************************
8955 * *
8956 * I/O front end functions to the parser *
8957 * *
8958 ************************************************************************/
8959
8960/**
8961 * xmlStopParser:
8962 * @ctxt: an XML parser context
8963 *
8964 * Blocks further parser processing
8965 */
8966void
8967xmlStopParser(xmlParserCtxtPtr ctxt) {
8968 ctxt->instate = XML_PARSER_EOF;
8969 if (ctxt->input != NULL)
8970 ctxt->input->cur = BAD_CAST"";
8971}
8972
8973/**
8974 * xmlCreatePushParserCtxt:
8975 * @sax: a SAX handler
8976 * @user_data: The user data returned on SAX callbacks
8977 * @chunk: a pointer to an array of chars
8978 * @size: number of chars in the array
8979 * @filename: an optional file name or URI
8980 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00008981 * Create a parser context for using the XML parser in push mode.
8982 * If @buffer and @size are non-NULL, the data is used to detect
8983 * the encoding. The remaining characters will be parsed so they
8984 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00008985 * To allow content encoding detection, @size should be >= 4
8986 * The value of @filename is used for fetching external entities
8987 * and error/warning reports.
8988 *
8989 * Returns the new parser context or NULL
8990 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00008991
Owen Taylor3473f882001-02-23 17:55:21 +00008992xmlParserCtxtPtr
8993xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8994 const char *chunk, int size, const char *filename) {
8995 xmlParserCtxtPtr ctxt;
8996 xmlParserInputPtr inputStream;
8997 xmlParserInputBufferPtr buf;
8998 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8999
9000 /*
9001 * plug some encoding conversion routines
9002 */
9003 if ((chunk != NULL) && (size >= 4))
9004 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9005
9006 buf = xmlAllocParserInputBuffer(enc);
9007 if (buf == NULL) return(NULL);
9008
9009 ctxt = xmlNewParserCtxt();
9010 if (ctxt == NULL) {
9011 xmlFree(buf);
9012 return(NULL);
9013 }
9014 if (sax != NULL) {
9015 if (ctxt->sax != &xmlDefaultSAXHandler)
9016 xmlFree(ctxt->sax);
9017 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9018 if (ctxt->sax == NULL) {
9019 xmlFree(buf);
9020 xmlFree(ctxt);
9021 return(NULL);
9022 }
9023 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9024 if (user_data != NULL)
9025 ctxt->userData = user_data;
9026 }
9027 if (filename == NULL) {
9028 ctxt->directory = NULL;
9029 } else {
9030 ctxt->directory = xmlParserGetDirectory(filename);
9031 }
9032
9033 inputStream = xmlNewInputStream(ctxt);
9034 if (inputStream == NULL) {
9035 xmlFreeParserCtxt(ctxt);
9036 return(NULL);
9037 }
9038
9039 if (filename == NULL)
9040 inputStream->filename = NULL;
9041 else
Daniel Veillardf4862f02002-09-10 11:13:43 +00009042 inputStream->filename = (char *)
9043 xmlNormalizeWindowsPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +00009044 inputStream->buf = buf;
9045 inputStream->base = inputStream->buf->buffer->content;
9046 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009047 inputStream->end =
9048 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009049
9050 inputPush(ctxt, inputStream);
9051
9052 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9053 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009054 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9055 int cur = ctxt->input->cur - ctxt->input->base;
9056
Owen Taylor3473f882001-02-23 17:55:21 +00009057 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009058
9059 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9060 ctxt->input->cur = ctxt->input->base + cur;
9061 ctxt->input->end =
9062 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009063#ifdef DEBUG_PUSH
9064 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9065#endif
9066 }
9067
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009068 if (enc != XML_CHAR_ENCODING_NONE) {
9069 xmlSwitchEncoding(ctxt, enc);
9070 }
9071
Owen Taylor3473f882001-02-23 17:55:21 +00009072 return(ctxt);
9073}
9074
9075/**
9076 * xmlCreateIOParserCtxt:
9077 * @sax: a SAX handler
9078 * @user_data: The user data returned on SAX callbacks
9079 * @ioread: an I/O read function
9080 * @ioclose: an I/O close function
9081 * @ioctx: an I/O handler
9082 * @enc: the charset encoding if known
9083 *
9084 * Create a parser context for using the XML parser with an existing
9085 * I/O stream
9086 *
9087 * Returns the new parser context or NULL
9088 */
9089xmlParserCtxtPtr
9090xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9091 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9092 void *ioctx, xmlCharEncoding enc) {
9093 xmlParserCtxtPtr ctxt;
9094 xmlParserInputPtr inputStream;
9095 xmlParserInputBufferPtr buf;
9096
9097 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9098 if (buf == NULL) return(NULL);
9099
9100 ctxt = xmlNewParserCtxt();
9101 if (ctxt == NULL) {
9102 xmlFree(buf);
9103 return(NULL);
9104 }
9105 if (sax != NULL) {
9106 if (ctxt->sax != &xmlDefaultSAXHandler)
9107 xmlFree(ctxt->sax);
9108 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9109 if (ctxt->sax == NULL) {
9110 xmlFree(buf);
9111 xmlFree(ctxt);
9112 return(NULL);
9113 }
9114 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9115 if (user_data != NULL)
9116 ctxt->userData = user_data;
9117 }
9118
9119 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9120 if (inputStream == NULL) {
9121 xmlFreeParserCtxt(ctxt);
9122 return(NULL);
9123 }
9124 inputPush(ctxt, inputStream);
9125
9126 return(ctxt);
9127}
9128
9129/************************************************************************
9130 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009131 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00009132 * *
9133 ************************************************************************/
9134
9135/**
9136 * xmlIOParseDTD:
9137 * @sax: the SAX handler block or NULL
9138 * @input: an Input Buffer
9139 * @enc: the charset encoding if known
9140 *
9141 * Load and parse a DTD
9142 *
9143 * Returns the resulting xmlDtdPtr or NULL in case of error.
9144 * @input will be freed at parsing end.
9145 */
9146
9147xmlDtdPtr
9148xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9149 xmlCharEncoding enc) {
9150 xmlDtdPtr ret = NULL;
9151 xmlParserCtxtPtr ctxt;
9152 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009153 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00009154
9155 if (input == NULL)
9156 return(NULL);
9157
9158 ctxt = xmlNewParserCtxt();
9159 if (ctxt == NULL) {
9160 return(NULL);
9161 }
9162
9163 /*
9164 * Set-up the SAX context
9165 */
9166 if (sax != NULL) {
9167 if (ctxt->sax != NULL)
9168 xmlFree(ctxt->sax);
9169 ctxt->sax = sax;
9170 ctxt->userData = NULL;
9171 }
9172
9173 /*
9174 * generate a parser input from the I/O handler
9175 */
9176
9177 pinput = xmlNewIOInputStream(ctxt, input, enc);
9178 if (pinput == NULL) {
9179 if (sax != NULL) ctxt->sax = NULL;
9180 xmlFreeParserCtxt(ctxt);
9181 return(NULL);
9182 }
9183
9184 /*
9185 * plug some encoding conversion routines here.
9186 */
9187 xmlPushInput(ctxt, pinput);
9188
9189 pinput->filename = NULL;
9190 pinput->line = 1;
9191 pinput->col = 1;
9192 pinput->base = ctxt->input->cur;
9193 pinput->cur = ctxt->input->cur;
9194 pinput->free = NULL;
9195
9196 /*
9197 * let's parse that entity knowing it's an external subset.
9198 */
9199 ctxt->inSubset = 2;
9200 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9201 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9202 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009203
9204 if (enc == XML_CHAR_ENCODING_NONE) {
9205 /*
9206 * Get the 4 first bytes and decode the charset
9207 * if enc != XML_CHAR_ENCODING_NONE
9208 * plug some encoding conversion routines.
9209 */
9210 start[0] = RAW;
9211 start[1] = NXT(1);
9212 start[2] = NXT(2);
9213 start[3] = NXT(3);
9214 enc = xmlDetectCharEncoding(start, 4);
9215 if (enc != XML_CHAR_ENCODING_NONE) {
9216 xmlSwitchEncoding(ctxt, enc);
9217 }
9218 }
9219
Owen Taylor3473f882001-02-23 17:55:21 +00009220 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9221
9222 if (ctxt->myDoc != NULL) {
9223 if (ctxt->wellFormed) {
9224 ret = ctxt->myDoc->extSubset;
9225 ctxt->myDoc->extSubset = NULL;
9226 } else {
9227 ret = NULL;
9228 }
9229 xmlFreeDoc(ctxt->myDoc);
9230 ctxt->myDoc = NULL;
9231 }
9232 if (sax != NULL) ctxt->sax = NULL;
9233 xmlFreeParserCtxt(ctxt);
9234
9235 return(ret);
9236}
9237
9238/**
9239 * xmlSAXParseDTD:
9240 * @sax: the SAX handler block
9241 * @ExternalID: a NAME* containing the External ID of the DTD
9242 * @SystemID: a NAME* containing the URL to the DTD
9243 *
9244 * Load and parse an external subset.
9245 *
9246 * Returns the resulting xmlDtdPtr or NULL in case of error.
9247 */
9248
9249xmlDtdPtr
9250xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9251 const xmlChar *SystemID) {
9252 xmlDtdPtr ret = NULL;
9253 xmlParserCtxtPtr ctxt;
9254 xmlParserInputPtr input = NULL;
9255 xmlCharEncoding enc;
9256
9257 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9258
9259 ctxt = xmlNewParserCtxt();
9260 if (ctxt == NULL) {
9261 return(NULL);
9262 }
9263
9264 /*
9265 * Set-up the SAX context
9266 */
9267 if (sax != NULL) {
9268 if (ctxt->sax != NULL)
9269 xmlFree(ctxt->sax);
9270 ctxt->sax = sax;
9271 ctxt->userData = NULL;
9272 }
9273
9274 /*
9275 * Ask the Entity resolver to load the damn thing
9276 */
9277
9278 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9279 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9280 if (input == NULL) {
9281 if (sax != NULL) ctxt->sax = NULL;
9282 xmlFreeParserCtxt(ctxt);
9283 return(NULL);
9284 }
9285
9286 /*
9287 * plug some encoding conversion routines here.
9288 */
9289 xmlPushInput(ctxt, input);
9290 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9291 xmlSwitchEncoding(ctxt, enc);
9292
9293 if (input->filename == NULL)
9294 input->filename = (char *) xmlStrdup(SystemID);
9295 input->line = 1;
9296 input->col = 1;
9297 input->base = ctxt->input->cur;
9298 input->cur = ctxt->input->cur;
9299 input->free = NULL;
9300
9301 /*
9302 * let's parse that entity knowing it's an external subset.
9303 */
9304 ctxt->inSubset = 2;
9305 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9306 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9307 ExternalID, SystemID);
9308 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9309
9310 if (ctxt->myDoc != NULL) {
9311 if (ctxt->wellFormed) {
9312 ret = ctxt->myDoc->extSubset;
9313 ctxt->myDoc->extSubset = NULL;
9314 } else {
9315 ret = NULL;
9316 }
9317 xmlFreeDoc(ctxt->myDoc);
9318 ctxt->myDoc = NULL;
9319 }
9320 if (sax != NULL) ctxt->sax = NULL;
9321 xmlFreeParserCtxt(ctxt);
9322
9323 return(ret);
9324}
9325
9326/**
9327 * xmlParseDTD:
9328 * @ExternalID: a NAME* containing the External ID of the DTD
9329 * @SystemID: a NAME* containing the URL to the DTD
9330 *
9331 * Load and parse an external subset.
9332 *
9333 * Returns the resulting xmlDtdPtr or NULL in case of error.
9334 */
9335
9336xmlDtdPtr
9337xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9338 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9339}
9340
9341/************************************************************************
9342 * *
9343 * Front ends when parsing an Entity *
9344 * *
9345 ************************************************************************/
9346
9347/**
Owen Taylor3473f882001-02-23 17:55:21 +00009348 * xmlParseCtxtExternalEntity:
9349 * @ctx: the existing parsing context
9350 * @URL: the URL for the entity to load
9351 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009352 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009353 *
9354 * Parse an external general entity within an existing parsing context
9355 * An external general parsed entity is well-formed if it matches the
9356 * production labeled extParsedEnt.
9357 *
9358 * [78] extParsedEnt ::= TextDecl? content
9359 *
9360 * Returns 0 if the entity is well formed, -1 in case of args problem and
9361 * the parser error code otherwise
9362 */
9363
9364int
9365xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009366 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009367 xmlParserCtxtPtr ctxt;
9368 xmlDocPtr newDoc;
9369 xmlSAXHandlerPtr oldsax = NULL;
9370 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009371 xmlChar start[4];
9372 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009373
9374 if (ctx->depth > 40) {
9375 return(XML_ERR_ENTITY_LOOP);
9376 }
9377
Daniel Veillardcda96922001-08-21 10:56:31 +00009378 if (lst != NULL)
9379 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009380 if ((URL == NULL) && (ID == NULL))
9381 return(-1);
9382 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9383 return(-1);
9384
9385
9386 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9387 if (ctxt == NULL) return(-1);
9388 ctxt->userData = ctxt;
9389 oldsax = ctxt->sax;
9390 ctxt->sax = ctx->sax;
9391 newDoc = xmlNewDoc(BAD_CAST "1.0");
9392 if (newDoc == NULL) {
9393 xmlFreeParserCtxt(ctxt);
9394 return(-1);
9395 }
9396 if (ctx->myDoc != NULL) {
9397 newDoc->intSubset = ctx->myDoc->intSubset;
9398 newDoc->extSubset = ctx->myDoc->extSubset;
9399 }
9400 if (ctx->myDoc->URL != NULL) {
9401 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9402 }
9403 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9404 if (newDoc->children == NULL) {
9405 ctxt->sax = oldsax;
9406 xmlFreeParserCtxt(ctxt);
9407 newDoc->intSubset = NULL;
9408 newDoc->extSubset = NULL;
9409 xmlFreeDoc(newDoc);
9410 return(-1);
9411 }
9412 nodePush(ctxt, newDoc->children);
9413 if (ctx->myDoc == NULL) {
9414 ctxt->myDoc = newDoc;
9415 } else {
9416 ctxt->myDoc = ctx->myDoc;
9417 newDoc->children->doc = ctx->myDoc;
9418 }
9419
Daniel Veillard87a764e2001-06-20 17:41:10 +00009420 /*
9421 * Get the 4 first bytes and decode the charset
9422 * if enc != XML_CHAR_ENCODING_NONE
9423 * plug some encoding conversion routines.
9424 */
9425 GROW
9426 start[0] = RAW;
9427 start[1] = NXT(1);
9428 start[2] = NXT(2);
9429 start[3] = NXT(3);
9430 enc = xmlDetectCharEncoding(start, 4);
9431 if (enc != XML_CHAR_ENCODING_NONE) {
9432 xmlSwitchEncoding(ctxt, enc);
9433 }
9434
Owen Taylor3473f882001-02-23 17:55:21 +00009435 /*
9436 * Parse a possible text declaration first
9437 */
Owen Taylor3473f882001-02-23 17:55:21 +00009438 if ((RAW == '<') && (NXT(1) == '?') &&
9439 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9440 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9441 xmlParseTextDecl(ctxt);
9442 }
9443
9444 /*
9445 * Doing validity checking on chunk doesn't make sense
9446 */
9447 ctxt->instate = XML_PARSER_CONTENT;
9448 ctxt->validate = ctx->validate;
9449 ctxt->loadsubset = ctx->loadsubset;
9450 ctxt->depth = ctx->depth + 1;
9451 ctxt->replaceEntities = ctx->replaceEntities;
9452 if (ctxt->validate) {
9453 ctxt->vctxt.error = ctx->vctxt.error;
9454 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009455 } else {
9456 ctxt->vctxt.error = NULL;
9457 ctxt->vctxt.warning = NULL;
9458 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009459 ctxt->vctxt.nodeTab = NULL;
9460 ctxt->vctxt.nodeNr = 0;
9461 ctxt->vctxt.nodeMax = 0;
9462 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009463
9464 xmlParseContent(ctxt);
9465
9466 if ((RAW == '<') && (NXT(1) == '/')) {
9467 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9468 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9469 ctxt->sax->error(ctxt->userData,
9470 "chunk is not well balanced\n");
9471 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009472 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009473 } else if (RAW != 0) {
9474 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9475 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9476 ctxt->sax->error(ctxt->userData,
9477 "extra content at the end of well balanced chunk\n");
9478 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009479 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009480 }
9481 if (ctxt->node != newDoc->children) {
9482 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9483 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9484 ctxt->sax->error(ctxt->userData,
9485 "chunk is not well balanced\n");
9486 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009487 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009488 }
9489
9490 if (!ctxt->wellFormed) {
9491 if (ctxt->errNo == 0)
9492 ret = 1;
9493 else
9494 ret = ctxt->errNo;
9495 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009496 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009497 xmlNodePtr cur;
9498
9499 /*
9500 * Return the newly created nodeset after unlinking it from
9501 * they pseudo parent.
9502 */
9503 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009504 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009505 while (cur != NULL) {
9506 cur->parent = NULL;
9507 cur = cur->next;
9508 }
9509 newDoc->children->children = NULL;
9510 }
9511 ret = 0;
9512 }
9513 ctxt->sax = oldsax;
9514 xmlFreeParserCtxt(ctxt);
9515 newDoc->intSubset = NULL;
9516 newDoc->extSubset = NULL;
9517 xmlFreeDoc(newDoc);
9518
9519 return(ret);
9520}
9521
9522/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009523 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009524 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009525 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009526 * @sax: the SAX handler bloc (possibly NULL)
9527 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9528 * @depth: Used for loop detection, use 0
9529 * @URL: the URL for the entity to load
9530 * @ID: the System ID for the entity to load
9531 * @list: the return value for the set of parsed nodes
9532 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009533 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009534 *
9535 * Returns 0 if the entity is well formed, -1 in case of args problem and
9536 * the parser error code otherwise
9537 */
9538
Daniel Veillard257d9102001-05-08 10:41:44 +00009539static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009540xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9541 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009542 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009543 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009544 xmlParserCtxtPtr ctxt;
9545 xmlDocPtr newDoc;
9546 xmlSAXHandlerPtr oldsax = NULL;
9547 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009548 xmlChar start[4];
9549 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009550
9551 if (depth > 40) {
9552 return(XML_ERR_ENTITY_LOOP);
9553 }
9554
9555
9556
9557 if (list != NULL)
9558 *list = NULL;
9559 if ((URL == NULL) && (ID == NULL))
9560 return(-1);
9561 if (doc == NULL) /* @@ relax but check for dereferences */
9562 return(-1);
9563
9564
9565 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9566 if (ctxt == NULL) return(-1);
9567 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009568 if (oldctxt != NULL) {
9569 ctxt->_private = oldctxt->_private;
9570 ctxt->loadsubset = oldctxt->loadsubset;
9571 ctxt->validate = oldctxt->validate;
9572 ctxt->external = oldctxt->external;
9573 } else {
9574 /*
9575 * Doing validity checking on chunk without context
9576 * doesn't make sense
9577 */
9578 ctxt->_private = NULL;
9579 ctxt->validate = 0;
9580 ctxt->external = 2;
9581 ctxt->loadsubset = 0;
9582 }
Owen Taylor3473f882001-02-23 17:55:21 +00009583 if (sax != NULL) {
9584 oldsax = ctxt->sax;
9585 ctxt->sax = sax;
9586 if (user_data != NULL)
9587 ctxt->userData = user_data;
9588 }
9589 newDoc = xmlNewDoc(BAD_CAST "1.0");
9590 if (newDoc == NULL) {
9591 xmlFreeParserCtxt(ctxt);
9592 return(-1);
9593 }
9594 if (doc != NULL) {
9595 newDoc->intSubset = doc->intSubset;
9596 newDoc->extSubset = doc->extSubset;
9597 }
9598 if (doc->URL != NULL) {
9599 newDoc->URL = xmlStrdup(doc->URL);
9600 }
9601 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9602 if (newDoc->children == NULL) {
9603 if (sax != NULL)
9604 ctxt->sax = oldsax;
9605 xmlFreeParserCtxt(ctxt);
9606 newDoc->intSubset = NULL;
9607 newDoc->extSubset = NULL;
9608 xmlFreeDoc(newDoc);
9609 return(-1);
9610 }
9611 nodePush(ctxt, newDoc->children);
9612 if (doc == NULL) {
9613 ctxt->myDoc = newDoc;
9614 } else {
9615 ctxt->myDoc = doc;
9616 newDoc->children->doc = doc;
9617 }
9618
Daniel Veillard87a764e2001-06-20 17:41:10 +00009619 /*
9620 * Get the 4 first bytes and decode the charset
9621 * if enc != XML_CHAR_ENCODING_NONE
9622 * plug some encoding conversion routines.
9623 */
9624 GROW;
9625 start[0] = RAW;
9626 start[1] = NXT(1);
9627 start[2] = NXT(2);
9628 start[3] = NXT(3);
9629 enc = xmlDetectCharEncoding(start, 4);
9630 if (enc != XML_CHAR_ENCODING_NONE) {
9631 xmlSwitchEncoding(ctxt, enc);
9632 }
9633
Owen Taylor3473f882001-02-23 17:55:21 +00009634 /*
9635 * Parse a possible text declaration first
9636 */
Owen Taylor3473f882001-02-23 17:55:21 +00009637 if ((RAW == '<') && (NXT(1) == '?') &&
9638 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9639 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9640 xmlParseTextDecl(ctxt);
9641 }
9642
Owen Taylor3473f882001-02-23 17:55:21 +00009643 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009644 ctxt->depth = depth;
9645
9646 xmlParseContent(ctxt);
9647
Daniel Veillard561b7f82002-03-20 21:55:57 +00009648 if ((RAW == '<') && (NXT(1) == '/')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009649 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9650 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9651 ctxt->sax->error(ctxt->userData,
9652 "chunk is not well balanced\n");
9653 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009654 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00009655 } else if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00009656 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9657 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9658 ctxt->sax->error(ctxt->userData,
9659 "extra content at the end of well balanced chunk\n");
9660 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009661 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009662 }
9663 if (ctxt->node != newDoc->children) {
9664 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9665 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9666 ctxt->sax->error(ctxt->userData,
9667 "chunk is not well balanced\n");
9668 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009669 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009670 }
9671
9672 if (!ctxt->wellFormed) {
9673 if (ctxt->errNo == 0)
9674 ret = 1;
9675 else
9676 ret = ctxt->errNo;
9677 } else {
9678 if (list != NULL) {
9679 xmlNodePtr cur;
9680
9681 /*
9682 * Return the newly created nodeset after unlinking it from
9683 * they pseudo parent.
9684 */
9685 cur = newDoc->children->children;
9686 *list = cur;
9687 while (cur != NULL) {
9688 cur->parent = NULL;
9689 cur = cur->next;
9690 }
9691 newDoc->children->children = NULL;
9692 }
9693 ret = 0;
9694 }
9695 if (sax != NULL)
9696 ctxt->sax = oldsax;
9697 xmlFreeParserCtxt(ctxt);
9698 newDoc->intSubset = NULL;
9699 newDoc->extSubset = NULL;
9700 xmlFreeDoc(newDoc);
9701
9702 return(ret);
9703}
9704
9705/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009706 * xmlParseExternalEntity:
9707 * @doc: the document the chunk pertains to
9708 * @sax: the SAX handler bloc (possibly NULL)
9709 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9710 * @depth: Used for loop detection, use 0
9711 * @URL: the URL for the entity to load
9712 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009713 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009714 *
9715 * Parse an external general entity
9716 * An external general parsed entity is well-formed if it matches the
9717 * production labeled extParsedEnt.
9718 *
9719 * [78] extParsedEnt ::= TextDecl? content
9720 *
9721 * Returns 0 if the entity is well formed, -1 in case of args problem and
9722 * the parser error code otherwise
9723 */
9724
9725int
9726xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009727 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009728 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009729 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009730}
9731
9732/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009733 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009734 * @doc: the document the chunk pertains to
9735 * @sax: the SAX handler bloc (possibly NULL)
9736 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9737 * @depth: Used for loop detection, use 0
9738 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009739 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009740 *
9741 * Parse a well-balanced chunk of an XML document
9742 * called by the parser
9743 * The allowed sequence for the Well Balanced Chunk is the one defined by
9744 * the content production in the XML grammar:
9745 *
9746 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9747 *
9748 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9749 * the parser error code otherwise
9750 */
9751
9752int
9753xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009754 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +00009755 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
9756 depth, string, lst, 0 );
9757}
9758
9759/**
Daniel Veillard328f48c2002-11-15 15:24:34 +00009760 * xmlParseBalancedChunkMemoryInternal:
9761 * @oldctxt: the existing parsing context
9762 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9763 * @user_data: the user data field for the parser context
9764 * @lst: the return value for the set of parsed nodes
9765 *
9766 *
9767 * Parse a well-balanced chunk of an XML document
9768 * called by the parser
9769 * The allowed sequence for the Well Balanced Chunk is the one defined by
9770 * the content production in the XML grammar:
9771 *
9772 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9773 *
9774 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9775 * the parser error code otherwise
9776 *
9777 * In case recover is set to 1, the nodelist will not be empty even if
9778 * the parsed chunk is not well balanced.
9779 */
9780static int
9781xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
9782 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
9783 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009784 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009785 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009786 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009787 int size;
9788 int ret = 0;
9789
9790 if (oldctxt->depth > 40) {
9791 return(XML_ERR_ENTITY_LOOP);
9792 }
9793
9794
9795 if (lst != NULL)
9796 *lst = NULL;
9797 if (string == NULL)
9798 return(-1);
9799
9800 size = xmlStrlen(string);
9801
9802 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9803 if (ctxt == NULL) return(-1);
9804 if (user_data != NULL)
9805 ctxt->userData = user_data;
9806 else
9807 ctxt->userData = ctxt;
9808
9809 oldsax = ctxt->sax;
9810 ctxt->sax = oldctxt->sax;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009811 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +00009812 newDoc = xmlNewDoc(BAD_CAST "1.0");
9813 if (newDoc == NULL) {
9814 ctxt->sax = oldsax;
9815 xmlFreeParserCtxt(ctxt);
9816 return(-1);
9817 }
Daniel Veillard328f48c2002-11-15 15:24:34 +00009818 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009819 } else {
9820 ctxt->myDoc = oldctxt->myDoc;
9821 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009822 }
Daniel Veillard9bc53102002-11-25 13:20:04 +00009823 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +00009824 BAD_CAST "pseudoroot", NULL);
9825 if (ctxt->myDoc->children == NULL) {
9826 ctxt->sax = oldsax;
9827 xmlFreeParserCtxt(ctxt);
9828 if (newDoc != NULL)
9829 xmlFreeDoc(newDoc);
9830 return(-1);
9831 }
9832 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +00009833 ctxt->instate = XML_PARSER_CONTENT;
9834 ctxt->depth = oldctxt->depth + 1;
9835
9836 /*
9837 * Doing validity checking on chunk doesn't make sense
9838 */
9839 ctxt->validate = 0;
9840 ctxt->loadsubset = oldctxt->loadsubset;
9841
Daniel Veillard68e9e742002-11-16 15:35:11 +00009842 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +00009843 if ((RAW == '<') && (NXT(1) == '/')) {
9844 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9845 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9846 ctxt->sax->error(ctxt->userData,
9847 "chunk is not well balanced\n");
9848 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009849 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009850 } else if (RAW != 0) {
9851 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9852 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9853 ctxt->sax->error(ctxt->userData,
9854 "extra content at the end of well balanced chunk\n");
9855 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009856 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009857 }
Daniel Veillard68e9e742002-11-16 15:35:11 +00009858 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard328f48c2002-11-15 15:24:34 +00009859 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9860 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9861 ctxt->sax->error(ctxt->userData,
9862 "chunk is not well balanced\n");
9863 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009864 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009865 }
9866
9867 if (!ctxt->wellFormed) {
9868 if (ctxt->errNo == 0)
9869 ret = 1;
9870 else
9871 ret = ctxt->errNo;
9872 } else {
9873 ret = 0;
9874 }
9875
9876 if ((lst != NULL) && (ret == 0)) {
9877 xmlNodePtr cur;
9878
9879 /*
9880 * Return the newly created nodeset after unlinking it from
9881 * they pseudo parent.
9882 */
Daniel Veillard68e9e742002-11-16 15:35:11 +00009883 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009884 *lst = cur;
9885 while (cur != NULL) {
9886 cur->parent = NULL;
9887 cur = cur->next;
9888 }
Daniel Veillard68e9e742002-11-16 15:35:11 +00009889 ctxt->myDoc->children->children = NULL;
9890 }
9891 if (ctxt->myDoc != NULL) {
9892 xmlFreeNode(ctxt->myDoc->children);
9893 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009894 }
9895
9896 ctxt->sax = oldsax;
9897 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +00009898 if (newDoc != NULL)
9899 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +00009900
9901 return(ret);
9902}
9903
9904/**
Daniel Veillard58e44c92002-08-02 22:19:49 +00009905 * xmlParseBalancedChunkMemoryRecover:
9906 * @doc: the document the chunk pertains to
9907 * @sax: the SAX handler bloc (possibly NULL)
9908 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9909 * @depth: Used for loop detection, use 0
9910 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9911 * @lst: the return value for the set of parsed nodes
9912 * @recover: return nodes even if the data is broken (use 0)
9913 *
9914 *
9915 * Parse a well-balanced chunk of an XML document
9916 * called by the parser
9917 * The allowed sequence for the Well Balanced Chunk is the one defined by
9918 * the content production in the XML grammar:
9919 *
9920 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9921 *
9922 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9923 * the parser error code otherwise
9924 *
9925 * In case recover is set to 1, the nodelist will not be empty even if
9926 * the parsed chunk is not well balanced.
9927 */
9928int
9929xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
9930 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
9931 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +00009932 xmlParserCtxtPtr ctxt;
9933 xmlDocPtr newDoc;
9934 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +00009935 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +00009936 int size;
9937 int ret = 0;
9938
9939 if (depth > 40) {
9940 return(XML_ERR_ENTITY_LOOP);
9941 }
9942
9943
Daniel Veillardcda96922001-08-21 10:56:31 +00009944 if (lst != NULL)
9945 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009946 if (string == NULL)
9947 return(-1);
9948
9949 size = xmlStrlen(string);
9950
9951 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9952 if (ctxt == NULL) return(-1);
9953 ctxt->userData = ctxt;
9954 if (sax != NULL) {
9955 oldsax = ctxt->sax;
9956 ctxt->sax = sax;
9957 if (user_data != NULL)
9958 ctxt->userData = user_data;
9959 }
9960 newDoc = xmlNewDoc(BAD_CAST "1.0");
9961 if (newDoc == NULL) {
9962 xmlFreeParserCtxt(ctxt);
9963 return(-1);
9964 }
9965 if (doc != NULL) {
9966 newDoc->intSubset = doc->intSubset;
9967 newDoc->extSubset = doc->extSubset;
9968 }
9969 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9970 if (newDoc->children == NULL) {
9971 if (sax != NULL)
9972 ctxt->sax = oldsax;
9973 xmlFreeParserCtxt(ctxt);
9974 newDoc->intSubset = NULL;
9975 newDoc->extSubset = NULL;
9976 xmlFreeDoc(newDoc);
9977 return(-1);
9978 }
9979 nodePush(ctxt, newDoc->children);
9980 if (doc == NULL) {
9981 ctxt->myDoc = newDoc;
9982 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +00009983 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +00009984 newDoc->children->doc = doc;
9985 }
9986 ctxt->instate = XML_PARSER_CONTENT;
9987 ctxt->depth = depth;
9988
9989 /*
9990 * Doing validity checking on chunk doesn't make sense
9991 */
9992 ctxt->validate = 0;
9993 ctxt->loadsubset = 0;
9994
Daniel Veillardb39bc392002-10-26 19:29:51 +00009995 if ( doc != NULL ){
9996 content = doc->children;
9997 doc->children = NULL;
9998 xmlParseContent(ctxt);
9999 doc->children = content;
10000 }
10001 else {
10002 xmlParseContent(ctxt);
10003 }
Owen Taylor3473f882001-02-23 17:55:21 +000010004 if ((RAW == '<') && (NXT(1) == '/')) {
10005 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10007 ctxt->sax->error(ctxt->userData,
10008 "chunk is not well balanced\n");
10009 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010010 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010011 } else if (RAW != 0) {
10012 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10013 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10014 ctxt->sax->error(ctxt->userData,
10015 "extra content at the end of well balanced chunk\n");
10016 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010017 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010018 }
10019 if (ctxt->node != newDoc->children) {
10020 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10021 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10022 ctxt->sax->error(ctxt->userData,
10023 "chunk is not well balanced\n");
10024 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010025 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010026 }
10027
10028 if (!ctxt->wellFormed) {
10029 if (ctxt->errNo == 0)
10030 ret = 1;
10031 else
10032 ret = ctxt->errNo;
10033 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010034 ret = 0;
10035 }
10036
10037 if (lst != NULL && (ret == 0 || recover == 1)) {
10038 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010039
10040 /*
10041 * Return the newly created nodeset after unlinking it from
10042 * they pseudo parent.
10043 */
10044 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010045 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010046 while (cur != NULL) {
10047 cur->parent = NULL;
10048 cur = cur->next;
10049 }
10050 newDoc->children->children = NULL;
10051 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000010052
Owen Taylor3473f882001-02-23 17:55:21 +000010053 if (sax != NULL)
10054 ctxt->sax = oldsax;
10055 xmlFreeParserCtxt(ctxt);
10056 newDoc->intSubset = NULL;
10057 newDoc->extSubset = NULL;
10058 xmlFreeDoc(newDoc);
10059
10060 return(ret);
10061}
10062
10063/**
10064 * xmlSAXParseEntity:
10065 * @sax: the SAX handler block
10066 * @filename: the filename
10067 *
10068 * parse an XML external entity out of context and build a tree.
10069 * It use the given SAX function block to handle the parsing callback.
10070 * If sax is NULL, fallback to the default DOM tree building routines.
10071 *
10072 * [78] extParsedEnt ::= TextDecl? content
10073 *
10074 * This correspond to a "Well Balanced" chunk
10075 *
10076 * Returns the resulting document tree
10077 */
10078
10079xmlDocPtr
10080xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
10081 xmlDocPtr ret;
10082 xmlParserCtxtPtr ctxt;
10083 char *directory = NULL;
10084
10085 ctxt = xmlCreateFileParserCtxt(filename);
10086 if (ctxt == NULL) {
10087 return(NULL);
10088 }
10089 if (sax != NULL) {
10090 if (ctxt->sax != NULL)
10091 xmlFree(ctxt->sax);
10092 ctxt->sax = sax;
10093 ctxt->userData = NULL;
10094 }
10095
10096 if ((ctxt->directory == NULL) && (directory == NULL))
10097 directory = xmlParserGetDirectory(filename);
10098
10099 xmlParseExtParsedEnt(ctxt);
10100
10101 if (ctxt->wellFormed)
10102 ret = ctxt->myDoc;
10103 else {
10104 ret = NULL;
10105 xmlFreeDoc(ctxt->myDoc);
10106 ctxt->myDoc = NULL;
10107 }
10108 if (sax != NULL)
10109 ctxt->sax = NULL;
10110 xmlFreeParserCtxt(ctxt);
10111
10112 return(ret);
10113}
10114
10115/**
10116 * xmlParseEntity:
10117 * @filename: the filename
10118 *
10119 * parse an XML external entity out of context and build a tree.
10120 *
10121 * [78] extParsedEnt ::= TextDecl? content
10122 *
10123 * This correspond to a "Well Balanced" chunk
10124 *
10125 * Returns the resulting document tree
10126 */
10127
10128xmlDocPtr
10129xmlParseEntity(const char *filename) {
10130 return(xmlSAXParseEntity(NULL, filename));
10131}
10132
10133/**
10134 * xmlCreateEntityParserCtxt:
10135 * @URL: the entity URL
10136 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010137 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000010138 *
10139 * Create a parser context for an external entity
10140 * Automatic support for ZLIB/Compress compressed document is provided
10141 * by default if found at compile-time.
10142 *
10143 * Returns the new parser context or NULL
10144 */
10145xmlParserCtxtPtr
10146xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10147 const xmlChar *base) {
10148 xmlParserCtxtPtr ctxt;
10149 xmlParserInputPtr inputStream;
10150 char *directory = NULL;
10151 xmlChar *uri;
10152
10153 ctxt = xmlNewParserCtxt();
10154 if (ctxt == NULL) {
10155 return(NULL);
10156 }
10157
10158 uri = xmlBuildURI(URL, base);
10159
10160 if (uri == NULL) {
10161 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
10162 if (inputStream == NULL) {
10163 xmlFreeParserCtxt(ctxt);
10164 return(NULL);
10165 }
10166
10167 inputPush(ctxt, inputStream);
10168
10169 if ((ctxt->directory == NULL) && (directory == NULL))
10170 directory = xmlParserGetDirectory((char *)URL);
10171 if ((ctxt->directory == NULL) && (directory != NULL))
10172 ctxt->directory = directory;
10173 } else {
10174 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
10175 if (inputStream == NULL) {
10176 xmlFree(uri);
10177 xmlFreeParserCtxt(ctxt);
10178 return(NULL);
10179 }
10180
10181 inputPush(ctxt, inputStream);
10182
10183 if ((ctxt->directory == NULL) && (directory == NULL))
10184 directory = xmlParserGetDirectory((char *)uri);
10185 if ((ctxt->directory == NULL) && (directory != NULL))
10186 ctxt->directory = directory;
10187 xmlFree(uri);
10188 }
10189
10190 return(ctxt);
10191}
10192
10193/************************************************************************
10194 * *
10195 * Front ends when parsing from a file *
10196 * *
10197 ************************************************************************/
10198
10199/**
10200 * xmlCreateFileParserCtxt:
10201 * @filename: the filename
10202 *
10203 * Create a parser context for a file content.
10204 * Automatic support for ZLIB/Compress compressed document is provided
10205 * by default if found at compile-time.
10206 *
10207 * Returns the new parser context or NULL
10208 */
10209xmlParserCtxtPtr
10210xmlCreateFileParserCtxt(const char *filename)
10211{
10212 xmlParserCtxtPtr ctxt;
10213 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000010214 char *directory = NULL;
Daniel Veillardf4862f02002-09-10 11:13:43 +000010215 xmlChar *normalized;
Owen Taylor3473f882001-02-23 17:55:21 +000010216
Owen Taylor3473f882001-02-23 17:55:21 +000010217 ctxt = xmlNewParserCtxt();
10218 if (ctxt == NULL) {
10219 if (xmlDefaultSAXHandler.error != NULL) {
10220 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
10221 }
10222 return(NULL);
10223 }
10224
Daniel Veillardf4862f02002-09-10 11:13:43 +000010225 normalized = xmlNormalizeWindowsPath((const xmlChar *) filename);
10226 if (normalized == NULL) {
10227 xmlFreeParserCtxt(ctxt);
10228 return(NULL);
10229 }
10230 inputStream = xmlLoadExternalEntity((char *) normalized, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010231 if (inputStream == NULL) {
10232 xmlFreeParserCtxt(ctxt);
Daniel Veillardf4862f02002-09-10 11:13:43 +000010233 xmlFree(normalized);
Owen Taylor3473f882001-02-23 17:55:21 +000010234 return(NULL);
10235 }
10236
Owen Taylor3473f882001-02-23 17:55:21 +000010237 inputPush(ctxt, inputStream);
10238 if ((ctxt->directory == NULL) && (directory == NULL))
Daniel Veillardf4862f02002-09-10 11:13:43 +000010239 directory = xmlParserGetDirectory((char *) normalized);
Owen Taylor3473f882001-02-23 17:55:21 +000010240 if ((ctxt->directory == NULL) && (directory != NULL))
10241 ctxt->directory = directory;
10242
Daniel Veillardf4862f02002-09-10 11:13:43 +000010243 xmlFree(normalized);
10244
Owen Taylor3473f882001-02-23 17:55:21 +000010245 return(ctxt);
10246}
10247
10248/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010249 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000010250 * @sax: the SAX handler block
10251 * @filename: the filename
10252 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10253 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000010254 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000010255 *
10256 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10257 * compressed document is provided by default if found at compile-time.
10258 * It use the given SAX function block to handle the parsing callback.
10259 * If sax is NULL, fallback to the default DOM tree building routines.
10260 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000010261 * User data (void *) is stored within the parser context in the
10262 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000010263 *
Owen Taylor3473f882001-02-23 17:55:21 +000010264 * Returns the resulting document tree
10265 */
10266
10267xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000010268xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
10269 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000010270 xmlDocPtr ret;
10271 xmlParserCtxtPtr ctxt;
10272 char *directory = NULL;
10273
Daniel Veillard635ef722001-10-29 11:48:19 +000010274 xmlInitParser();
10275
Owen Taylor3473f882001-02-23 17:55:21 +000010276 ctxt = xmlCreateFileParserCtxt(filename);
10277 if (ctxt == NULL) {
10278 return(NULL);
10279 }
10280 if (sax != NULL) {
10281 if (ctxt->sax != NULL)
10282 xmlFree(ctxt->sax);
10283 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010284 }
Daniel Veillarda293c322001-10-02 13:54:14 +000010285 if (data!=NULL) {
10286 ctxt->_private=data;
10287 }
Owen Taylor3473f882001-02-23 17:55:21 +000010288
10289 if ((ctxt->directory == NULL) && (directory == NULL))
10290 directory = xmlParserGetDirectory(filename);
10291 if ((ctxt->directory == NULL) && (directory != NULL))
10292 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
10293
Daniel Veillarddad3f682002-11-17 16:47:27 +000010294 ctxt->recovery = recovery;
10295
Owen Taylor3473f882001-02-23 17:55:21 +000010296 xmlParseDocument(ctxt);
10297
10298 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10299 else {
10300 ret = NULL;
10301 xmlFreeDoc(ctxt->myDoc);
10302 ctxt->myDoc = NULL;
10303 }
10304 if (sax != NULL)
10305 ctxt->sax = NULL;
10306 xmlFreeParserCtxt(ctxt);
10307
10308 return(ret);
10309}
10310
10311/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010312 * xmlSAXParseFile:
10313 * @sax: the SAX handler block
10314 * @filename: the filename
10315 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10316 * documents
10317 *
10318 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10319 * compressed document is provided by default if found at compile-time.
10320 * It use the given SAX function block to handle the parsing callback.
10321 * If sax is NULL, fallback to the default DOM tree building routines.
10322 *
10323 * Returns the resulting document tree
10324 */
10325
10326xmlDocPtr
10327xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
10328 int recovery) {
10329 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
10330}
10331
10332/**
Owen Taylor3473f882001-02-23 17:55:21 +000010333 * xmlRecoverDoc:
10334 * @cur: a pointer to an array of xmlChar
10335 *
10336 * parse an XML in-memory document and build a tree.
10337 * In the case the document is not Well Formed, a tree is built anyway
10338 *
10339 * Returns the resulting document tree
10340 */
10341
10342xmlDocPtr
10343xmlRecoverDoc(xmlChar *cur) {
10344 return(xmlSAXParseDoc(NULL, cur, 1));
10345}
10346
10347/**
10348 * xmlParseFile:
10349 * @filename: the filename
10350 *
10351 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10352 * compressed document is provided by default if found at compile-time.
10353 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000010354 * Returns the resulting document tree if the file was wellformed,
10355 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000010356 */
10357
10358xmlDocPtr
10359xmlParseFile(const char *filename) {
10360 return(xmlSAXParseFile(NULL, filename, 0));
10361}
10362
10363/**
10364 * xmlRecoverFile:
10365 * @filename: the filename
10366 *
10367 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10368 * compressed document is provided by default if found at compile-time.
10369 * In the case the document is not Well Formed, a tree is built anyway
10370 *
10371 * Returns the resulting document tree
10372 */
10373
10374xmlDocPtr
10375xmlRecoverFile(const char *filename) {
10376 return(xmlSAXParseFile(NULL, filename, 1));
10377}
10378
10379
10380/**
10381 * xmlSetupParserForBuffer:
10382 * @ctxt: an XML parser context
10383 * @buffer: a xmlChar * buffer
10384 * @filename: a file name
10385 *
10386 * Setup the parser context to parse a new buffer; Clears any prior
10387 * contents from the parser context. The buffer parameter must not be
10388 * NULL, but the filename parameter can be
10389 */
10390void
10391xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
10392 const char* filename)
10393{
10394 xmlParserInputPtr input;
10395
10396 input = xmlNewInputStream(ctxt);
10397 if (input == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +000010398 xmlGenericError(xmlGenericErrorContext,
10399 "malloc");
Owen Taylor3473f882001-02-23 17:55:21 +000010400 xmlFree(ctxt);
10401 return;
10402 }
10403
10404 xmlClearParserCtxt(ctxt);
10405 if (filename != NULL)
10406 input->filename = xmlMemStrdup(filename);
10407 input->base = buffer;
10408 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010409 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000010410 inputPush(ctxt, input);
10411}
10412
10413/**
10414 * xmlSAXUserParseFile:
10415 * @sax: a SAX handler
10416 * @user_data: The user data returned on SAX callbacks
10417 * @filename: a file name
10418 *
10419 * parse an XML file and call the given SAX handler routines.
10420 * Automatic support for ZLIB/Compress compressed document is provided
10421 *
10422 * Returns 0 in case of success or a error number otherwise
10423 */
10424int
10425xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10426 const char *filename) {
10427 int ret = 0;
10428 xmlParserCtxtPtr ctxt;
10429
10430 ctxt = xmlCreateFileParserCtxt(filename);
10431 if (ctxt == NULL) return -1;
10432 if (ctxt->sax != &xmlDefaultSAXHandler)
10433 xmlFree(ctxt->sax);
10434 ctxt->sax = sax;
10435 if (user_data != NULL)
10436 ctxt->userData = user_data;
10437
10438 xmlParseDocument(ctxt);
10439
10440 if (ctxt->wellFormed)
10441 ret = 0;
10442 else {
10443 if (ctxt->errNo != 0)
10444 ret = ctxt->errNo;
10445 else
10446 ret = -1;
10447 }
10448 if (sax != NULL)
10449 ctxt->sax = NULL;
10450 xmlFreeParserCtxt(ctxt);
10451
10452 return ret;
10453}
10454
10455/************************************************************************
10456 * *
10457 * Front ends when parsing from memory *
10458 * *
10459 ************************************************************************/
10460
10461/**
10462 * xmlCreateMemoryParserCtxt:
10463 * @buffer: a pointer to a char array
10464 * @size: the size of the array
10465 *
10466 * Create a parser context for an XML in-memory document.
10467 *
10468 * Returns the new parser context or NULL
10469 */
10470xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010471xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010472 xmlParserCtxtPtr ctxt;
10473 xmlParserInputPtr input;
10474 xmlParserInputBufferPtr buf;
10475
10476 if (buffer == NULL)
10477 return(NULL);
10478 if (size <= 0)
10479 return(NULL);
10480
10481 ctxt = xmlNewParserCtxt();
10482 if (ctxt == NULL)
10483 return(NULL);
10484
10485 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010486 if (buf == NULL) {
10487 xmlFreeParserCtxt(ctxt);
10488 return(NULL);
10489 }
Owen Taylor3473f882001-02-23 17:55:21 +000010490
10491 input = xmlNewInputStream(ctxt);
10492 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010493 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010494 xmlFreeParserCtxt(ctxt);
10495 return(NULL);
10496 }
10497
10498 input->filename = NULL;
10499 input->buf = buf;
10500 input->base = input->buf->buffer->content;
10501 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010502 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010503
10504 inputPush(ctxt, input);
10505 return(ctxt);
10506}
10507
10508/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010509 * xmlSAXParseMemoryWithData:
10510 * @sax: the SAX handler block
10511 * @buffer: an pointer to a char array
10512 * @size: the size of the array
10513 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10514 * documents
10515 * @data: the userdata
10516 *
10517 * parse an XML in-memory block and use the given SAX function block
10518 * to handle the parsing callback. If sax is NULL, fallback to the default
10519 * DOM tree building routines.
10520 *
10521 * User data (void *) is stored within the parser context in the
10522 * context's _private member, so it is available nearly everywhere in libxml
10523 *
10524 * Returns the resulting document tree
10525 */
10526
10527xmlDocPtr
10528xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
10529 int size, int recovery, void *data) {
10530 xmlDocPtr ret;
10531 xmlParserCtxtPtr ctxt;
10532
10533 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10534 if (ctxt == NULL) return(NULL);
10535 if (sax != NULL) {
10536 if (ctxt->sax != NULL)
10537 xmlFree(ctxt->sax);
10538 ctxt->sax = sax;
10539 }
10540 if (data!=NULL) {
10541 ctxt->_private=data;
10542 }
10543
10544 xmlParseDocument(ctxt);
10545
10546 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10547 else {
10548 ret = NULL;
10549 xmlFreeDoc(ctxt->myDoc);
10550 ctxt->myDoc = NULL;
10551 }
10552 if (sax != NULL)
10553 ctxt->sax = NULL;
10554 xmlFreeParserCtxt(ctxt);
10555
10556 return(ret);
10557}
10558
10559/**
Owen Taylor3473f882001-02-23 17:55:21 +000010560 * xmlSAXParseMemory:
10561 * @sax: the SAX handler block
10562 * @buffer: an pointer to a char array
10563 * @size: the size of the array
10564 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10565 * documents
10566 *
10567 * parse an XML in-memory block and use the given SAX function block
10568 * to handle the parsing callback. If sax is NULL, fallback to the default
10569 * DOM tree building routines.
10570 *
10571 * Returns the resulting document tree
10572 */
10573xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010574xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10575 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010576 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010577}
10578
10579/**
10580 * xmlParseMemory:
10581 * @buffer: an pointer to a char array
10582 * @size: the size of the array
10583 *
10584 * parse an XML in-memory block and build a tree.
10585 *
10586 * Returns the resulting document tree
10587 */
10588
Daniel Veillard50822cb2001-07-26 20:05:51 +000010589xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010590 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10591}
10592
10593/**
10594 * xmlRecoverMemory:
10595 * @buffer: an pointer to a char array
10596 * @size: the size of the array
10597 *
10598 * parse an XML in-memory block and build a tree.
10599 * In the case the document is not Well Formed, a tree is built anyway
10600 *
10601 * Returns the resulting document tree
10602 */
10603
Daniel Veillard50822cb2001-07-26 20:05:51 +000010604xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010605 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10606}
10607
10608/**
10609 * xmlSAXUserParseMemory:
10610 * @sax: a SAX handler
10611 * @user_data: The user data returned on SAX callbacks
10612 * @buffer: an in-memory XML document input
10613 * @size: the length of the XML document in bytes
10614 *
10615 * A better SAX parsing routine.
10616 * parse an XML in-memory buffer and call the given SAX handler routines.
10617 *
10618 * Returns 0 in case of success or a error number otherwise
10619 */
10620int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010621 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010622 int ret = 0;
10623 xmlParserCtxtPtr ctxt;
10624 xmlSAXHandlerPtr oldsax = NULL;
10625
Daniel Veillard9e923512002-08-14 08:48:52 +000010626 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000010627 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10628 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000010629 oldsax = ctxt->sax;
10630 ctxt->sax = sax;
Daniel Veillard30211a02001-04-26 09:33:18 +000010631 if (user_data != NULL)
10632 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010633
10634 xmlParseDocument(ctxt);
10635
10636 if (ctxt->wellFormed)
10637 ret = 0;
10638 else {
10639 if (ctxt->errNo != 0)
10640 ret = ctxt->errNo;
10641 else
10642 ret = -1;
10643 }
Daniel Veillard9e923512002-08-14 08:48:52 +000010644 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000010645 xmlFreeParserCtxt(ctxt);
10646
10647 return ret;
10648}
10649
10650/**
10651 * xmlCreateDocParserCtxt:
10652 * @cur: a pointer to an array of xmlChar
10653 *
10654 * Creates a parser context for an XML in-memory document.
10655 *
10656 * Returns the new parser context or NULL
10657 */
10658xmlParserCtxtPtr
10659xmlCreateDocParserCtxt(xmlChar *cur) {
10660 int len;
10661
10662 if (cur == NULL)
10663 return(NULL);
10664 len = xmlStrlen(cur);
10665 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10666}
10667
10668/**
10669 * xmlSAXParseDoc:
10670 * @sax: the SAX handler block
10671 * @cur: a pointer to an array of xmlChar
10672 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10673 * documents
10674 *
10675 * parse an XML in-memory document and build a tree.
10676 * It use the given SAX function block to handle the parsing callback.
10677 * If sax is NULL, fallback to the default DOM tree building routines.
10678 *
10679 * Returns the resulting document tree
10680 */
10681
10682xmlDocPtr
10683xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10684 xmlDocPtr ret;
10685 xmlParserCtxtPtr ctxt;
10686
10687 if (cur == NULL) return(NULL);
10688
10689
10690 ctxt = xmlCreateDocParserCtxt(cur);
10691 if (ctxt == NULL) return(NULL);
10692 if (sax != NULL) {
10693 ctxt->sax = sax;
10694 ctxt->userData = NULL;
10695 }
10696
10697 xmlParseDocument(ctxt);
10698 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10699 else {
10700 ret = NULL;
10701 xmlFreeDoc(ctxt->myDoc);
10702 ctxt->myDoc = NULL;
10703 }
10704 if (sax != NULL)
10705 ctxt->sax = NULL;
10706 xmlFreeParserCtxt(ctxt);
10707
10708 return(ret);
10709}
10710
10711/**
10712 * xmlParseDoc:
10713 * @cur: a pointer to an array of xmlChar
10714 *
10715 * parse an XML in-memory document and build a tree.
10716 *
10717 * Returns the resulting document tree
10718 */
10719
10720xmlDocPtr
10721xmlParseDoc(xmlChar *cur) {
10722 return(xmlSAXParseDoc(NULL, cur, 0));
10723}
10724
Daniel Veillard8107a222002-01-13 14:10:10 +000010725/************************************************************************
10726 * *
10727 * Specific function to keep track of entities references *
10728 * and used by the XSLT debugger *
10729 * *
10730 ************************************************************************/
10731
10732static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
10733
10734/**
10735 * xmlAddEntityReference:
10736 * @ent : A valid entity
10737 * @firstNode : A valid first node for children of entity
10738 * @lastNode : A valid last node of children entity
10739 *
10740 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
10741 */
10742static void
10743xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
10744 xmlNodePtr lastNode)
10745{
10746 if (xmlEntityRefFunc != NULL) {
10747 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
10748 }
10749}
10750
10751
10752/**
10753 * xmlSetEntityReferenceFunc:
10754 * @func : A valid function
10755 *
10756 * Set the function to call call back when a xml reference has been made
10757 */
10758void
10759xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
10760{
10761 xmlEntityRefFunc = func;
10762}
Owen Taylor3473f882001-02-23 17:55:21 +000010763
10764/************************************************************************
10765 * *
10766 * Miscellaneous *
10767 * *
10768 ************************************************************************/
10769
10770#ifdef LIBXML_XPATH_ENABLED
10771#include <libxml/xpath.h>
10772#endif
10773
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010774extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000010775static int xmlParserInitialized = 0;
10776
10777/**
10778 * xmlInitParser:
10779 *
10780 * Initialization function for the XML parser.
10781 * This is not reentrant. Call once before processing in case of
10782 * use in multithreaded programs.
10783 */
10784
10785void
10786xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000010787 if (xmlParserInitialized != 0)
10788 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010789
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010790 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
10791 (xmlGenericError == NULL))
10792 initGenericErrorDefaultFunc(NULL);
Daniel Veillardd0463562001-10-13 09:15:48 +000010793 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000010794 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000010795 xmlInitCharEncodingHandlers();
10796 xmlInitializePredefinedEntities();
10797 xmlDefaultSAXHandlerInit();
10798 xmlRegisterDefaultInputCallbacks();
10799 xmlRegisterDefaultOutputCallbacks();
10800#ifdef LIBXML_HTML_ENABLED
10801 htmlInitAutoClose();
10802 htmlDefaultSAXHandlerInit();
10803#endif
10804#ifdef LIBXML_XPATH_ENABLED
10805 xmlXPathInit();
10806#endif
10807 xmlParserInitialized = 1;
10808}
10809
10810/**
10811 * xmlCleanupParser:
10812 *
10813 * Cleanup function for the XML parser. It tries to reclaim all
10814 * parsing related global memory allocated for the parser processing.
10815 * It doesn't deallocate any document related memory. Calling this
10816 * function should not prevent reusing the parser.
10817 */
10818
10819void
10820xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000010821 xmlCleanupCharEncodingHandlers();
10822 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000010823#ifdef LIBXML_CATALOG_ENABLED
10824 xmlCatalogCleanup();
10825#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000010826 xmlCleanupThreads();
10827 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010828}