blob: ca56c1d8cc74b2b2ba1ffa63f4fd0322919851fb [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
44#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000045#include <libxml/threads.h>
46#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000056#ifdef LIBXML_CATALOG_ENABLED
57#include <libxml/catalog.h>
58#endif
Owen Taylor3473f882001-02-23 17:55:21 +000059
60#ifdef HAVE_CTYPE_H
61#include <ctype.h>
62#endif
63#ifdef HAVE_STDLIB_H
64#include <stdlib.h>
65#endif
66#ifdef HAVE_SYS_STAT_H
67#include <sys/stat.h>
68#endif
69#ifdef HAVE_FCNTL_H
70#include <fcntl.h>
71#endif
72#ifdef HAVE_UNISTD_H
73#include <unistd.h>
74#endif
75#ifdef HAVE_ZLIB_H
76#include <zlib.h>
77#endif
78
79
Daniel Veillard21a0f912001-02-25 19:54:14 +000080#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000081#define XML_PARSER_BUFFER_SIZE 100
82
Daniel Veillard5997aca2002-03-18 18:36:20 +000083#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
84
/*
 * Processing instruction targets starting with "xml" that the W3C
 * specifications allow. The table is terminated by a NULL entry.
 */

static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
93
94/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +000095xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
96 const xmlChar **str);
97
Daniel Veillard257d9102001-05-08 10:41:44 +000098static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +000099xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
100 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000101 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000102 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000103
Daniel Veillard8107a222002-01-13 14:10:10 +0000104static void
105xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
106 xmlNodePtr lastNode);
107
Daniel Veillard328f48c2002-11-15 15:24:34 +0000108static int
109xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
110 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Owen Taylor3473f882001-02-23 17:55:21 +0000111/************************************************************************
112 * *
113 * Parser stacks related functions and macros *
114 * *
115 ************************************************************************/
116
117xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
118 const xmlChar ** str);
119
120/*
121 * Generic function for accessing stacks in the Parser Context
122 */
123
/*
 * PUSH_AND_POP(scope, type, name) generates the pair of functions
 * name##Push() and name##Pop() operating on the stack stored in the
 * parser context as ctxt->name##Tab (storage), ctxt->name##Nr (number of
 * items in use), ctxt->name##Max (allocated capacity) and ctxt->name
 * (copy of the current top item).
 *
 * name##Push() doubles the table when it is full, stores @value on top and
 * returns the index used. It returns 0 when the reallocation fails; in
 * that case the existing table, its capacity and its content are left
 * untouched (the result of xmlRealloc() only replaces the table once it is
 * known to be non-NULL, so the old block is neither leaked nor lost).
 *
 * name##Pop() returns 0 on an empty stack, otherwise it removes and
 * returns the top item, clears the freed slot and updates ctxt->name to
 * the new top item (NULL once the stack is empty).
 */
#define PUSH_AND_POP(scope, type, name)					\
scope int name##Push(xmlParserCtxtPtr ctxt, type value) {		\
    if (ctxt->name##Nr >= ctxt->name##Max) {				\
        type *tmp;							\
        tmp = (type *) xmlRealloc(ctxt->name##Tab,			\
              ctxt->name##Max * 2 * sizeof(ctxt->name##Tab[0]));	\
        if (tmp == NULL) {						\
            xmlGenericError(xmlGenericErrorContext,			\
                            "realloc failed !\n");			\
            return(0);							\
        }								\
        ctxt->name##Tab = tmp;						\
        ctxt->name##Max *= 2;						\
    }									\
    ctxt->name##Tab[ctxt->name##Nr] = value;				\
    ctxt->name = value;							\
    return(ctxt->name##Nr++);						\
}									\
scope type name##Pop(xmlParserCtxtPtr ctxt) {				\
    type ret;								\
    if (ctxt->name##Nr <= 0) return(0);					\
    ctxt->name##Nr--;							\
    if (ctxt->name##Nr > 0)						\
        ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1];		\
    else								\
        ctxt->name = NULL;						\
    ret = ctxt->name##Tab[ctxt->name##Nr];				\
    ctxt->name##Tab[ctxt->name##Nr] = 0;				\
    return(ret);							\
}
152
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000153/**
154 * inputPop:
155 * @ctxt: an XML parser context
156 *
157 * Pops the top parser input from the input stack
158 *
159 * Returns the input just removed
160 */
161/**
162 * inputPush:
163 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000164 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000165 *
166 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000167 *
168 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000169 */
170/**
171 * namePop:
172 * @ctxt: an XML parser context
173 *
174 * Pops the top element name from the name stack
175 *
176 * Returns the name just removed
177 */
178/**
179 * namePush:
180 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000181 * @value: the element name
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000182 *
183 * Pushes a new element name on top of the name stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000184 *
185 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000186 */
187/**
188 * nodePop:
189 * @ctxt: an XML parser context
190 *
191 * Pops the top element node from the node stack
192 *
193 * Returns the node just removed
194 */
195/**
196 * nodePush:
197 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000198 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000199 *
200 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000201 *
202 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000203 */
Owen Taylor3473f882001-02-23 17:55:21 +0000204/*
205 * Those macros actually generate the functions
206 */
207PUSH_AND_POP(extern, xmlParserInputPtr, input)
208PUSH_AND_POP(extern, xmlNodePtr, node)
209PUSH_AND_POP(extern, xmlChar*, name)
210
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000211static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000212 if (ctxt->spaceNr >= ctxt->spaceMax) {
213 ctxt->spaceMax *= 2;
214 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
215 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
216 if (ctxt->spaceTab == NULL) {
217 xmlGenericError(xmlGenericErrorContext,
218 "realloc failed !\n");
219 return(0);
220 }
221 }
222 ctxt->spaceTab[ctxt->spaceNr] = val;
223 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
224 return(ctxt->spaceNr++);
225}
226
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000227static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000228 int ret;
229 if (ctxt->spaceNr <= 0) return(0);
230 ctxt->spaceNr--;
231 if (ctxt->spaceNr > 0)
232 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
233 else
234 ctxt->space = NULL;
235 ret = ctxt->spaceTab[ctxt->spaceNr];
236 ctxt->spaceTab[ctxt->spaceNr] = -1;
237 return(ret);
238}
239
240/*
241 * Macros for accessing the content. Those should be used only by the parser,
242 * and not exported.
243 *
244 * Dirty macros, i.e. one often need to make assumption on the context to
245 * use them
246 *
247 * CUR_PTR return the current pointer to the xmlChar to be parsed.
248 * To be used with extreme caution since operations consuming
249 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare against ASCII based substrings.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
260 *
261 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
262 *
263 * NEXT Skip to the next character, this does the proper decoding
264 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000265 * NEXTL(l) Skip l xmlChar in the input buffer
Owen Taylor3473f882001-02-23 17:55:21 +0000266 * CUR_CHAR(l) returns the current unicode character (int), set l
267 * to the number of xmlChars used for the encoding [0-5].
268 * CUR_SCHAR same but operate on a string instead of the context
269 * COPY_BUF copy the current unicode char to the target buffer, increment
270 * the index
271 * GROW, SHRINK handling of input buffers
272 */
273
Daniel Veillardfdc91562002-07-01 21:52:03 +0000274#define RAW (*ctxt->input->cur)
275#define CUR (*ctxt->input->cur)
Owen Taylor3473f882001-02-23 17:55:21 +0000276#define NXT(val) ctxt->input->cur[(val)]
277#define CUR_PTR ctxt->input->cur
278
/*
 * SKIP(val): advance the current input by val xmlChar (val is evaluated
 * twice), hand a '%' found at the new position to
 * xmlParserHandlePEReference(), and pop the input when it is exhausted
 * and cannot be grown.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
286
Daniel Veillard46de64e2002-05-29 08:21:33 +0000287#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) \
288 xmlSHRINK (ctxt);
289
290static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
291 xmlParserInputShrink(ctxt->input);
292 if ((*ctxt->input->cur == 0) &&
293 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
294 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000295 }
Owen Taylor3473f882001-02-23 17:55:21 +0000296
Daniel Veillard46de64e2002-05-29 08:21:33 +0000297#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) \
298 xmlGROW (ctxt);
299
300static void xmlGROW (xmlParserCtxtPtr ctxt) {
301 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
302 if ((*ctxt->input->cur == 0) &&
303 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
304 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000305 }
Owen Taylor3473f882001-02-23 17:55:21 +0000306
307#define SKIP_BLANKS xmlSkipBlankChars(ctxt)
308
309#define NEXT xmlNextChar(ctxt)
310
Daniel Veillard21a0f912001-02-25 19:54:14 +0000311#define NEXT1 { \
312 ctxt->input->cur++; \
313 ctxt->nbChars++; \
Daniel Veillard561b7f82002-03-20 21:55:57 +0000314 if (*ctxt->input->cur == 0) \
Daniel Veillard21a0f912001-02-25 19:54:14 +0000315 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
316 }
317
Owen Taylor3473f882001-02-23 17:55:21 +0000318#define NEXTL(l) do { \
319 if (*(ctxt->input->cur) == '\n') { \
320 ctxt->input->line++; ctxt->input->col = 1; \
321 } else ctxt->input->col++; \
Daniel Veillardfdc91562002-07-01 21:52:03 +0000322 ctxt->input->cur += l; \
Owen Taylor3473f882001-02-23 17:55:21 +0000323 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
Owen Taylor3473f882001-02-23 17:55:21 +0000324 } while (0)
325
326#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
327#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
328
329#define COPY_BUF(l,b,i,v) \
330 if (l == 1) b[i++] = (xmlChar) v; \
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000331 else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +0000332
333/**
334 * xmlSkipBlankChars:
335 * @ctxt: the XML parser context
336 *
337 * skip all blanks character found at that point in the input streams.
338 * It pops up finished entities in the process if allowable at that point.
339 *
340 * Returns the number of space chars skipped
341 */
342
343int
344xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000345 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000346
347 /*
348 * It's Okay to use CUR/NEXT here since all the blanks are on
349 * the ASCII range.
350 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000351 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
352 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000353 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000354 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000355 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000356 cur = ctxt->input->cur;
357 while (IS_BLANK(*cur)) {
358 if (*cur == '\n') {
359 ctxt->input->line++; ctxt->input->col = 1;
360 }
361 cur++;
362 res++;
363 if (*cur == 0) {
364 ctxt->input->cur = cur;
365 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
366 cur = ctxt->input->cur;
367 }
368 }
369 ctxt->input->cur = cur;
370 } else {
371 int cur;
372 do {
373 cur = CUR;
374 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
375 NEXT;
376 cur = CUR;
377 res++;
378 }
379 while ((cur == 0) && (ctxt->inputNr > 1) &&
380 (ctxt->instate != XML_PARSER_COMMENT)) {
381 xmlPopInput(ctxt);
382 cur = CUR;
383 }
384 /*
385 * Need to handle support of entities branching here
386 */
387 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
388 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
389 }
Owen Taylor3473f882001-02-23 17:55:21 +0000390 return(res);
391}
392
393/************************************************************************
394 * *
395 * Commodity functions to handle entities *
396 * *
397 ************************************************************************/
398
399/**
400 * xmlPopInput:
401 * @ctxt: an XML parser context
402 *
403 * xmlPopInput: the current input pointed by ctxt->input came to an end
404 * pop it and return the next char.
405 *
406 * Returns the current xmlChar in the parser context
407 */
408xmlChar
409xmlPopInput(xmlParserCtxtPtr ctxt) {
410 if (ctxt->inputNr == 1) return(0); /* End of main Input */
411 if (xmlParserDebugEntities)
412 xmlGenericError(xmlGenericErrorContext,
413 "Popping input %d\n", ctxt->inputNr);
414 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +0000415 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000416 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
417 return(xmlPopInput(ctxt));
418 return(CUR);
419}
420
421/**
422 * xmlPushInput:
423 * @ctxt: an XML parser context
424 * @input: an XML parser input fragment (entity, XML fragment ...).
425 *
426 * xmlPushInput: switch to a new input stream which is stacked on top
427 * of the previous one(s).
428 */
429void
430xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
431 if (input == NULL) return;
432
433 if (xmlParserDebugEntities) {
434 if ((ctxt->input != NULL) && (ctxt->input->filename))
435 xmlGenericError(xmlGenericErrorContext,
436 "%s(%d): ", ctxt->input->filename,
437 ctxt->input->line);
438 xmlGenericError(xmlGenericErrorContext,
439 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
440 }
441 inputPush(ctxt, input);
442 GROW;
443}
444
445/**
446 * xmlParseCharRef:
447 * @ctxt: an XML parser context
448 *
449 * parse Reference declarations
450 *
451 * [66] CharRef ::= '&#' [0-9]+ ';' |
452 * '&#x' [0-9a-fA-F]+ ';'
453 *
454 * [ WFC: Legal Character ]
455 * Characters referred to using character references must match the
456 * production for Char.
457 *
458 * Returns the value parsed (as an int), 0 in case of error
459 */
460int
461xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000462 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000463 int count = 0;
464
Owen Taylor3473f882001-02-23 17:55:21 +0000465 /*
466 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
467 */
Daniel Veillard561b7f82002-03-20 21:55:57 +0000468 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +0000469 (NXT(2) == 'x')) {
470 SKIP(3);
471 GROW;
472 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000473 if (count++ > 20) {
474 count = 0;
475 GROW;
476 }
477 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000478 val = val * 16 + (CUR - '0');
479 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
480 val = val * 16 + (CUR - 'a') + 10;
481 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
482 val = val * 16 + (CUR - 'A') + 10;
483 else {
484 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
485 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
486 ctxt->sax->error(ctxt->userData,
487 "xmlParseCharRef: invalid hexadecimal value\n");
488 ctxt->wellFormed = 0;
489 ctxt->disableSAX = 1;
490 val = 0;
491 break;
492 }
493 NEXT;
494 count++;
495 }
496 if (RAW == ';') {
497 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
498 ctxt->nbChars ++;
499 ctxt->input->cur++;
500 }
Daniel Veillard561b7f82002-03-20 21:55:57 +0000501 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +0000502 SKIP(2);
503 GROW;
504 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000505 if (count++ > 20) {
506 count = 0;
507 GROW;
508 }
509 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000510 val = val * 10 + (CUR - '0');
511 else {
512 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
513 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
514 ctxt->sax->error(ctxt->userData,
515 "xmlParseCharRef: invalid decimal value\n");
516 ctxt->wellFormed = 0;
517 ctxt->disableSAX = 1;
518 val = 0;
519 break;
520 }
521 NEXT;
522 count++;
523 }
524 if (RAW == ';') {
525 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
526 ctxt->nbChars ++;
527 ctxt->input->cur++;
528 }
529 } else {
530 ctxt->errNo = XML_ERR_INVALID_CHARREF;
531 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
532 ctxt->sax->error(ctxt->userData,
533 "xmlParseCharRef: invalid value\n");
534 ctxt->wellFormed = 0;
535 ctxt->disableSAX = 1;
536 }
537
538 /*
539 * [ WFC: Legal Character ]
540 * Characters referred to using character references must match the
541 * production for Char.
542 */
543 if (IS_CHAR(val)) {
544 return(val);
545 } else {
546 ctxt->errNo = XML_ERR_INVALID_CHAR;
547 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000548 ctxt->sax->error(ctxt->userData,
549 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000550 val);
551 ctxt->wellFormed = 0;
552 ctxt->disableSAX = 1;
553 }
554 return(0);
555}
556
557/**
558 * xmlParseStringCharRef:
559 * @ctxt: an XML parser context
560 * @str: a pointer to an index in the string
561 *
562 * parse Reference declarations, variant parsing from a string rather
563 * than an an input flow.
564 *
565 * [66] CharRef ::= '&#' [0-9]+ ';' |
566 * '&#x' [0-9a-fA-F]+ ';'
567 *
568 * [ WFC: Legal Character ]
569 * Characters referred to using character references must match the
570 * production for Char.
571 *
572 * Returns the value parsed (as an int), 0 in case of error, str will be
573 * updated to the current value of the index
574 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000575static int
Owen Taylor3473f882001-02-23 17:55:21 +0000576xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
577 const xmlChar *ptr;
578 xmlChar cur;
579 int val = 0;
580
581 if ((str == NULL) || (*str == NULL)) return(0);
582 ptr = *str;
583 cur = *ptr;
584 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
585 ptr += 3;
586 cur = *ptr;
587 while (cur != ';') { /* Non input consuming loop */
588 if ((cur >= '0') && (cur <= '9'))
589 val = val * 16 + (cur - '0');
590 else if ((cur >= 'a') && (cur <= 'f'))
591 val = val * 16 + (cur - 'a') + 10;
592 else if ((cur >= 'A') && (cur <= 'F'))
593 val = val * 16 + (cur - 'A') + 10;
594 else {
595 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
596 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
597 ctxt->sax->error(ctxt->userData,
598 "xmlParseStringCharRef: invalid hexadecimal value\n");
599 ctxt->wellFormed = 0;
600 ctxt->disableSAX = 1;
601 val = 0;
602 break;
603 }
604 ptr++;
605 cur = *ptr;
606 }
607 if (cur == ';')
608 ptr++;
609 } else if ((cur == '&') && (ptr[1] == '#')){
610 ptr += 2;
611 cur = *ptr;
612 while (cur != ';') { /* Non input consuming loops */
613 if ((cur >= '0') && (cur <= '9'))
614 val = val * 10 + (cur - '0');
615 else {
616 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
617 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
618 ctxt->sax->error(ctxt->userData,
619 "xmlParseStringCharRef: invalid decimal value\n");
620 ctxt->wellFormed = 0;
621 ctxt->disableSAX = 1;
622 val = 0;
623 break;
624 }
625 ptr++;
626 cur = *ptr;
627 }
628 if (cur == ';')
629 ptr++;
630 } else {
631 ctxt->errNo = XML_ERR_INVALID_CHARREF;
632 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
633 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000634 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000635 ctxt->wellFormed = 0;
636 ctxt->disableSAX = 1;
637 return(0);
638 }
639 *str = ptr;
640
641 /*
642 * [ WFC: Legal Character ]
643 * Characters referred to using character references must match the
644 * production for Char.
645 */
646 if (IS_CHAR(val)) {
647 return(val);
648 } else {
649 ctxt->errNo = XML_ERR_INVALID_CHAR;
650 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
651 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000652 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000653 ctxt->wellFormed = 0;
654 ctxt->disableSAX = 1;
655 }
656 return(0);
657}
658
659/**
Daniel Veillardf5582f12002-06-11 10:08:16 +0000660 * xmlNewBlanksWrapperInputStream:
661 * @ctxt: an XML parser context
662 * @entity: an Entity pointer
663 *
664 * Create a new input stream for wrapping
665 * blanks around a PEReference
666 *
667 * Returns the new input stream or NULL
668 */
669
670static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
671
Daniel Veillardf4862f02002-09-10 11:13:43 +0000672static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +0000673xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
674 xmlParserInputPtr input;
675 xmlChar *buffer;
676 size_t length;
677 if (entity == NULL) {
678 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
679 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
680 ctxt->sax->error(ctxt->userData,
681 "internal: xmlNewBlanksWrapperInputStream entity = NULL\n");
682 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
683 return(NULL);
684 }
685 if (xmlParserDebugEntities)
686 xmlGenericError(xmlGenericErrorContext,
687 "new blanks wrapper for entity: %s\n", entity->name);
688 input = xmlNewInputStream(ctxt);
689 if (input == NULL) {
690 return(NULL);
691 }
692 length = xmlStrlen(entity->name) + 5;
693 buffer = xmlMalloc(length);
694 if (buffer == NULL) {
695 return(NULL);
696 }
697 buffer [0] = ' ';
698 buffer [1] = '%';
699 buffer [length-3] = ';';
700 buffer [length-2] = ' ';
701 buffer [length-1] = 0;
702 memcpy(buffer + 2, entity->name, length - 5);
703 input->free = deallocblankswrapper;
704 input->base = buffer;
705 input->cur = buffer;
706 input->length = length;
707 input->end = &buffer[length];
708 return(input);
709}
710
711/**
Owen Taylor3473f882001-02-23 17:55:21 +0000712 * xmlParserHandlePEReference:
713 * @ctxt: the parser context
714 *
715 * [69] PEReference ::= '%' Name ';'
716 *
717 * [ WFC: No Recursion ]
718 * A parsed entity must not contain a recursive
719 * reference to itself, either directly or indirectly.
720 *
721 * [ WFC: Entity Declared ]
722 * In a document without any DTD, a document with only an internal DTD
723 * subset which contains no parameter entity references, or a document
724 * with "standalone='yes'", ... ... The declaration of a parameter
725 * entity must precede any reference to it...
726 *
727 * [ VC: Entity Declared ]
728 * In a document with an external subset or external parameter entities
729 * with "standalone='no'", ... ... The declaration of a parameter entity
730 * must precede any reference to it...
731 *
732 * [ WFC: In DTD ]
733 * Parameter-entity references may only appear in the DTD.
734 * NOTE: misleading but this is handled.
735 *
736 * A PEReference may have been detected in the current input stream
737 * the handling is done accordingly to
738 * http://www.w3.org/TR/REC-xml#entproc
739 * i.e.
740 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000741 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000742 */
743void
744xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
745 xmlChar *name;
746 xmlEntityPtr entity = NULL;
747 xmlParserInputPtr input;
748
Owen Taylor3473f882001-02-23 17:55:21 +0000749 if (RAW != '%') return;
750 switch(ctxt->instate) {
751 case XML_PARSER_CDATA_SECTION:
752 return;
753 case XML_PARSER_COMMENT:
754 return;
755 case XML_PARSER_START_TAG:
756 return;
757 case XML_PARSER_END_TAG:
758 return;
759 case XML_PARSER_EOF:
760 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
761 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
762 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
763 ctxt->wellFormed = 0;
764 ctxt->disableSAX = 1;
765 return;
766 case XML_PARSER_PROLOG:
767 case XML_PARSER_START:
768 case XML_PARSER_MISC:
769 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
770 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
771 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
772 ctxt->wellFormed = 0;
773 ctxt->disableSAX = 1;
774 return;
775 case XML_PARSER_ENTITY_DECL:
776 case XML_PARSER_CONTENT:
777 case XML_PARSER_ATTRIBUTE_VALUE:
778 case XML_PARSER_PI:
779 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +0000780 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +0000781 /* we just ignore it there */
782 return;
783 case XML_PARSER_EPILOG:
784 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
785 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
786 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
787 ctxt->wellFormed = 0;
788 ctxt->disableSAX = 1;
789 return;
790 case XML_PARSER_ENTITY_VALUE:
791 /*
792 * NOTE: in the case of entity values, we don't do the
793 * substitution here since we need the literal
794 * entity value to be able to save the internal
795 * subset of the document.
796 * This will be handled by xmlStringDecodeEntities
797 */
798 return;
799 case XML_PARSER_DTD:
800 /*
801 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
802 * In the internal DTD subset, parameter-entity references
803 * can occur only where markup declarations can occur, not
804 * within markup declarations.
805 * In that case this is handled in xmlParseMarkupDecl
806 */
807 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
808 return;
Daniel Veillardf5582f12002-06-11 10:08:16 +0000809 if (IS_BLANK(NXT(1)) || NXT(1) == 0)
810 return;
Owen Taylor3473f882001-02-23 17:55:21 +0000811 break;
812 case XML_PARSER_IGNORE:
813 return;
814 }
815
816 NEXT;
817 name = xmlParseName(ctxt);
818 if (xmlParserDebugEntities)
819 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000820 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000821 if (name == NULL) {
822 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
823 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000824 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000825 ctxt->wellFormed = 0;
826 ctxt->disableSAX = 1;
827 } else {
828 if (RAW == ';') {
829 NEXT;
830 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
831 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
832 if (entity == NULL) {
833
834 /*
835 * [ WFC: Entity Declared ]
836 * In a document without any DTD, a document with only an
837 * internal DTD subset which contains no parameter entity
838 * references, or a document with "standalone='yes'", ...
839 * ... The declaration of a parameter entity must precede
840 * any reference to it...
841 */
842 if ((ctxt->standalone == 1) ||
843 ((ctxt->hasExternalSubset == 0) &&
844 (ctxt->hasPErefs == 0))) {
845 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
846 ctxt->sax->error(ctxt->userData,
847 "PEReference: %%%s; not found\n", name);
848 ctxt->wellFormed = 0;
849 ctxt->disableSAX = 1;
850 } else {
851 /*
852 * [ VC: Entity Declared ]
853 * In a document with an external subset or external
854 * parameter entities with "standalone='no'", ...
855 * ... The declaration of a parameter entity must precede
856 * any reference to it...
857 */
858 if ((!ctxt->disableSAX) &&
859 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
860 ctxt->vctxt.error(ctxt->vctxt.userData,
861 "PEReference: %%%s; not found\n", name);
862 } else if ((!ctxt->disableSAX) &&
863 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
864 ctxt->sax->warning(ctxt->userData,
865 "PEReference: %%%s; not found\n", name);
866 ctxt->valid = 0;
867 }
Daniel Veillardf5582f12002-06-11 10:08:16 +0000868 } else if (ctxt->input->free != deallocblankswrapper) {
869 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
870 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +0000871 } else {
872 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
873 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000874 xmlChar start[4];
875 xmlCharEncoding enc;
876
Owen Taylor3473f882001-02-23 17:55:21 +0000877 /*
878 * handle the extra spaces added before and after
879 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000880 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000881 */
882 input = xmlNewEntityInputStream(ctxt, entity);
883 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000884
885 /*
886 * Get the 4 first bytes and decode the charset
887 * if enc != XML_CHAR_ENCODING_NONE
888 * plug some encoding conversion routines.
889 */
890 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +0000891 if (entity->length >= 4) {
892 start[0] = RAW;
893 start[1] = NXT(1);
894 start[2] = NXT(2);
895 start[3] = NXT(3);
896 enc = xmlDetectCharEncoding(start, 4);
897 if (enc != XML_CHAR_ENCODING_NONE) {
898 xmlSwitchEncoding(ctxt, enc);
899 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000900 }
901
Owen Taylor3473f882001-02-23 17:55:21 +0000902 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
903 (RAW == '<') && (NXT(1) == '?') &&
904 (NXT(2) == 'x') && (NXT(3) == 'm') &&
905 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
906 xmlParseTextDecl(ctxt);
907 }
Owen Taylor3473f882001-02-23 17:55:21 +0000908 } else {
909 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
910 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000911 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000912 name);
913 ctxt->wellFormed = 0;
914 ctxt->disableSAX = 1;
915 }
916 }
917 } else {
918 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
919 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
920 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000921 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000922 ctxt->wellFormed = 0;
923 ctxt->disableSAX = 1;
924 }
925 xmlFree(name);
926 }
927}
928
/*
 * Macro used to grow the current buffer.
 *
 * Doubles the capacity of @buffer; the capacity is tracked in the companion
 * variable named <buffer>_size. The macro can only be expanded inside a
 * function returning a pointer: if the reallocation fails it reports the
 * error, releases the old block (so it is not leaked) and makes the
 * enclosing function return NULL.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_tmp;						\
    buffer##_size *= 2;							\
    buffer##_tmp = (xmlChar *)						\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (buffer##_tmp == NULL) {						\
	xmlGenericError(xmlGenericErrorContext, "realloc failed");	\
	xmlFree(buffer);						\
	buffer = NULL;							\
	return(NULL);							\
    }									\
    buffer = buffer##_tmp;						\
}
941
942/**
943 * xmlStringDecodeEntities:
944 * @ctxt: the parser context
945 * @str: the input string
946 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
947 * @end: an end marker xmlChar, 0 if none
948 * @end2: an end marker xmlChar, 0 if none
949 * @end3: an end marker xmlChar, 0 if none
950 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000951 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +0000952 *
953 * [67] Reference ::= EntityRef | CharRef
954 *
955 * [69] PEReference ::= '%' Name ';'
956 *
957 * Returns A newly allocated string with the substitution done. The caller
958 * must deallocate it !
959 */
960xmlChar *
961xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
962 xmlChar end, xmlChar end2, xmlChar end3) {
963 xmlChar *buffer = NULL;
964 int buffer_size = 0;
965
966 xmlChar *current = NULL;
967 xmlEntityPtr ent;
968 int c,l;
969 int nbchars = 0;
970
971 if (str == NULL)
972 return(NULL);
973
974 if (ctxt->depth > 40) {
975 ctxt->errNo = XML_ERR_ENTITY_LOOP;
976 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
977 ctxt->sax->error(ctxt->userData,
978 "Detected entity reference loop\n");
979 ctxt->wellFormed = 0;
980 ctxt->disableSAX = 1;
981 return(NULL);
982 }
983
984 /*
985 * allocate a translation buffer.
986 */
987 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
988 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
989 if (buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +0000990 xmlGenericError(xmlGenericErrorContext,
991 "xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +0000992 return(NULL);
993 }
994
995 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000996 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +0000997 * we are operating on already parsed values.
998 */
999 c = CUR_SCHAR(str, l);
1000 while ((c != 0) && (c != end) && /* non input consuming loop */
1001 (c != end2) && (c != end3)) {
1002
1003 if (c == 0) break;
1004 if ((c == '&') && (str[1] == '#')) {
1005 int val = xmlParseStringCharRef(ctxt, &str);
1006 if (val != 0) {
1007 COPY_BUF(0,buffer,nbchars,val);
1008 }
1009 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1010 if (xmlParserDebugEntities)
1011 xmlGenericError(xmlGenericErrorContext,
1012 "String decoding Entity Reference: %.30s\n",
1013 str);
1014 ent = xmlParseStringEntityRef(ctxt, &str);
1015 if ((ent != NULL) &&
1016 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1017 if (ent->content != NULL) {
1018 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1019 } else {
1020 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1021 ctxt->sax->error(ctxt->userData,
1022 "internal error entity has no content\n");
1023 }
1024 } else if ((ent != NULL) && (ent->content != NULL)) {
1025 xmlChar *rep;
1026
1027 ctxt->depth++;
1028 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1029 0, 0, 0);
1030 ctxt->depth--;
1031 if (rep != NULL) {
1032 current = rep;
1033 while (*current != 0) { /* non input consuming loop */
1034 buffer[nbchars++] = *current++;
1035 if (nbchars >
1036 buffer_size - XML_PARSER_BUFFER_SIZE) {
1037 growBuffer(buffer);
1038 }
1039 }
1040 xmlFree(rep);
1041 }
1042 } else if (ent != NULL) {
1043 int i = xmlStrlen(ent->name);
1044 const xmlChar *cur = ent->name;
1045
1046 buffer[nbchars++] = '&';
1047 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1048 growBuffer(buffer);
1049 }
1050 for (;i > 0;i--)
1051 buffer[nbchars++] = *cur++;
1052 buffer[nbchars++] = ';';
1053 }
1054 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1055 if (xmlParserDebugEntities)
1056 xmlGenericError(xmlGenericErrorContext,
1057 "String decoding PE Reference: %.30s\n", str);
1058 ent = xmlParseStringPEReference(ctxt, &str);
1059 if (ent != NULL) {
1060 xmlChar *rep;
1061
1062 ctxt->depth++;
1063 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1064 0, 0, 0);
1065 ctxt->depth--;
1066 if (rep != NULL) {
1067 current = rep;
1068 while (*current != 0) { /* non input consuming loop */
1069 buffer[nbchars++] = *current++;
1070 if (nbchars >
1071 buffer_size - XML_PARSER_BUFFER_SIZE) {
1072 growBuffer(buffer);
1073 }
1074 }
1075 xmlFree(rep);
1076 }
1077 }
1078 } else {
1079 COPY_BUF(l,buffer,nbchars,c);
1080 str += l;
1081 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1082 growBuffer(buffer);
1083 }
1084 }
1085 c = CUR_SCHAR(str, l);
1086 }
1087 buffer[nbchars++] = 0;
1088 return(buffer);
1089}
1090
1091
1092/************************************************************************
1093 * *
1094 * Commodity functions to handle xmlChars *
1095 * *
1096 ************************************************************************/
1097
1098/**
1099 * xmlStrndup:
1100 * @cur: the input xmlChar *
1101 * @len: the len of @cur
1102 *
1103 * a strndup for array of xmlChar's
1104 *
1105 * Returns a new xmlChar * or NULL
1106 */
1107xmlChar *
1108xmlStrndup(const xmlChar *cur, int len) {
1109 xmlChar *ret;
1110
1111 if ((cur == NULL) || (len < 0)) return(NULL);
1112 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1113 if (ret == NULL) {
1114 xmlGenericError(xmlGenericErrorContext,
1115 "malloc of %ld byte failed\n",
1116 (len + 1) * (long)sizeof(xmlChar));
1117 return(NULL);
1118 }
1119 memcpy(ret, cur, len * sizeof(xmlChar));
1120 ret[len] = 0;
1121 return(ret);
1122}
1123
1124/**
1125 * xmlStrdup:
1126 * @cur: the input xmlChar *
1127 *
1128 * a strdup for array of xmlChar's. Since they are supposed to be
1129 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1130 * a termination mark of '0'.
1131 *
1132 * Returns a new xmlChar * or NULL
1133 */
1134xmlChar *
1135xmlStrdup(const xmlChar *cur) {
1136 const xmlChar *p = cur;
1137
1138 if (cur == NULL) return(NULL);
1139 while (*p != 0) p++; /* non input consuming */
1140 return(xmlStrndup(cur, p - cur));
1141}
1142
1143/**
1144 * xmlCharStrndup:
1145 * @cur: the input char *
1146 * @len: the len of @cur
1147 *
1148 * a strndup for char's to xmlChar's
1149 *
1150 * Returns a new xmlChar * or NULL
1151 */
1152
1153xmlChar *
1154xmlCharStrndup(const char *cur, int len) {
1155 int i;
1156 xmlChar *ret;
1157
1158 if ((cur == NULL) || (len < 0)) return(NULL);
1159 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1160 if (ret == NULL) {
1161 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1162 (len + 1) * (long)sizeof(xmlChar));
1163 return(NULL);
1164 }
1165 for (i = 0;i < len;i++)
1166 ret[i] = (xmlChar) cur[i];
1167 ret[len] = 0;
1168 return(ret);
1169}
1170
1171/**
1172 * xmlCharStrdup:
1173 * @cur: the input char *
1174 * @len: the len of @cur
1175 *
1176 * a strdup for char's to xmlChar's
1177 *
1178 * Returns a new xmlChar * or NULL
1179 */
1180
1181xmlChar *
1182xmlCharStrdup(const char *cur) {
1183 const char *p = cur;
1184
1185 if (cur == NULL) return(NULL);
1186 while (*p != '\0') p++; /* non input consuming */
1187 return(xmlCharStrndup(cur, p - cur));
1188}
1189
1190/**
1191 * xmlStrcmp:
1192 * @str1: the first xmlChar *
1193 * @str2: the second xmlChar *
1194 *
1195 * a strcmp for xmlChar's
1196 *
1197 * Returns the integer result of the comparison
1198 */
1199
1200int
1201xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1202 register int tmp;
1203
1204 if (str1 == str2) return(0);
1205 if (str1 == NULL) return(-1);
1206 if (str2 == NULL) return(1);
1207 do {
1208 tmp = *str1++ - *str2;
1209 if (tmp != 0) return(tmp);
1210 } while (*str2++ != 0);
1211 return 0;
1212}
1213
1214/**
1215 * xmlStrEqual:
1216 * @str1: the first xmlChar *
1217 * @str2: the second xmlChar *
1218 *
1219 * Check if both string are equal of have same content
1220 * Should be a bit more readable and faster than xmlStrEqual()
1221 *
1222 * Returns 1 if they are equal, 0 if they are different
1223 */
1224
1225int
1226xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1227 if (str1 == str2) return(1);
1228 if (str1 == NULL) return(0);
1229 if (str2 == NULL) return(0);
1230 do {
1231 if (*str1++ != *str2) return(0);
1232 } while (*str2++);
1233 return(1);
1234}
1235
1236/**
1237 * xmlStrncmp:
1238 * @str1: the first xmlChar *
1239 * @str2: the second xmlChar *
1240 * @len: the max comparison length
1241 *
1242 * a strncmp for xmlChar's
1243 *
1244 * Returns the integer result of the comparison
1245 */
1246
1247int
1248xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1249 register int tmp;
1250
1251 if (len <= 0) return(0);
1252 if (str1 == str2) return(0);
1253 if (str1 == NULL) return(-1);
1254 if (str2 == NULL) return(1);
1255 do {
1256 tmp = *str1++ - *str2;
1257 if (tmp != 0 || --len == 0) return(tmp);
1258 } while (*str2++ != 0);
1259 return 0;
1260}
1261
Daniel Veillardb44025c2001-10-11 22:55:55 +00001262static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001263 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1264 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1265 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1266 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1267 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1268 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1269 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1270 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1271 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1272 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1273 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1274 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1275 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1276 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1277 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1278 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1279 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1280 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1281 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1282 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1283 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1284 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1285 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1286 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1287 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1288 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1289 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1290 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1291 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1292 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1293 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1294 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1295};
1296
1297/**
1298 * xmlStrcasecmp:
1299 * @str1: the first xmlChar *
1300 * @str2: the second xmlChar *
1301 *
1302 * a strcasecmp for xmlChar's
1303 *
1304 * Returns the integer result of the comparison
1305 */
1306
1307int
1308xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1309 register int tmp;
1310
1311 if (str1 == str2) return(0);
1312 if (str1 == NULL) return(-1);
1313 if (str2 == NULL) return(1);
1314 do {
1315 tmp = casemap[*str1++] - casemap[*str2];
1316 if (tmp != 0) return(tmp);
1317 } while (*str2++ != 0);
1318 return 0;
1319}
1320
1321/**
1322 * xmlStrncasecmp:
1323 * @str1: the first xmlChar *
1324 * @str2: the second xmlChar *
1325 * @len: the max comparison length
1326 *
1327 * a strncasecmp for xmlChar's
1328 *
1329 * Returns the integer result of the comparison
1330 */
1331
1332int
1333xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1334 register int tmp;
1335
1336 if (len <= 0) return(0);
1337 if (str1 == str2) return(0);
1338 if (str1 == NULL) return(-1);
1339 if (str2 == NULL) return(1);
1340 do {
1341 tmp = casemap[*str1++] - casemap[*str2];
1342 if (tmp != 0 || --len == 0) return(tmp);
1343 } while (*str2++ != 0);
1344 return 0;
1345}
1346
1347/**
1348 * xmlStrchr:
1349 * @str: the xmlChar * array
1350 * @val: the xmlChar to search
1351 *
1352 * a strchr for xmlChar's
1353 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001354 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001355 */
1356
1357const xmlChar *
1358xmlStrchr(const xmlChar *str, xmlChar val) {
1359 if (str == NULL) return(NULL);
1360 while (*str != 0) { /* non input consuming */
1361 if (*str == val) return((xmlChar *) str);
1362 str++;
1363 }
1364 return(NULL);
1365}
1366
1367/**
1368 * xmlStrstr:
1369 * @str: the xmlChar * array (haystack)
1370 * @val: the xmlChar to search (needle)
1371 *
1372 * a strstr for xmlChar's
1373 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001374 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001375 */
1376
1377const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001378xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001379 int n;
1380
1381 if (str == NULL) return(NULL);
1382 if (val == NULL) return(NULL);
1383 n = xmlStrlen(val);
1384
1385 if (n == 0) return(str);
1386 while (*str != 0) { /* non input consuming */
1387 if (*str == *val) {
1388 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1389 }
1390 str++;
1391 }
1392 return(NULL);
1393}
1394
1395/**
1396 * xmlStrcasestr:
1397 * @str: the xmlChar * array (haystack)
1398 * @val: the xmlChar to search (needle)
1399 *
1400 * a case-ignoring strstr for xmlChar's
1401 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001402 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001403 */
1404
1405const xmlChar *
1406xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1407 int n;
1408
1409 if (str == NULL) return(NULL);
1410 if (val == NULL) return(NULL);
1411 n = xmlStrlen(val);
1412
1413 if (n == 0) return(str);
1414 while (*str != 0) { /* non input consuming */
1415 if (casemap[*str] == casemap[*val])
1416 if (!xmlStrncasecmp(str, val, n)) return(str);
1417 str++;
1418 }
1419 return(NULL);
1420}
1421
1422/**
1423 * xmlStrsub:
1424 * @str: the xmlChar * array (haystack)
1425 * @start: the index of the first char (zero based)
1426 * @len: the length of the substring
1427 *
1428 * Extract a substring of a given string
1429 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001430 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001431 */
1432
1433xmlChar *
1434xmlStrsub(const xmlChar *str, int start, int len) {
1435 int i;
1436
1437 if (str == NULL) return(NULL);
1438 if (start < 0) return(NULL);
1439 if (len < 0) return(NULL);
1440
1441 for (i = 0;i < start;i++) {
1442 if (*str == 0) return(NULL);
1443 str++;
1444 }
1445 if (*str == 0) return(NULL);
1446 return(xmlStrndup(str, len));
1447}
1448
1449/**
1450 * xmlStrlen:
1451 * @str: the xmlChar * array
1452 *
1453 * length of a xmlChar's string
1454 *
1455 * Returns the number of xmlChar contained in the ARRAY.
1456 */
1457
1458int
1459xmlStrlen(const xmlChar *str) {
1460 int len = 0;
1461
1462 if (str == NULL) return(0);
1463 while (*str != 0) { /* non input consuming */
1464 str++;
1465 len++;
1466 }
1467 return(len);
1468}
1469
1470/**
1471 * xmlStrncat:
1472 * @cur: the original xmlChar * array
1473 * @add: the xmlChar * array added
1474 * @len: the length of @add
1475 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001476 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001477 * first bytes of @add.
1478 *
1479 * Returns a new xmlChar *, the original @cur is reallocated if needed
1480 * and should not be freed
1481 */
1482
1483xmlChar *
1484xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1485 int size;
1486 xmlChar *ret;
1487
1488 if ((add == NULL) || (len == 0))
1489 return(cur);
1490 if (cur == NULL)
1491 return(xmlStrndup(add, len));
1492
1493 size = xmlStrlen(cur);
1494 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1495 if (ret == NULL) {
1496 xmlGenericError(xmlGenericErrorContext,
1497 "xmlStrncat: realloc of %ld byte failed\n",
1498 (size + len + 1) * (long)sizeof(xmlChar));
1499 return(cur);
1500 }
1501 memcpy(&ret[size], add, len * sizeof(xmlChar));
1502 ret[size + len] = 0;
1503 return(ret);
1504}
1505
1506/**
1507 * xmlStrcat:
1508 * @cur: the original xmlChar * array
1509 * @add: the xmlChar * array added
1510 *
1511 * a strcat for array of xmlChar's. Since they are supposed to be
1512 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1513 * a termination mark of '0'.
1514 *
1515 * Returns a new xmlChar * containing the concatenated string.
1516 */
1517xmlChar *
1518xmlStrcat(xmlChar *cur, const xmlChar *add) {
1519 const xmlChar *p = add;
1520
1521 if (add == NULL) return(cur);
1522 if (cur == NULL)
1523 return(xmlStrdup(add));
1524
1525 while (*p != 0) p++; /* non input consuming */
1526 return(xmlStrncat(cur, add, p - add));
1527}
1528
1529/************************************************************************
1530 * *
1531 * Commodity functions, cleanup needed ? *
1532 * *
1533 ************************************************************************/
1534
1535/**
1536 * areBlanks:
1537 * @ctxt: an XML parser context
1538 * @str: a xmlChar *
1539 * @len: the size of @str
1540 *
1541 * Is this a sequence of blank chars that one can ignore ?
1542 *
1543 * Returns 1 if ignorable 0 otherwise.
1544 */
1545
1546static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1547 int i, ret;
1548 xmlNodePtr lastChild;
1549
Daniel Veillard05c13a22001-09-09 08:38:09 +00001550 /*
1551 * Don't spend time trying to differentiate them, the same callback is
1552 * used !
1553 */
1554 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001555 return(0);
1556
Owen Taylor3473f882001-02-23 17:55:21 +00001557 /*
1558 * Check for xml:space value.
1559 */
1560 if (*(ctxt->space) == 1)
1561 return(0);
1562
1563 /*
1564 * Check that the string is made of blanks
1565 */
1566 for (i = 0;i < len;i++)
1567 if (!(IS_BLANK(str[i]))) return(0);
1568
1569 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001570 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001571 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001572 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001573 if (ctxt->myDoc != NULL) {
1574 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1575 if (ret == 0) return(1);
1576 if (ret == 1) return(0);
1577 }
1578
1579 /*
1580 * Otherwise, heuristic :-\
1581 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001582 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001583 if ((ctxt->node->children == NULL) &&
1584 (RAW == '<') && (NXT(1) == '/')) return(0);
1585
1586 lastChild = xmlGetLastChild(ctxt->node);
1587 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001588 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1589 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001590 } else if (xmlNodeIsText(lastChild))
1591 return(0);
1592 else if ((ctxt->node->children != NULL) &&
1593 (xmlNodeIsText(ctxt->node->children)))
1594 return(0);
1595 return(1);
1596}
1597
Owen Taylor3473f882001-02-23 17:55:21 +00001598/************************************************************************
1599 * *
1600 * Extra stuff for namespace support *
1601 * Relates to http://www.w3.org/TR/WD-xml-names *
1602 * *
1603 ************************************************************************/
1604
1605/**
1606 * xmlSplitQName:
1607 * @ctxt: an XML parser context
1608 * @name: an XML parser context
1609 * @prefix: a xmlChar **
1610 *
1611 * parse an UTF8 encoded XML qualified name string
1612 *
1613 * [NS 5] QName ::= (Prefix ':')? LocalPart
1614 *
1615 * [NS 6] Prefix ::= NCName
1616 *
1617 * [NS 7] LocalPart ::= NCName
1618 *
1619 * Returns the local part, and prefix is updated
1620 * to get the Prefix if any.
1621 */
1622
1623xmlChar *
1624xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1625 xmlChar buf[XML_MAX_NAMELEN + 5];
1626 xmlChar *buffer = NULL;
1627 int len = 0;
1628 int max = XML_MAX_NAMELEN;
1629 xmlChar *ret = NULL;
1630 const xmlChar *cur = name;
1631 int c;
1632
1633 *prefix = NULL;
1634
1635#ifndef XML_XML_NAMESPACE
1636 /* xml: prefix is not really a namespace */
1637 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1638 (cur[2] == 'l') && (cur[3] == ':'))
1639 return(xmlStrdup(name));
1640#endif
1641
1642 /* nasty but valid */
1643 if (cur[0] == ':')
1644 return(xmlStrdup(name));
1645
1646 c = *cur++;
1647 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1648 buf[len++] = c;
1649 c = *cur++;
1650 }
1651 if (len >= max) {
1652 /*
1653 * Okay someone managed to make a huge name, so he's ready to pay
1654 * for the processing speed.
1655 */
1656 max = len * 2;
1657
1658 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1659 if (buffer == NULL) {
1660 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1661 ctxt->sax->error(ctxt->userData,
1662 "xmlSplitQName: out of memory\n");
1663 return(NULL);
1664 }
1665 memcpy(buffer, buf, len);
1666 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1667 if (len + 10 > max) {
1668 max *= 2;
1669 buffer = (xmlChar *) xmlRealloc(buffer,
1670 max * sizeof(xmlChar));
1671 if (buffer == NULL) {
1672 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1673 ctxt->sax->error(ctxt->userData,
1674 "xmlSplitQName: out of memory\n");
1675 return(NULL);
1676 }
1677 }
1678 buffer[len++] = c;
1679 c = *cur++;
1680 }
1681 buffer[len] = 0;
1682 }
1683
1684 if (buffer == NULL)
1685 ret = xmlStrndup(buf, len);
1686 else {
1687 ret = buffer;
1688 buffer = NULL;
1689 max = XML_MAX_NAMELEN;
1690 }
1691
1692
1693 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00001694 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001695 if (c == 0) return(ret);
1696 *prefix = ret;
1697 len = 0;
1698
Daniel Veillardbb284f42002-10-16 18:02:47 +00001699 /*
1700 * Check that the first character is proper to start
1701 * a new name
1702 */
1703 if (!(((c >= 0x61) && (c <= 0x7A)) ||
1704 ((c >= 0x41) && (c <= 0x5A)) ||
1705 (c == '_') || (c == ':'))) {
1706 int l;
1707 int first = CUR_SCHAR(cur, l);
1708
1709 if (!IS_LETTER(first) && (first != '_')) {
1710 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1711 ctxt->sax->error(ctxt->userData,
1712 "Name %s is not XML Namespace compliant\n",
1713 name);
1714 }
1715 }
1716 cur++;
1717
Owen Taylor3473f882001-02-23 17:55:21 +00001718 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1719 buf[len++] = c;
1720 c = *cur++;
1721 }
1722 if (len >= max) {
1723 /*
1724 * Okay someone managed to make a huge name, so he's ready to pay
1725 * for the processing speed.
1726 */
1727 max = len * 2;
1728
1729 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1730 if (buffer == NULL) {
1731 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1732 ctxt->sax->error(ctxt->userData,
1733 "xmlSplitQName: out of memory\n");
1734 return(NULL);
1735 }
1736 memcpy(buffer, buf, len);
1737 while (c != 0) { /* tested bigname2.xml */
1738 if (len + 10 > max) {
1739 max *= 2;
1740 buffer = (xmlChar *) xmlRealloc(buffer,
1741 max * sizeof(xmlChar));
1742 if (buffer == NULL) {
1743 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1744 ctxt->sax->error(ctxt->userData,
1745 "xmlSplitQName: out of memory\n");
1746 return(NULL);
1747 }
1748 }
1749 buffer[len++] = c;
1750 c = *cur++;
1751 }
1752 buffer[len] = 0;
1753 }
1754
1755 if (buffer == NULL)
1756 ret = xmlStrndup(buf, len);
1757 else {
1758 ret = buffer;
1759 }
1760 }
1761
1762 return(ret);
1763}
1764
1765/************************************************************************
1766 * *
1767 * The parser itself *
1768 * Relates to http://www.w3.org/TR/REC-xml *
1769 * *
1770 ************************************************************************/
1771
Daniel Veillard76d66f42001-05-16 21:05:17 +00001772static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001773/**
1774 * xmlParseName:
1775 * @ctxt: an XML parser context
1776 *
1777 * parse an XML name.
1778 *
1779 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1780 * CombiningChar | Extender
1781 *
1782 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1783 *
1784 * [6] Names ::= Name (S Name)*
1785 *
1786 * Returns the Name parsed or NULL
1787 */
1788
1789xmlChar *
1790xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001791 const xmlChar *in;
1792 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001793 int count = 0;
1794
1795 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001796
1797 /*
1798 * Accelerator for simple ASCII names
1799 */
1800 in = ctxt->input->cur;
1801 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1802 ((*in >= 0x41) && (*in <= 0x5A)) ||
1803 (*in == '_') || (*in == ':')) {
1804 in++;
1805 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1806 ((*in >= 0x41) && (*in <= 0x5A)) ||
1807 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001808 (*in == '_') || (*in == '-') ||
1809 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001810 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001811 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001812 count = in - ctxt->input->cur;
1813 ret = xmlStrndup(ctxt->input->cur, count);
1814 ctxt->input->cur = in;
1815 return(ret);
1816 }
1817 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001818 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001819}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001820
Daniel Veillard46de64e2002-05-29 08:21:33 +00001821/**
1822 * xmlParseNameAndCompare:
1823 * @ctxt: an XML parser context
1824 *
1825 * parse an XML name and compares for match
1826 * (specialized for endtag parsing)
1827 *
1828 *
1829 * Returns NULL for an illegal name, (xmlChar*) 1 for success
1830 * and the name for mismatch
1831 */
1832
Daniel Veillardf4862f02002-09-10 11:13:43 +00001833static xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00001834xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
1835 const xmlChar *cmp = other;
1836 const xmlChar *in;
1837 xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00001838
1839 GROW;
1840
1841 in = ctxt->input->cur;
1842 while (*in != 0 && *in == *cmp) {
1843 ++in;
1844 ++cmp;
1845 }
1846 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
1847 /* success */
1848 ctxt->input->cur = in;
1849 return (xmlChar*) 1;
1850 }
1851 /* failure (or end of input buffer), check with full function */
1852 ret = xmlParseName (ctxt);
1853 if (ret != 0 && xmlStrEqual (ret, other)) {
1854 xmlFree (ret);
1855 return (xmlChar*) 1;
1856 }
1857 return ret;
1858}
1859
Daniel Veillard76d66f42001-05-16 21:05:17 +00001860static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001861xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1862 xmlChar buf[XML_MAX_NAMELEN + 5];
1863 int len = 0, l;
1864 int c;
1865 int count = 0;
1866
1867 /*
1868 * Handler for more complex cases
1869 */
1870 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001871 c = CUR_CHAR(l);
1872 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1873 (!IS_LETTER(c) && (c != '_') &&
1874 (c != ':'))) {
1875 return(NULL);
1876 }
1877
1878 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1879 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1880 (c == '.') || (c == '-') ||
1881 (c == '_') || (c == ':') ||
1882 (IS_COMBINING(c)) ||
1883 (IS_EXTENDER(c)))) {
1884 if (count++ > 100) {
1885 count = 0;
1886 GROW;
1887 }
1888 COPY_BUF(l,buf,len,c);
1889 NEXTL(l);
1890 c = CUR_CHAR(l);
1891 if (len >= XML_MAX_NAMELEN) {
1892 /*
1893 * Okay someone managed to make a huge name, so he's ready to pay
1894 * for the processing speed.
1895 */
1896 xmlChar *buffer;
1897 int max = len * 2;
1898
1899 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1900 if (buffer == NULL) {
1901 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1902 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001903 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001904 return(NULL);
1905 }
1906 memcpy(buffer, buf, len);
1907 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1908 (c == '.') || (c == '-') ||
1909 (c == '_') || (c == ':') ||
1910 (IS_COMBINING(c)) ||
1911 (IS_EXTENDER(c))) {
1912 if (count++ > 100) {
1913 count = 0;
1914 GROW;
1915 }
1916 if (len + 10 > max) {
1917 max *= 2;
1918 buffer = (xmlChar *) xmlRealloc(buffer,
1919 max * sizeof(xmlChar));
1920 if (buffer == NULL) {
1921 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1922 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001923 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001924 return(NULL);
1925 }
1926 }
1927 COPY_BUF(l,buffer,len,c);
1928 NEXTL(l);
1929 c = CUR_CHAR(l);
1930 }
1931 buffer[len] = 0;
1932 return(buffer);
1933 }
1934 }
1935 return(xmlStrndup(buf, len));
1936}
1937
1938/**
1939 * xmlParseStringName:
1940 * @ctxt: an XML parser context
1941 * @str: a pointer to the string pointer (IN/OUT)
1942 *
1943 * parse an XML name.
1944 *
1945 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1946 * CombiningChar | Extender
1947 *
1948 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1949 *
1950 * [6] Names ::= Name (S Name)*
1951 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001952 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00001953 * is updated to the current location in the string.
1954 */
1955
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001956static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001957xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1958 xmlChar buf[XML_MAX_NAMELEN + 5];
1959 const xmlChar *cur = *str;
1960 int len = 0, l;
1961 int c;
1962
1963 c = CUR_SCHAR(cur, l);
1964 if (!IS_LETTER(c) && (c != '_') &&
1965 (c != ':')) {
1966 return(NULL);
1967 }
1968
1969 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1970 (c == '.') || (c == '-') ||
1971 (c == '_') || (c == ':') ||
1972 (IS_COMBINING(c)) ||
1973 (IS_EXTENDER(c))) {
1974 COPY_BUF(l,buf,len,c);
1975 cur += l;
1976 c = CUR_SCHAR(cur, l);
1977 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1978 /*
1979 * Okay someone managed to make a huge name, so he's ready to pay
1980 * for the processing speed.
1981 */
1982 xmlChar *buffer;
1983 int max = len * 2;
1984
1985 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1986 if (buffer == NULL) {
1987 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1988 ctxt->sax->error(ctxt->userData,
1989 "xmlParseStringName: out of memory\n");
1990 return(NULL);
1991 }
1992 memcpy(buffer, buf, len);
1993 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1994 (c == '.') || (c == '-') ||
1995 (c == '_') || (c == ':') ||
1996 (IS_COMBINING(c)) ||
1997 (IS_EXTENDER(c))) {
1998 if (len + 10 > max) {
1999 max *= 2;
2000 buffer = (xmlChar *) xmlRealloc(buffer,
2001 max * sizeof(xmlChar));
2002 if (buffer == NULL) {
2003 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2004 ctxt->sax->error(ctxt->userData,
2005 "xmlParseStringName: out of memory\n");
2006 return(NULL);
2007 }
2008 }
2009 COPY_BUF(l,buffer,len,c);
2010 cur += l;
2011 c = CUR_SCHAR(cur, l);
2012 }
2013 buffer[len] = 0;
2014 *str = cur;
2015 return(buffer);
2016 }
2017 }
2018 *str = cur;
2019 return(xmlStrndup(buf, len));
2020}
2021
2022/**
2023 * xmlParseNmtoken:
2024 * @ctxt: an XML parser context
2025 *
2026 * parse an XML Nmtoken.
2027 *
2028 * [7] Nmtoken ::= (NameChar)+
2029 *
2030 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2031 *
2032 * Returns the Nmtoken parsed or NULL
2033 */
2034
2035xmlChar *
2036xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2037 xmlChar buf[XML_MAX_NAMELEN + 5];
2038 int len = 0, l;
2039 int c;
2040 int count = 0;
2041
2042 GROW;
2043 c = CUR_CHAR(l);
2044
2045 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2046 (c == '.') || (c == '-') ||
2047 (c == '_') || (c == ':') ||
2048 (IS_COMBINING(c)) ||
2049 (IS_EXTENDER(c))) {
2050 if (count++ > 100) {
2051 count = 0;
2052 GROW;
2053 }
2054 COPY_BUF(l,buf,len,c);
2055 NEXTL(l);
2056 c = CUR_CHAR(l);
2057 if (len >= XML_MAX_NAMELEN) {
2058 /*
2059 * Okay someone managed to make a huge token, so he's ready to pay
2060 * for the processing speed.
2061 */
2062 xmlChar *buffer;
2063 int max = len * 2;
2064
2065 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
2066 if (buffer == NULL) {
2067 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2068 ctxt->sax->error(ctxt->userData,
2069 "xmlParseNmtoken: out of memory\n");
2070 return(NULL);
2071 }
2072 memcpy(buffer, buf, len);
2073 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2074 (c == '.') || (c == '-') ||
2075 (c == '_') || (c == ':') ||
2076 (IS_COMBINING(c)) ||
2077 (IS_EXTENDER(c))) {
2078 if (count++ > 100) {
2079 count = 0;
2080 GROW;
2081 }
2082 if (len + 10 > max) {
2083 max *= 2;
2084 buffer = (xmlChar *) xmlRealloc(buffer,
2085 max * sizeof(xmlChar));
2086 if (buffer == NULL) {
2087 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2088 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002089 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002090 return(NULL);
2091 }
2092 }
2093 COPY_BUF(l,buffer,len,c);
2094 NEXTL(l);
2095 c = CUR_CHAR(l);
2096 }
2097 buffer[len] = 0;
2098 return(buffer);
2099 }
2100 }
2101 if (len == 0)
2102 return(NULL);
2103 return(xmlStrndup(buf, len));
2104}
2105
2106/**
2107 * xmlParseEntityValue:
2108 * @ctxt: an XML parser context
2109 * @orig: if non-NULL store a copy of the original entity value
2110 *
2111 * parse a value for ENTITY declarations
2112 *
2113 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2114 * "'" ([^%&'] | PEReference | Reference)* "'"
2115 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002116 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002117 */
2118
2119xmlChar *
2120xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2121 xmlChar *buf = NULL;
2122 int len = 0;
2123 int size = XML_PARSER_BUFFER_SIZE;
2124 int c, l;
2125 xmlChar stop;
2126 xmlChar *ret = NULL;
2127 const xmlChar *cur = NULL;
2128 xmlParserInputPtr input;
2129
2130 if (RAW == '"') stop = '"';
2131 else if (RAW == '\'') stop = '\'';
2132 else {
2133 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2134 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2135 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2136 ctxt->wellFormed = 0;
2137 ctxt->disableSAX = 1;
2138 return(NULL);
2139 }
2140 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2141 if (buf == NULL) {
2142 xmlGenericError(xmlGenericErrorContext,
2143 "malloc of %d byte failed\n", size);
2144 return(NULL);
2145 }
2146
2147 /*
2148 * The content of the entity definition is copied in a buffer.
2149 */
2150
2151 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2152 input = ctxt->input;
2153 GROW;
2154 NEXT;
2155 c = CUR_CHAR(l);
2156 /*
2157 * NOTE: 4.4.5 Included in Literal
2158 * When a parameter entity reference appears in a literal entity
2159 * value, ... a single or double quote character in the replacement
2160 * text is always treated as a normal data character and will not
2161 * terminate the literal.
2162 * In practice it means we stop the loop only when back at parsing
2163 * the initial entity and the quote is found
2164 */
2165 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2166 (ctxt->input != input))) {
2167 if (len + 5 >= size) {
2168 size *= 2;
2169 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2170 if (buf == NULL) {
2171 xmlGenericError(xmlGenericErrorContext,
2172 "realloc of %d byte failed\n", size);
2173 return(NULL);
2174 }
2175 }
2176 COPY_BUF(l,buf,len,c);
2177 NEXTL(l);
2178 /*
2179 * Pop-up of finished entities.
2180 */
2181 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2182 xmlPopInput(ctxt);
2183
2184 GROW;
2185 c = CUR_CHAR(l);
2186 if (c == 0) {
2187 GROW;
2188 c = CUR_CHAR(l);
2189 }
2190 }
2191 buf[len] = 0;
2192
2193 /*
2194 * Raise problem w.r.t. '&' and '%' being used in non-entities
2195 * reference constructs. Note Charref will be handled in
2196 * xmlStringDecodeEntities()
2197 */
2198 cur = buf;
2199 while (*cur != 0) { /* non input consuming */
2200 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2201 xmlChar *name;
2202 xmlChar tmp = *cur;
2203
2204 cur++;
2205 name = xmlParseStringName(ctxt, &cur);
2206 if ((name == NULL) || (*cur != ';')) {
2207 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2208 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2209 ctxt->sax->error(ctxt->userData,
2210 "EntityValue: '%c' forbidden except for entities references\n",
2211 tmp);
2212 ctxt->wellFormed = 0;
2213 ctxt->disableSAX = 1;
2214 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002215 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2216 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002217 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2218 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2219 ctxt->sax->error(ctxt->userData,
2220 "EntityValue: PEReferences forbidden in internal subset\n",
2221 tmp);
2222 ctxt->wellFormed = 0;
2223 ctxt->disableSAX = 1;
2224 }
2225 if (name != NULL)
2226 xmlFree(name);
2227 }
2228 cur++;
2229 }
2230
2231 /*
2232 * Then PEReference entities are substituted.
2233 */
2234 if (c != stop) {
2235 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2236 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2237 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2238 ctxt->wellFormed = 0;
2239 ctxt->disableSAX = 1;
2240 xmlFree(buf);
2241 } else {
2242 NEXT;
2243 /*
2244 * NOTE: 4.4.7 Bypassed
2245 * When a general entity reference appears in the EntityValue in
2246 * an entity declaration, it is bypassed and left as is.
2247 * so XML_SUBSTITUTE_REF is not set here.
2248 */
2249 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2250 0, 0, 0);
2251 if (orig != NULL)
2252 *orig = buf;
2253 else
2254 xmlFree(buf);
2255 }
2256
2257 return(ret);
2258}
2259
2260/**
2261 * xmlParseAttValue:
2262 * @ctxt: an XML parser context
2263 *
2264 * parse a value for an attribute
2265 * Note: the parser won't do substitution of entities here, this
2266 * will be handled later in xmlStringGetNodeList
2267 *
2268 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2269 * "'" ([^<&'] | Reference)* "'"
2270 *
2271 * 3.3.3 Attribute-Value Normalization:
2272 * Before the value of an attribute is passed to the application or
2273 * checked for validity, the XML processor must normalize it as follows:
2274 * - a character reference is processed by appending the referenced
2275 * character to the attribute value
2276 * - an entity reference is processed by recursively processing the
2277 * replacement text of the entity
2278 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2279 * appending #x20 to the normalized value, except that only a single
2280 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2281 * parsed entity or the literal entity value of an internal parsed entity
2282 * - other characters are processed by appending them to the normalized value
2283 * If the declared value is not CDATA, then the XML processor must further
2284 * process the normalized attribute value by discarding any leading and
2285 * trailing space (#x20) characters, and by replacing sequences of space
2286 * (#x20) characters by a single space (#x20) character.
2287 * All attributes for which no declaration has been read should be treated
2288 * by a non-validating parser as if declared CDATA.
2289 *
2290 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2291 */
2292
2293xmlChar *
Daniel Veillarde72c7562002-05-31 09:47:30 +00002294xmlParseAttValueComplex(xmlParserCtxtPtr ctxt);
2295
2296xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002297xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2298 xmlChar limit = 0;
Daniel Veillardf4862f02002-09-10 11:13:43 +00002299 const xmlChar *in = NULL;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002300 xmlChar *ret = NULL;
2301 SHRINK;
2302 GROW;
Daniel Veillarde645e8c2002-10-22 17:35:37 +00002303 in = (xmlChar *) CUR_PTR;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002304 if (*in != '"' && *in != '\'') {
2305 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2306 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2307 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2308 ctxt->wellFormed = 0;
2309 ctxt->disableSAX = 1;
2310 return(NULL);
2311 }
2312 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2313 limit = *in;
2314 ++in;
2315
2316 while (*in != limit && *in >= 0x20 && *in <= 0x7f &&
2317 *in != '&' && *in != '<'
2318 ) {
2319 ++in;
2320 }
2321 if (*in != limit) {
2322 return xmlParseAttValueComplex(ctxt);
2323 }
2324 ++in;
2325 ret = xmlStrndup (CUR_PTR + 1, in - CUR_PTR - 2);
2326 CUR_PTR = in;
2327 return ret;
2328}
2329
2330xmlChar *
2331xmlParseAttValueComplex(xmlParserCtxtPtr ctxt) {
2332 xmlChar limit = 0;
2333 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002334 int len = 0;
2335 int buf_size = 0;
2336 int c, l;
2337 xmlChar *current = NULL;
2338 xmlEntityPtr ent;
2339
2340
2341 SHRINK;
2342 if (NXT(0) == '"') {
2343 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2344 limit = '"';
2345 NEXT;
2346 } else if (NXT(0) == '\'') {
2347 limit = '\'';
2348 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2349 NEXT;
2350 } else {
2351 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2352 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2353 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2354 ctxt->wellFormed = 0;
2355 ctxt->disableSAX = 1;
2356 return(NULL);
2357 }
2358
2359 /*
2360 * allocate a translation buffer.
2361 */
2362 buf_size = XML_PARSER_BUFFER_SIZE;
2363 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2364 if (buf == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002365 xmlGenericError(xmlGenericErrorContext,
2366 "xmlParseAttValue: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002367 return(NULL);
2368 }
2369
2370 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002371 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002372 */
2373 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002374 while ((NXT(0) != limit) && /* checked */
2375 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002376 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002377 if (c == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00002378 if (NXT(1) == '#') {
2379 int val = xmlParseCharRef(ctxt);
2380 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002381 if (ctxt->replaceEntities) {
2382 if (len > buf_size - 10) {
2383 growBuffer(buf);
2384 }
2385 buf[len++] = '&';
2386 } else {
2387 /*
2388 * The reparsing will be done in xmlStringGetNodeList()
2389 * called by the attribute() function in SAX.c
2390 */
2391 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002392
Daniel Veillard319a7422001-09-11 09:27:09 +00002393 if (len > buf_size - 10) {
2394 growBuffer(buf);
2395 }
2396 current = &buffer[0];
2397 while (*current != 0) { /* non input consuming */
2398 buf[len++] = *current++;
2399 }
Owen Taylor3473f882001-02-23 17:55:21 +00002400 }
2401 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002402 if (len > buf_size - 10) {
2403 growBuffer(buf);
2404 }
Owen Taylor3473f882001-02-23 17:55:21 +00002405 len += xmlCopyChar(0, &buf[len], val);
2406 }
2407 } else {
2408 ent = xmlParseEntityRef(ctxt);
2409 if ((ent != NULL) &&
2410 (ctxt->replaceEntities != 0)) {
2411 xmlChar *rep;
2412
2413 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2414 rep = xmlStringDecodeEntities(ctxt, ent->content,
2415 XML_SUBSTITUTE_REF, 0, 0, 0);
2416 if (rep != NULL) {
2417 current = rep;
2418 while (*current != 0) { /* non input consuming */
2419 buf[len++] = *current++;
2420 if (len > buf_size - 10) {
2421 growBuffer(buf);
2422 }
2423 }
2424 xmlFree(rep);
2425 }
2426 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002427 if (len > buf_size - 10) {
2428 growBuffer(buf);
2429 }
Owen Taylor3473f882001-02-23 17:55:21 +00002430 if (ent->content != NULL)
2431 buf[len++] = ent->content[0];
2432 }
2433 } else if (ent != NULL) {
2434 int i = xmlStrlen(ent->name);
2435 const xmlChar *cur = ent->name;
2436
2437 /*
2438 * This may look absurd but is needed to detect
2439 * entities problems
2440 */
2441 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2442 (ent->content != NULL)) {
2443 xmlChar *rep;
2444 rep = xmlStringDecodeEntities(ctxt, ent->content,
2445 XML_SUBSTITUTE_REF, 0, 0, 0);
2446 if (rep != NULL)
2447 xmlFree(rep);
2448 }
2449
2450 /*
2451 * Just output the reference
2452 */
2453 buf[len++] = '&';
2454 if (len > buf_size - i - 10) {
2455 growBuffer(buf);
2456 }
2457 for (;i > 0;i--)
2458 buf[len++] = *cur++;
2459 buf[len++] = ';';
2460 }
2461 }
2462 } else {
2463 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2464 COPY_BUF(l,buf,len,0x20);
2465 if (len > buf_size - 10) {
2466 growBuffer(buf);
2467 }
2468 } else {
2469 COPY_BUF(l,buf,len,c);
2470 if (len > buf_size - 10) {
2471 growBuffer(buf);
2472 }
2473 }
2474 NEXTL(l);
2475 }
2476 GROW;
2477 c = CUR_CHAR(l);
2478 }
2479 buf[len++] = 0;
2480 if (RAW == '<') {
2481 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2482 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2483 ctxt->sax->error(ctxt->userData,
2484 "Unescaped '<' not allowed in attributes values\n");
2485 ctxt->wellFormed = 0;
2486 ctxt->disableSAX = 1;
2487 } else if (RAW != limit) {
2488 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2489 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2490 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2491 ctxt->wellFormed = 0;
2492 ctxt->disableSAX = 1;
2493 } else
2494 NEXT;
2495 return(buf);
2496}
2497
2498/**
2499 * xmlParseSystemLiteral:
2500 * @ctxt: an XML parser context
2501 *
2502 * parse an XML Literal
2503 *
2504 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2505 *
2506 * Returns the SystemLiteral parsed or NULL
2507 */
2508
2509xmlChar *
2510xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2511 xmlChar *buf = NULL;
2512 int len = 0;
2513 int size = XML_PARSER_BUFFER_SIZE;
2514 int cur, l;
2515 xmlChar stop;
2516 int state = ctxt->instate;
2517 int count = 0;
2518
2519 SHRINK;
2520 if (RAW == '"') {
2521 NEXT;
2522 stop = '"';
2523 } else if (RAW == '\'') {
2524 NEXT;
2525 stop = '\'';
2526 } else {
2527 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2528 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2529 ctxt->sax->error(ctxt->userData,
2530 "SystemLiteral \" or ' expected\n");
2531 ctxt->wellFormed = 0;
2532 ctxt->disableSAX = 1;
2533 return(NULL);
2534 }
2535
2536 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2537 if (buf == NULL) {
2538 xmlGenericError(xmlGenericErrorContext,
2539 "malloc of %d byte failed\n", size);
2540 return(NULL);
2541 }
2542 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2543 cur = CUR_CHAR(l);
2544 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2545 if (len + 5 >= size) {
2546 size *= 2;
2547 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2548 if (buf == NULL) {
2549 xmlGenericError(xmlGenericErrorContext,
2550 "realloc of %d byte failed\n", size);
2551 ctxt->instate = (xmlParserInputState) state;
2552 return(NULL);
2553 }
2554 }
2555 count++;
2556 if (count > 50) {
2557 GROW;
2558 count = 0;
2559 }
2560 COPY_BUF(l,buf,len,cur);
2561 NEXTL(l);
2562 cur = CUR_CHAR(l);
2563 if (cur == 0) {
2564 GROW;
2565 SHRINK;
2566 cur = CUR_CHAR(l);
2567 }
2568 }
2569 buf[len] = 0;
2570 ctxt->instate = (xmlParserInputState) state;
2571 if (!IS_CHAR(cur)) {
2572 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2573 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2574 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2575 ctxt->wellFormed = 0;
2576 ctxt->disableSAX = 1;
2577 } else {
2578 NEXT;
2579 }
2580 return(buf);
2581}
2582
2583/**
2584 * xmlParsePubidLiteral:
2585 * @ctxt: an XML parser context
2586 *
2587 * parse an XML public literal
2588 *
2589 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2590 *
2591 * Returns the PubidLiteral parsed or NULL.
2592 */
2593
2594xmlChar *
2595xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2596 xmlChar *buf = NULL;
2597 int len = 0;
2598 int size = XML_PARSER_BUFFER_SIZE;
2599 xmlChar cur;
2600 xmlChar stop;
2601 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002602 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00002603
2604 SHRINK;
2605 if (RAW == '"') {
2606 NEXT;
2607 stop = '"';
2608 } else if (RAW == '\'') {
2609 NEXT;
2610 stop = '\'';
2611 } else {
2612 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2613 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2614 ctxt->sax->error(ctxt->userData,
2615 "SystemLiteral \" or ' expected\n");
2616 ctxt->wellFormed = 0;
2617 ctxt->disableSAX = 1;
2618 return(NULL);
2619 }
2620 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2621 if (buf == NULL) {
2622 xmlGenericError(xmlGenericErrorContext,
2623 "malloc of %d byte failed\n", size);
2624 return(NULL);
2625 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002626 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00002627 cur = CUR;
2628 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2629 if (len + 1 >= size) {
2630 size *= 2;
2631 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2632 if (buf == NULL) {
2633 xmlGenericError(xmlGenericErrorContext,
2634 "realloc of %d byte failed\n", size);
2635 return(NULL);
2636 }
2637 }
2638 buf[len++] = cur;
2639 count++;
2640 if (count > 50) {
2641 GROW;
2642 count = 0;
2643 }
2644 NEXT;
2645 cur = CUR;
2646 if (cur == 0) {
2647 GROW;
2648 SHRINK;
2649 cur = CUR;
2650 }
2651 }
2652 buf[len] = 0;
2653 if (cur != stop) {
2654 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2655 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2656 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2657 ctxt->wellFormed = 0;
2658 ctxt->disableSAX = 1;
2659 } else {
2660 NEXT;
2661 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002662 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00002663 return(buf);
2664}
2665
Daniel Veillard48b2f892001-02-25 16:11:03 +00002666void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002667/**
2668 * xmlParseCharData:
2669 * @ctxt: an XML parser context
2670 * @cdata: int indicating whether we are within a CDATA section
2671 *
2672 * parse a CharData section.
2673 * if we are within a CDATA section ']]>' marks an end of section.
2674 *
2675 * The right angle bracket (>) may be represented using the string "&gt;",
2676 * and must, for compatibility, be escaped using "&gt;" or a character
2677 * reference when it appears in the string "]]>" in content, when that
2678 * string is not marking the end of a CDATA section.
2679 *
2680 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2681 */
2682
2683void
2684xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00002685 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002686 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002687 int line = ctxt->input->line;
2688 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002689
2690 SHRINK;
2691 GROW;
2692 /*
2693 * Accelerated common case where input don't need to be
2694 * modified before passing it to the handler.
2695 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00002696 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002697 in = ctxt->input->cur;
2698 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002699get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00002700 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
2701 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002702 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002703 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002704 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002705 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002706 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002707 ctxt->input->line++;
2708 in++;
2709 }
2710 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002711 }
2712 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002713 if ((in[1] == ']') && (in[2] == '>')) {
2714 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2715 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2716 ctxt->sax->error(ctxt->userData,
2717 "Sequence ']]>' not allowed in content\n");
2718 ctxt->input->cur = in;
2719 ctxt->wellFormed = 0;
2720 ctxt->disableSAX = 1;
2721 return;
2722 }
2723 in++;
2724 goto get_more;
2725 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002726 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002727 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002728 if (IS_BLANK(*ctxt->input->cur)) {
2729 const xmlChar *tmp = ctxt->input->cur;
2730 ctxt->input->cur = in;
2731 if (areBlanks(ctxt, tmp, nbchar)) {
2732 if (ctxt->sax->ignorableWhitespace != NULL)
2733 ctxt->sax->ignorableWhitespace(ctxt->userData,
2734 tmp, nbchar);
2735 } else {
2736 if (ctxt->sax->characters != NULL)
2737 ctxt->sax->characters(ctxt->userData,
2738 tmp, nbchar);
2739 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002740 line = ctxt->input->line;
2741 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002742 } else {
2743 if (ctxt->sax->characters != NULL)
2744 ctxt->sax->characters(ctxt->userData,
2745 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002746 line = ctxt->input->line;
2747 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002748 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002749 }
2750 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002751 if (*in == 0xD) {
2752 in++;
2753 if (*in == 0xA) {
2754 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002755 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002756 ctxt->input->line++;
2757 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002758 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002759 in--;
2760 }
2761 if (*in == '<') {
2762 return;
2763 }
2764 if (*in == '&') {
2765 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002766 }
2767 SHRINK;
2768 GROW;
2769 in = ctxt->input->cur;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002770 } while ((*in >= 0x20) && (*in <= 0x7F));
Daniel Veillard48b2f892001-02-25 16:11:03 +00002771 nbchar = 0;
2772 }
Daniel Veillard50582112001-03-26 22:52:16 +00002773 ctxt->input->line = line;
2774 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002775 xmlParseCharDataComplex(ctxt, cdata);
2776}
2777
2778void
2779xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002780 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2781 int nbchar = 0;
2782 int cur, l;
2783 int count = 0;
2784
2785 SHRINK;
2786 GROW;
2787 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002788 while ((cur != '<') && /* checked */
2789 (cur != '&') &&
2790 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00002791 if ((cur == ']') && (NXT(1) == ']') &&
2792 (NXT(2) == '>')) {
2793 if (cdata) break;
2794 else {
2795 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2796 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2797 ctxt->sax->error(ctxt->userData,
2798 "Sequence ']]>' not allowed in content\n");
2799 /* Should this be relaxed ??? I see a "must here */
2800 ctxt->wellFormed = 0;
2801 ctxt->disableSAX = 1;
2802 }
2803 }
2804 COPY_BUF(l,buf,nbchar,cur);
2805 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2806 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002807 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002808 */
2809 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2810 if (areBlanks(ctxt, buf, nbchar)) {
2811 if (ctxt->sax->ignorableWhitespace != NULL)
2812 ctxt->sax->ignorableWhitespace(ctxt->userData,
2813 buf, nbchar);
2814 } else {
2815 if (ctxt->sax->characters != NULL)
2816 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2817 }
2818 }
2819 nbchar = 0;
2820 }
2821 count++;
2822 if (count > 50) {
2823 GROW;
2824 count = 0;
2825 }
2826 NEXTL(l);
2827 cur = CUR_CHAR(l);
2828 }
2829 if (nbchar != 0) {
2830 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002831 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002832 */
2833 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2834 if (areBlanks(ctxt, buf, nbchar)) {
2835 if (ctxt->sax->ignorableWhitespace != NULL)
2836 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2837 } else {
2838 if (ctxt->sax->characters != NULL)
2839 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2840 }
2841 }
2842 }
2843}
2844
2845/**
2846 * xmlParseExternalID:
2847 * @ctxt: an XML parser context
2848 * @publicID: a xmlChar** receiving PubidLiteral
2849 * @strict: indicate whether we should restrict parsing to only
2850 * production [75], see NOTE below
2851 *
2852 * Parse an External ID or a Public ID
2853 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002854 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002855 * 'PUBLIC' S PubidLiteral S SystemLiteral
2856 *
2857 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2858 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2859 *
2860 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2861 *
2862 * Returns the function returns SystemLiteral and in the second
2863 * case publicID receives PubidLiteral, is strict is off
2864 * it is possible to return NULL and have publicID set.
2865 */
2866
2867xmlChar *
2868xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2869 xmlChar *URI = NULL;
2870
2871 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002872
2873 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002874 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2875 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2876 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2877 SKIP(6);
2878 if (!IS_BLANK(CUR)) {
2879 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2880 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2881 ctxt->sax->error(ctxt->userData,
2882 "Space required after 'SYSTEM'\n");
2883 ctxt->wellFormed = 0;
2884 ctxt->disableSAX = 1;
2885 }
2886 SKIP_BLANKS;
2887 URI = xmlParseSystemLiteral(ctxt);
2888 if (URI == NULL) {
2889 ctxt->errNo = XML_ERR_URI_REQUIRED;
2890 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2891 ctxt->sax->error(ctxt->userData,
2892 "xmlParseExternalID: SYSTEM, no URI\n");
2893 ctxt->wellFormed = 0;
2894 ctxt->disableSAX = 1;
2895 }
2896 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2897 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2898 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2899 SKIP(6);
2900 if (!IS_BLANK(CUR)) {
2901 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2902 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2903 ctxt->sax->error(ctxt->userData,
2904 "Space required after 'PUBLIC'\n");
2905 ctxt->wellFormed = 0;
2906 ctxt->disableSAX = 1;
2907 }
2908 SKIP_BLANKS;
2909 *publicID = xmlParsePubidLiteral(ctxt);
2910 if (*publicID == NULL) {
2911 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2912 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2913 ctxt->sax->error(ctxt->userData,
2914 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2915 ctxt->wellFormed = 0;
2916 ctxt->disableSAX = 1;
2917 }
2918 if (strict) {
2919 /*
2920 * We don't handle [83] so "S SystemLiteral" is required.
2921 */
2922 if (!IS_BLANK(CUR)) {
2923 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2924 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2925 ctxt->sax->error(ctxt->userData,
2926 "Space required after the Public Identifier\n");
2927 ctxt->wellFormed = 0;
2928 ctxt->disableSAX = 1;
2929 }
2930 } else {
2931 /*
2932 * We handle [83] so we return immediately, if
2933 * "S SystemLiteral" is not detected. From a purely parsing
2934 * point of view that's a nice mess.
2935 */
2936 const xmlChar *ptr;
2937 GROW;
2938
2939 ptr = CUR_PTR;
2940 if (!IS_BLANK(*ptr)) return(NULL);
2941
2942 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2943 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2944 }
2945 SKIP_BLANKS;
2946 URI = xmlParseSystemLiteral(ctxt);
2947 if (URI == NULL) {
2948 ctxt->errNo = XML_ERR_URI_REQUIRED;
2949 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2950 ctxt->sax->error(ctxt->userData,
2951 "xmlParseExternalID: PUBLIC, no URI\n");
2952 ctxt->wellFormed = 0;
2953 ctxt->disableSAX = 1;
2954 }
2955 }
2956 return(URI);
2957}
2958
2959/**
2960 * xmlParseComment:
2961 * @ctxt: an XML parser context
2962 *
2963 * Skip an XML (SGML) comment <!-- .... -->
2964 * The spec says that "For compatibility, the string "--" (double-hyphen)
2965 * must not occur within comments. "
2966 *
2967 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2968 */
2969void
2970xmlParseComment(xmlParserCtxtPtr ctxt) {
2971 xmlChar *buf = NULL;
2972 int len;
2973 int size = XML_PARSER_BUFFER_SIZE;
2974 int q, ql;
2975 int r, rl;
2976 int cur, l;
2977 xmlParserInputState state;
2978 xmlParserInputPtr input = ctxt->input;
2979 int count = 0;
2980
2981 /*
2982 * Check that there is a comment right here.
2983 */
2984 if ((RAW != '<') || (NXT(1) != '!') ||
2985 (NXT(2) != '-') || (NXT(3) != '-')) return;
2986
2987 state = ctxt->instate;
2988 ctxt->instate = XML_PARSER_COMMENT;
2989 SHRINK;
2990 SKIP(4);
2991 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2992 if (buf == NULL) {
2993 xmlGenericError(xmlGenericErrorContext,
2994 "malloc of %d byte failed\n", size);
2995 ctxt->instate = state;
2996 return;
2997 }
2998 q = CUR_CHAR(ql);
2999 NEXTL(ql);
3000 r = CUR_CHAR(rl);
3001 NEXTL(rl);
3002 cur = CUR_CHAR(l);
3003 len = 0;
3004 while (IS_CHAR(cur) && /* checked */
3005 ((cur != '>') ||
3006 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003007 if ((r == '-') && (q == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003008 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
3009 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3010 ctxt->sax->error(ctxt->userData,
3011 "Comment must not contain '--' (double-hyphen)`\n");
3012 ctxt->wellFormed = 0;
3013 ctxt->disableSAX = 1;
3014 }
3015 if (len + 5 >= size) {
3016 size *= 2;
3017 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3018 if (buf == NULL) {
3019 xmlGenericError(xmlGenericErrorContext,
3020 "realloc of %d byte failed\n", size);
3021 ctxt->instate = state;
3022 return;
3023 }
3024 }
3025 COPY_BUF(ql,buf,len,q);
3026 q = r;
3027 ql = rl;
3028 r = cur;
3029 rl = l;
3030
3031 count++;
3032 if (count > 50) {
3033 GROW;
3034 count = 0;
3035 }
3036 NEXTL(l);
3037 cur = CUR_CHAR(l);
3038 if (cur == 0) {
3039 SHRINK;
3040 GROW;
3041 cur = CUR_CHAR(l);
3042 }
3043 }
3044 buf[len] = 0;
3045 if (!IS_CHAR(cur)) {
3046 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
3047 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3048 ctxt->sax->error(ctxt->userData,
3049 "Comment not terminated \n<!--%.50s\n", buf);
3050 ctxt->wellFormed = 0;
3051 ctxt->disableSAX = 1;
3052 xmlFree(buf);
3053 } else {
3054 if (input != ctxt->input) {
3055 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3056 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3057 ctxt->sax->error(ctxt->userData,
3058"Comment doesn't start and stop in the same entity\n");
3059 ctxt->wellFormed = 0;
3060 ctxt->disableSAX = 1;
3061 }
3062 NEXT;
3063 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3064 (!ctxt->disableSAX))
3065 ctxt->sax->comment(ctxt->userData, buf);
3066 xmlFree(buf);
3067 }
3068 ctxt->instate = state;
3069}
3070
3071/**
3072 * xmlParsePITarget:
3073 * @ctxt: an XML parser context
3074 *
3075 * parse the name of a PI
3076 *
3077 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3078 *
3079 * Returns the PITarget name or NULL
3080 */
3081
3082xmlChar *
3083xmlParsePITarget(xmlParserCtxtPtr ctxt) {
3084 xmlChar *name;
3085
3086 name = xmlParseName(ctxt);
3087 if ((name != NULL) &&
3088 ((name[0] == 'x') || (name[0] == 'X')) &&
3089 ((name[1] == 'm') || (name[1] == 'M')) &&
3090 ((name[2] == 'l') || (name[2] == 'L'))) {
3091 int i;
3092 if ((name[0] == 'x') && (name[1] == 'm') &&
3093 (name[2] == 'l') && (name[3] == 0)) {
3094 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3095 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3096 ctxt->sax->error(ctxt->userData,
3097 "XML declaration allowed only at the start of the document\n");
3098 ctxt->wellFormed = 0;
3099 ctxt->disableSAX = 1;
3100 return(name);
3101 } else if (name[3] == 0) {
3102 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3103 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3104 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
3105 ctxt->wellFormed = 0;
3106 ctxt->disableSAX = 1;
3107 return(name);
3108 }
3109 for (i = 0;;i++) {
3110 if (xmlW3CPIs[i] == NULL) break;
3111 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3112 return(name);
3113 }
3114 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3115 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3116 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003117 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003118 }
3119 }
3120 return(name);
3121}
3122
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * A syntax problem is reported through the SAX warning() callback, except
 * for a missing '=' after the pseudo-attribute name, which is ignored
 * silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *cur = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* pseudo-attribute name */
    for (; IS_BLANK(*cur); cur++)
        ;
    if (xmlStrncmp(cur, BAD_CAST"catalog", 7) != 0)
        goto error;

    /* '=' possibly surrounded by blanks */
    for (cur += 7; IS_BLANK(*cur); cur++)
        ;
    if (*cur != '=') {
        return;
    }
    for (cur++; IS_BLANK(*cur); cur++)
        ;

    /* quoted value, closed by the same quote character that opened it */
    quote = *cur;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    start = ++cur;
    while ((*cur != 0) && (*cur != quote))
        cur++;
    if (*cur == 0)
        goto error;
    URL = xmlStrndup(start, cur - start);

    /* only blanks may follow the closing quote */
    for (cur++; IS_BLANK(*cur); cur++)
        ;
    if (*cur != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3185
Owen Taylor3473f882001-02-23 17:55:21 +00003186/**
3187 * xmlParsePI:
3188 * @ctxt: an XML parser context
3189 *
3190 * parse an XML Processing Instruction.
3191 *
3192 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3193 *
3194 * The processing is transfered to SAX once parsed.
3195 */
3196
3197void
3198xmlParsePI(xmlParserCtxtPtr ctxt) {
3199 xmlChar *buf = NULL;
3200 int len = 0;
3201 int size = XML_PARSER_BUFFER_SIZE;
3202 int cur, l;
3203 xmlChar *target;
3204 xmlParserInputState state;
3205 int count = 0;
3206
3207 if ((RAW == '<') && (NXT(1) == '?')) {
3208 xmlParserInputPtr input = ctxt->input;
3209 state = ctxt->instate;
3210 ctxt->instate = XML_PARSER_PI;
3211 /*
3212 * this is a Processing Instruction.
3213 */
3214 SKIP(2);
3215 SHRINK;
3216
3217 /*
3218 * Parse the target name and check for special support like
3219 * namespace.
3220 */
3221 target = xmlParsePITarget(ctxt);
3222 if (target != NULL) {
3223 if ((RAW == '?') && (NXT(1) == '>')) {
3224 if (input != ctxt->input) {
3225 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3226 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3227 ctxt->sax->error(ctxt->userData,
3228 "PI declaration doesn't start and stop in the same entity\n");
3229 ctxt->wellFormed = 0;
3230 ctxt->disableSAX = 1;
3231 }
3232 SKIP(2);
3233
3234 /*
3235 * SAX: PI detected.
3236 */
3237 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3238 (ctxt->sax->processingInstruction != NULL))
3239 ctxt->sax->processingInstruction(ctxt->userData,
3240 target, NULL);
3241 ctxt->instate = state;
3242 xmlFree(target);
3243 return;
3244 }
3245 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3246 if (buf == NULL) {
3247 xmlGenericError(xmlGenericErrorContext,
3248 "malloc of %d byte failed\n", size);
3249 ctxt->instate = state;
3250 return;
3251 }
3252 cur = CUR;
3253 if (!IS_BLANK(cur)) {
3254 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3255 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3256 ctxt->sax->error(ctxt->userData,
3257 "xmlParsePI: PI %s space expected\n", target);
3258 ctxt->wellFormed = 0;
3259 ctxt->disableSAX = 1;
3260 }
3261 SKIP_BLANKS;
3262 cur = CUR_CHAR(l);
3263 while (IS_CHAR(cur) && /* checked */
3264 ((cur != '?') || (NXT(1) != '>'))) {
3265 if (len + 5 >= size) {
3266 size *= 2;
3267 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3268 if (buf == NULL) {
3269 xmlGenericError(xmlGenericErrorContext,
3270 "realloc of %d byte failed\n", size);
3271 ctxt->instate = state;
3272 return;
3273 }
3274 }
3275 count++;
3276 if (count > 50) {
3277 GROW;
3278 count = 0;
3279 }
3280 COPY_BUF(l,buf,len,cur);
3281 NEXTL(l);
3282 cur = CUR_CHAR(l);
3283 if (cur == 0) {
3284 SHRINK;
3285 GROW;
3286 cur = CUR_CHAR(l);
3287 }
3288 }
3289 buf[len] = 0;
3290 if (cur != '?') {
3291 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3292 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3293 ctxt->sax->error(ctxt->userData,
3294 "xmlParsePI: PI %s never end ...\n", target);
3295 ctxt->wellFormed = 0;
3296 ctxt->disableSAX = 1;
3297 } else {
3298 if (input != ctxt->input) {
3299 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3300 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3301 ctxt->sax->error(ctxt->userData,
3302 "PI declaration doesn't start and stop in the same entity\n");
3303 ctxt->wellFormed = 0;
3304 ctxt->disableSAX = 1;
3305 }
3306 SKIP(2);
3307
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003308#ifdef LIBXML_CATALOG_ENABLED
3309 if (((state == XML_PARSER_MISC) ||
3310 (state == XML_PARSER_START)) &&
3311 (xmlStrEqual(target, XML_CATALOG_PI))) {
3312 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3313 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3314 (allow == XML_CATA_ALLOW_ALL))
3315 xmlParseCatalogPI(ctxt, buf);
3316 }
3317#endif
3318
3319
Owen Taylor3473f882001-02-23 17:55:21 +00003320 /*
3321 * SAX: PI detected.
3322 */
3323 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3324 (ctxt->sax->processingInstruction != NULL))
3325 ctxt->sax->processingInstruction(ctxt->userData,
3326 target, buf);
3327 }
3328 xmlFree(buf);
3329 xmlFree(target);
3330 } else {
3331 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3332 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3333 ctxt->sax->error(ctxt->userData,
3334 "xmlParsePI : no target name\n");
3335 ctxt->wellFormed = 0;
3336 ctxt->disableSAX = 1;
3337 }
3338 ctxt->instate = state;
3339 }
3340}
3341
3342/**
3343 * xmlParseNotationDecl:
3344 * @ctxt: an XML parser context
3345 *
3346 * parse a notation declaration
3347 *
3348 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3349 *
3350 * Hence there is actually 3 choices:
3351 * 'PUBLIC' S PubidLiteral
3352 * 'PUBLIC' S PubidLiteral S SystemLiteral
3353 * and 'SYSTEM' S SystemLiteral
3354 *
3355 * See the NOTE on xmlParseExternalID().
3356 */
3357
3358void
3359xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3360 xmlChar *name;
3361 xmlChar *Pubid;
3362 xmlChar *Systemid;
3363
3364 if ((RAW == '<') && (NXT(1) == '!') &&
3365 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3366 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3367 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3368 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3369 xmlParserInputPtr input = ctxt->input;
3370 SHRINK;
3371 SKIP(10);
3372 if (!IS_BLANK(CUR)) {
3373 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3374 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3375 ctxt->sax->error(ctxt->userData,
3376 "Space required after '<!NOTATION'\n");
3377 ctxt->wellFormed = 0;
3378 ctxt->disableSAX = 1;
3379 return;
3380 }
3381 SKIP_BLANKS;
3382
Daniel Veillard76d66f42001-05-16 21:05:17 +00003383 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003384 if (name == NULL) {
3385 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3386 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3387 ctxt->sax->error(ctxt->userData,
3388 "NOTATION: Name expected here\n");
3389 ctxt->wellFormed = 0;
3390 ctxt->disableSAX = 1;
3391 return;
3392 }
3393 if (!IS_BLANK(CUR)) {
3394 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3395 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3396 ctxt->sax->error(ctxt->userData,
3397 "Space required after the NOTATION name'\n");
3398 ctxt->wellFormed = 0;
3399 ctxt->disableSAX = 1;
3400 return;
3401 }
3402 SKIP_BLANKS;
3403
3404 /*
3405 * Parse the IDs.
3406 */
3407 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3408 SKIP_BLANKS;
3409
3410 if (RAW == '>') {
3411 if (input != ctxt->input) {
3412 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3413 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3414 ctxt->sax->error(ctxt->userData,
3415"Notation declaration doesn't start and stop in the same entity\n");
3416 ctxt->wellFormed = 0;
3417 ctxt->disableSAX = 1;
3418 }
3419 NEXT;
3420 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3421 (ctxt->sax->notationDecl != NULL))
3422 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3423 } else {
3424 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3425 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3426 ctxt->sax->error(ctxt->userData,
3427 "'>' required to close NOTATION declaration\n");
3428 ctxt->wellFormed = 0;
3429 ctxt->disableSAX = 1;
3430 }
3431 xmlFree(name);
3432 if (Systemid != NULL) xmlFree(Systemid);
3433 if (Pubid != NULL) xmlFree(Pubid);
3434 }
3435}
3436
3437/**
3438 * xmlParseEntityDecl:
3439 * @ctxt: an XML parser context
3440 *
3441 * parse <!ENTITY declarations
3442 *
3443 * [70] EntityDecl ::= GEDecl | PEDecl
3444 *
3445 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3446 *
3447 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3448 *
3449 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3450 *
3451 * [74] PEDef ::= EntityValue | ExternalID
3452 *
3453 * [76] NDataDecl ::= S 'NDATA' S Name
3454 *
3455 * [ VC: Notation Declared ]
3456 * The Name must match the declared name of a notation.
3457 */
3458
3459void
3460xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3461 xmlChar *name = NULL;
3462 xmlChar *value = NULL;
3463 xmlChar *URI = NULL, *literal = NULL;
3464 xmlChar *ndata = NULL;
3465 int isParameter = 0;
3466 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003467 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003468
3469 GROW;
3470 if ((RAW == '<') && (NXT(1) == '!') &&
3471 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3472 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3473 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3474 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003475 SHRINK;
3476 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003477 skipped = SKIP_BLANKS;
3478 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003479 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3480 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3481 ctxt->sax->error(ctxt->userData,
3482 "Space required after '<!ENTITY'\n");
3483 ctxt->wellFormed = 0;
3484 ctxt->disableSAX = 1;
3485 }
Owen Taylor3473f882001-02-23 17:55:21 +00003486
3487 if (RAW == '%') {
3488 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003489 skipped = SKIP_BLANKS;
3490 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003491 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3492 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3493 ctxt->sax->error(ctxt->userData,
3494 "Space required after '%'\n");
3495 ctxt->wellFormed = 0;
3496 ctxt->disableSAX = 1;
3497 }
Owen Taylor3473f882001-02-23 17:55:21 +00003498 isParameter = 1;
3499 }
3500
Daniel Veillard76d66f42001-05-16 21:05:17 +00003501 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003502 if (name == NULL) {
3503 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3504 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3505 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3506 ctxt->wellFormed = 0;
3507 ctxt->disableSAX = 1;
3508 return;
3509 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003510 skipped = SKIP_BLANKS;
3511 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003512 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3513 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3514 ctxt->sax->error(ctxt->userData,
3515 "Space required after the entity name\n");
3516 ctxt->wellFormed = 0;
3517 ctxt->disableSAX = 1;
3518 }
Owen Taylor3473f882001-02-23 17:55:21 +00003519
Daniel Veillardf5582f12002-06-11 10:08:16 +00003520 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003521 /*
3522 * handle the various case of definitions...
3523 */
3524 if (isParameter) {
3525 if ((RAW == '"') || (RAW == '\'')) {
3526 value = xmlParseEntityValue(ctxt, &orig);
3527 if (value) {
3528 if ((ctxt->sax != NULL) &&
3529 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3530 ctxt->sax->entityDecl(ctxt->userData, name,
3531 XML_INTERNAL_PARAMETER_ENTITY,
3532 NULL, NULL, value);
3533 }
3534 } else {
3535 URI = xmlParseExternalID(ctxt, &literal, 1);
3536 if ((URI == NULL) && (literal == NULL)) {
3537 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3538 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3539 ctxt->sax->error(ctxt->userData,
3540 "Entity value required\n");
3541 ctxt->wellFormed = 0;
3542 ctxt->disableSAX = 1;
3543 }
3544 if (URI) {
3545 xmlURIPtr uri;
3546
3547 uri = xmlParseURI((const char *) URI);
3548 if (uri == NULL) {
3549 ctxt->errNo = XML_ERR_INVALID_URI;
3550 if ((ctxt->sax != NULL) &&
3551 (!ctxt->disableSAX) &&
3552 (ctxt->sax->error != NULL))
3553 ctxt->sax->error(ctxt->userData,
3554 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003555 /*
3556 * This really ought to be a well formedness error
3557 * but the XML Core WG decided otherwise c.f. issue
3558 * E26 of the XML erratas.
3559 */
Owen Taylor3473f882001-02-23 17:55:21 +00003560 } else {
3561 if (uri->fragment != NULL) {
3562 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3563 if ((ctxt->sax != NULL) &&
3564 (!ctxt->disableSAX) &&
3565 (ctxt->sax->error != NULL))
3566 ctxt->sax->error(ctxt->userData,
3567 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003568 /*
3569 * Okay this is foolish to block those but not
3570 * invalid URIs.
3571 */
Owen Taylor3473f882001-02-23 17:55:21 +00003572 ctxt->wellFormed = 0;
3573 } else {
3574 if ((ctxt->sax != NULL) &&
3575 (!ctxt->disableSAX) &&
3576 (ctxt->sax->entityDecl != NULL))
3577 ctxt->sax->entityDecl(ctxt->userData, name,
3578 XML_EXTERNAL_PARAMETER_ENTITY,
3579 literal, URI, NULL);
3580 }
3581 xmlFreeURI(uri);
3582 }
3583 }
3584 }
3585 } else {
3586 if ((RAW == '"') || (RAW == '\'')) {
3587 value = xmlParseEntityValue(ctxt, &orig);
3588 if ((ctxt->sax != NULL) &&
3589 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3590 ctxt->sax->entityDecl(ctxt->userData, name,
3591 XML_INTERNAL_GENERAL_ENTITY,
3592 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003593 /*
3594 * For expat compatibility in SAX mode.
3595 */
3596 if ((ctxt->myDoc == NULL) ||
3597 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3598 if (ctxt->myDoc == NULL) {
3599 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3600 }
3601 if (ctxt->myDoc->intSubset == NULL)
3602 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3603 BAD_CAST "fake", NULL, NULL);
3604
3605 entityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3606 NULL, NULL, value);
3607 }
Owen Taylor3473f882001-02-23 17:55:21 +00003608 } else {
3609 URI = xmlParseExternalID(ctxt, &literal, 1);
3610 if ((URI == NULL) && (literal == NULL)) {
3611 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3612 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3613 ctxt->sax->error(ctxt->userData,
3614 "Entity value required\n");
3615 ctxt->wellFormed = 0;
3616 ctxt->disableSAX = 1;
3617 }
3618 if (URI) {
3619 xmlURIPtr uri;
3620
3621 uri = xmlParseURI((const char *)URI);
3622 if (uri == NULL) {
3623 ctxt->errNo = XML_ERR_INVALID_URI;
3624 if ((ctxt->sax != NULL) &&
3625 (!ctxt->disableSAX) &&
3626 (ctxt->sax->error != NULL))
3627 ctxt->sax->error(ctxt->userData,
3628 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003629 /*
3630 * This really ought to be a well formedness error
3631 * but the XML Core WG decided otherwise c.f. issue
3632 * E26 of the XML erratas.
3633 */
Owen Taylor3473f882001-02-23 17:55:21 +00003634 } else {
3635 if (uri->fragment != NULL) {
3636 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3637 if ((ctxt->sax != NULL) &&
3638 (!ctxt->disableSAX) &&
3639 (ctxt->sax->error != NULL))
3640 ctxt->sax->error(ctxt->userData,
3641 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003642 /*
3643 * Okay this is foolish to block those but not
3644 * invalid URIs.
3645 */
Owen Taylor3473f882001-02-23 17:55:21 +00003646 ctxt->wellFormed = 0;
3647 }
3648 xmlFreeURI(uri);
3649 }
3650 }
3651 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3652 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3653 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3654 ctxt->sax->error(ctxt->userData,
3655 "Space required before 'NDATA'\n");
3656 ctxt->wellFormed = 0;
3657 ctxt->disableSAX = 1;
3658 }
3659 SKIP_BLANKS;
3660 if ((RAW == 'N') && (NXT(1) == 'D') &&
3661 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3662 (NXT(4) == 'A')) {
3663 SKIP(5);
3664 if (!IS_BLANK(CUR)) {
3665 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3666 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3667 ctxt->sax->error(ctxt->userData,
3668 "Space required after 'NDATA'\n");
3669 ctxt->wellFormed = 0;
3670 ctxt->disableSAX = 1;
3671 }
3672 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003673 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003674 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3675 (ctxt->sax->unparsedEntityDecl != NULL))
3676 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3677 literal, URI, ndata);
3678 } else {
3679 if ((ctxt->sax != NULL) &&
3680 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3681 ctxt->sax->entityDecl(ctxt->userData, name,
3682 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3683 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003684 /*
3685 * For expat compatibility in SAX mode.
3686 * assuming the entity repalcement was asked for
3687 */
3688 if ((ctxt->replaceEntities != 0) &&
3689 ((ctxt->myDoc == NULL) ||
3690 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
3691 if (ctxt->myDoc == NULL) {
3692 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3693 }
3694
3695 if (ctxt->myDoc->intSubset == NULL)
3696 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3697 BAD_CAST "fake", NULL, NULL);
3698 entityDecl(ctxt, name,
3699 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3700 literal, URI, NULL);
3701 }
Owen Taylor3473f882001-02-23 17:55:21 +00003702 }
3703 }
3704 }
3705 SKIP_BLANKS;
3706 if (RAW != '>') {
3707 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3708 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3709 ctxt->sax->error(ctxt->userData,
3710 "xmlParseEntityDecl: entity %s not terminated\n", name);
3711 ctxt->wellFormed = 0;
3712 ctxt->disableSAX = 1;
3713 } else {
3714 if (input != ctxt->input) {
3715 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3716 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3717 ctxt->sax->error(ctxt->userData,
3718"Entity declaration doesn't start and stop in the same entity\n");
3719 ctxt->wellFormed = 0;
3720 ctxt->disableSAX = 1;
3721 }
3722 NEXT;
3723 }
3724 if (orig != NULL) {
3725 /*
3726 * Ugly mechanism to save the raw entity value.
3727 */
3728 xmlEntityPtr cur = NULL;
3729
3730 if (isParameter) {
3731 if ((ctxt->sax != NULL) &&
3732 (ctxt->sax->getParameterEntity != NULL))
3733 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3734 } else {
3735 if ((ctxt->sax != NULL) &&
3736 (ctxt->sax->getEntity != NULL))
3737 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003738 if ((cur == NULL) && (ctxt->userData==ctxt)) {
3739 cur = getEntity(ctxt, name);
3740 }
Owen Taylor3473f882001-02-23 17:55:21 +00003741 }
3742 if (cur != NULL) {
3743 if (cur->orig != NULL)
3744 xmlFree(orig);
3745 else
3746 cur->orig = orig;
3747 } else
3748 xmlFree(orig);
3749 }
3750 if (name != NULL) xmlFree(name);
3751 if (value != NULL) xmlFree(value);
3752 if (URI != NULL) xmlFree(URI);
3753 if (literal != NULL) xmlFree(literal);
3754 if (ndata != NULL) xmlFree(ndata);
3755 }
3756}
3757
3758/**
3759 * xmlParseDefaultDecl:
3760 * @ctxt: an XML parser context
3761 * @value: Receive a possible fixed default value for the attribute
3762 *
3763 * Parse an attribute default declaration
3764 *
3765 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3766 *
3767 * [ VC: Required Attribute ]
3768 * if the default declaration is the keyword #REQUIRED, then the
3769 * attribute must be specified for all elements of the type in the
3770 * attribute-list declaration.
3771 *
3772 * [ VC: Attribute Default Legal ]
3773 * The declared default value must meet the lexical constraints of
3774 * the declared attribute type c.f. xmlValidateAttributeDecl()
3775 *
3776 * [ VC: Fixed Attribute Default ]
3777 * if an attribute has a default value declared with the #FIXED
3778 * keyword, instances of that attribute must match the default value.
3779 *
3780 * [ WFC: No < in Attribute Values ]
3781 * handled in xmlParseAttValue()
3782 *
3783 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3784 * or XML_ATTRIBUTE_FIXED.
3785 */
3786
3787int
3788xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3789 int val;
3790 xmlChar *ret;
3791
3792 *value = NULL;
3793 if ((RAW == '#') && (NXT(1) == 'R') &&
3794 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3795 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3796 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3797 (NXT(8) == 'D')) {
3798 SKIP(9);
3799 return(XML_ATTRIBUTE_REQUIRED);
3800 }
3801 if ((RAW == '#') && (NXT(1) == 'I') &&
3802 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3803 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3804 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3805 SKIP(8);
3806 return(XML_ATTRIBUTE_IMPLIED);
3807 }
3808 val = XML_ATTRIBUTE_NONE;
3809 if ((RAW == '#') && (NXT(1) == 'F') &&
3810 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3811 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3812 SKIP(6);
3813 val = XML_ATTRIBUTE_FIXED;
3814 if (!IS_BLANK(CUR)) {
3815 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3816 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3817 ctxt->sax->error(ctxt->userData,
3818 "Space required after '#FIXED'\n");
3819 ctxt->wellFormed = 0;
3820 ctxt->disableSAX = 1;
3821 }
3822 SKIP_BLANKS;
3823 }
3824 ret = xmlParseAttValue(ctxt);
3825 ctxt->instate = XML_PARSER_DTD;
3826 if (ret == NULL) {
3827 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3828 ctxt->sax->error(ctxt->userData,
3829 "Attribute default value declaration error\n");
3830 ctxt->wellFormed = 0;
3831 ctxt->disableSAX = 1;
3832 } else
3833 *value = ret;
3834 return(val);
3835}
3836
3837/**
3838 * xmlParseNotationType:
3839 * @ctxt: an XML parser context
3840 *
3841 * parse an Notation attribute type.
3842 *
3843 * Note: the leading 'NOTATION' S part has already being parsed...
3844 *
3845 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3846 *
3847 * [ VC: Notation Attributes ]
3848 * Values of this type must match one of the notation names included
3849 * in the declaration; all notation names in the declaration must be declared.
3850 *
3851 * Returns: the notation attribute tree built while parsing
3852 */
3853
3854xmlEnumerationPtr
3855xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3856 xmlChar *name;
3857 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3858
3859 if (RAW != '(') {
3860 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3861 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3862 ctxt->sax->error(ctxt->userData,
3863 "'(' required to start 'NOTATION'\n");
3864 ctxt->wellFormed = 0;
3865 ctxt->disableSAX = 1;
3866 return(NULL);
3867 }
3868 SHRINK;
3869 do {
3870 NEXT;
3871 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003872 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003873 if (name == NULL) {
3874 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3875 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3876 ctxt->sax->error(ctxt->userData,
3877 "Name expected in NOTATION declaration\n");
3878 ctxt->wellFormed = 0;
3879 ctxt->disableSAX = 1;
3880 return(ret);
3881 }
3882 cur = xmlCreateEnumeration(name);
3883 xmlFree(name);
3884 if (cur == NULL) return(ret);
3885 if (last == NULL) ret = last = cur;
3886 else {
3887 last->next = cur;
3888 last = cur;
3889 }
3890 SKIP_BLANKS;
3891 } while (RAW == '|');
3892 if (RAW != ')') {
3893 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3894 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3895 ctxt->sax->error(ctxt->userData,
3896 "')' required to finish NOTATION declaration\n");
3897 ctxt->wellFormed = 0;
3898 ctxt->disableSAX = 1;
3899 if ((last != NULL) && (last != ret))
3900 xmlFreeEnumeration(last);
3901 return(ret);
3902 }
3903 NEXT;
3904 return(ret);
3905}
3906
3907/**
3908 * xmlParseEnumerationType:
3909 * @ctxt: an XML parser context
3910 *
3911 * parse an Enumeration attribute type.
3912 *
3913 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3914 *
3915 * [ VC: Enumeration ]
3916 * Values of this type must match one of the Nmtoken tokens in
3917 * the declaration
3918 *
3919 * Returns: the enumeration attribute tree built while parsing
3920 */
3921
3922xmlEnumerationPtr
3923xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3924 xmlChar *name;
3925 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3926
3927 if (RAW != '(') {
3928 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3929 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3930 ctxt->sax->error(ctxt->userData,
3931 "'(' required to start ATTLIST enumeration\n");
3932 ctxt->wellFormed = 0;
3933 ctxt->disableSAX = 1;
3934 return(NULL);
3935 }
3936 SHRINK;
3937 do {
3938 NEXT;
3939 SKIP_BLANKS;
3940 name = xmlParseNmtoken(ctxt);
3941 if (name == NULL) {
3942 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3943 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3944 ctxt->sax->error(ctxt->userData,
3945 "NmToken expected in ATTLIST enumeration\n");
3946 ctxt->wellFormed = 0;
3947 ctxt->disableSAX = 1;
3948 return(ret);
3949 }
3950 cur = xmlCreateEnumeration(name);
3951 xmlFree(name);
3952 if (cur == NULL) return(ret);
3953 if (last == NULL) ret = last = cur;
3954 else {
3955 last->next = cur;
3956 last = cur;
3957 }
3958 SKIP_BLANKS;
3959 } while (RAW == '|');
3960 if (RAW != ')') {
3961 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3962 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3963 ctxt->sax->error(ctxt->userData,
3964 "')' required to finish ATTLIST enumeration\n");
3965 ctxt->wellFormed = 0;
3966 ctxt->disableSAX = 1;
3967 return(ret);
3968 }
3969 NEXT;
3970 return(ret);
3971}
3972
3973/**
3974 * xmlParseEnumeratedType:
3975 * @ctxt: an XML parser context
3976 * @tree: the enumeration tree built while parsing
3977 *
3978 * parse an Enumerated attribute type.
3979 *
3980 * [57] EnumeratedType ::= NotationType | Enumeration
3981 *
3982 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3983 *
3984 *
3985 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3986 */
3987
3988int
3989xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3990 if ((RAW == 'N') && (NXT(1) == 'O') &&
3991 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3992 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3993 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3994 SKIP(8);
3995 if (!IS_BLANK(CUR)) {
3996 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3997 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3998 ctxt->sax->error(ctxt->userData,
3999 "Space required after 'NOTATION'\n");
4000 ctxt->wellFormed = 0;
4001 ctxt->disableSAX = 1;
4002 return(0);
4003 }
4004 SKIP_BLANKS;
4005 *tree = xmlParseNotationType(ctxt);
4006 if (*tree == NULL) return(0);
4007 return(XML_ATTRIBUTE_NOTATION);
4008 }
4009 *tree = xmlParseEnumerationType(ctxt);
4010 if (*tree == NULL) return(0);
4011 return(XML_ATTRIBUTE_ENUMERATION);
4012}
4013
4014/**
4015 * xmlParseAttributeType:
4016 * @ctxt: an XML parser context
4017 * @tree: the enumeration tree built while parsing
4018 *
4019 * parse the Attribute list def for an element
4020 *
4021 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4022 *
4023 * [55] StringType ::= 'CDATA'
4024 *
4025 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4026 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4027 *
4028 * Validity constraints for attribute values syntax are checked in
4029 * xmlValidateAttributeValue()
4030 *
4031 * [ VC: ID ]
4032 * Values of type ID must match the Name production. A name must not
4033 * appear more than once in an XML document as a value of this type;
4034 * i.e., ID values must uniquely identify the elements which bear them.
4035 *
4036 * [ VC: One ID per Element Type ]
4037 * No element type may have more than one ID attribute specified.
4038 *
4039 * [ VC: ID Attribute Default ]
4040 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4041 *
4042 * [ VC: IDREF ]
4043 * Values of type IDREF must match the Name production, and values
4044 * of type IDREFS must match Names; each IDREF Name must match the value
4045 * of an ID attribute on some element in the XML document; i.e. IDREF
4046 * values must match the value of some ID attribute.
4047 *
4048 * [ VC: Entity Name ]
4049 * Values of type ENTITY must match the Name production, values
4050 * of type ENTITIES must match Names; each Entity Name must match the
4051 * name of an unparsed entity declared in the DTD.
4052 *
4053 * [ VC: Name Token ]
4054 * Values of type NMTOKEN must match the Nmtoken production; values
4055 * of type NMTOKENS must match Nmtokens.
4056 *
4057 * Returns the attribute type
4058 */
4059int
4060xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4061 SHRINK;
4062 if ((RAW == 'C') && (NXT(1) == 'D') &&
4063 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4064 (NXT(4) == 'A')) {
4065 SKIP(5);
4066 return(XML_ATTRIBUTE_CDATA);
4067 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4068 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4069 (NXT(4) == 'F') && (NXT(5) == 'S')) {
4070 SKIP(6);
4071 return(XML_ATTRIBUTE_IDREFS);
4072 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4073 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4074 (NXT(4) == 'F')) {
4075 SKIP(5);
4076 return(XML_ATTRIBUTE_IDREF);
4077 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4078 SKIP(2);
4079 return(XML_ATTRIBUTE_ID);
4080 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4081 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4082 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4083 SKIP(6);
4084 return(XML_ATTRIBUTE_ENTITY);
4085 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4086 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4087 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4088 (NXT(6) == 'E') && (NXT(7) == 'S')) {
4089 SKIP(8);
4090 return(XML_ATTRIBUTE_ENTITIES);
4091 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4092 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4093 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4094 (NXT(6) == 'N') && (NXT(7) == 'S')) {
4095 SKIP(8);
4096 return(XML_ATTRIBUTE_NMTOKENS);
4097 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4098 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4099 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4100 (NXT(6) == 'N')) {
4101 SKIP(7);
4102 return(XML_ATTRIBUTE_NMTOKEN);
4103 }
4104 return(xmlParseEnumeratedType(ctxt, tree));
4105}
4106
4107/**
4108 * xmlParseAttributeListDecl:
4109 * @ctxt: an XML parser context
4110 *
4111 * : parse the Attribute list def for an element
4112 *
4113 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4114 *
4115 * [53] AttDef ::= S Name S AttType S DefaultDecl
4116 *
4117 */
4118void
4119xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
4120 xmlChar *elemName;
4121 xmlChar *attrName;
4122 xmlEnumerationPtr tree;
4123
4124 if ((RAW == '<') && (NXT(1) == '!') &&
4125 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4126 (NXT(4) == 'T') && (NXT(5) == 'L') &&
4127 (NXT(6) == 'I') && (NXT(7) == 'S') &&
4128 (NXT(8) == 'T')) {
4129 xmlParserInputPtr input = ctxt->input;
4130
4131 SKIP(9);
4132 if (!IS_BLANK(CUR)) {
4133 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4134 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4135 ctxt->sax->error(ctxt->userData,
4136 "Space required after '<!ATTLIST'\n");
4137 ctxt->wellFormed = 0;
4138 ctxt->disableSAX = 1;
4139 }
4140 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004141 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004142 if (elemName == NULL) {
4143 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4144 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4145 ctxt->sax->error(ctxt->userData,
4146 "ATTLIST: no name for Element\n");
4147 ctxt->wellFormed = 0;
4148 ctxt->disableSAX = 1;
4149 return;
4150 }
4151 SKIP_BLANKS;
4152 GROW;
4153 while (RAW != '>') {
4154 const xmlChar *check = CUR_PTR;
4155 int type;
4156 int def;
4157 xmlChar *defaultValue = NULL;
4158
4159 GROW;
4160 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004161 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004162 if (attrName == NULL) {
4163 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4164 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4165 ctxt->sax->error(ctxt->userData,
4166 "ATTLIST: no name for Attribute\n");
4167 ctxt->wellFormed = 0;
4168 ctxt->disableSAX = 1;
4169 break;
4170 }
4171 GROW;
4172 if (!IS_BLANK(CUR)) {
4173 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4174 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4175 ctxt->sax->error(ctxt->userData,
4176 "Space required after the attribute name\n");
4177 ctxt->wellFormed = 0;
4178 ctxt->disableSAX = 1;
4179 if (attrName != NULL)
4180 xmlFree(attrName);
4181 if (defaultValue != NULL)
4182 xmlFree(defaultValue);
4183 break;
4184 }
4185 SKIP_BLANKS;
4186
4187 type = xmlParseAttributeType(ctxt, &tree);
4188 if (type <= 0) {
4189 if (attrName != NULL)
4190 xmlFree(attrName);
4191 if (defaultValue != NULL)
4192 xmlFree(defaultValue);
4193 break;
4194 }
4195
4196 GROW;
4197 if (!IS_BLANK(CUR)) {
4198 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4199 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4200 ctxt->sax->error(ctxt->userData,
4201 "Space required after the attribute type\n");
4202 ctxt->wellFormed = 0;
4203 ctxt->disableSAX = 1;
4204 if (attrName != NULL)
4205 xmlFree(attrName);
4206 if (defaultValue != NULL)
4207 xmlFree(defaultValue);
4208 if (tree != NULL)
4209 xmlFreeEnumeration(tree);
4210 break;
4211 }
4212 SKIP_BLANKS;
4213
4214 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4215 if (def <= 0) {
4216 if (attrName != NULL)
4217 xmlFree(attrName);
4218 if (defaultValue != NULL)
4219 xmlFree(defaultValue);
4220 if (tree != NULL)
4221 xmlFreeEnumeration(tree);
4222 break;
4223 }
4224
4225 GROW;
4226 if (RAW != '>') {
4227 if (!IS_BLANK(CUR)) {
4228 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4229 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4230 ctxt->sax->error(ctxt->userData,
4231 "Space required after the attribute default value\n");
4232 ctxt->wellFormed = 0;
4233 ctxt->disableSAX = 1;
4234 if (attrName != NULL)
4235 xmlFree(attrName);
4236 if (defaultValue != NULL)
4237 xmlFree(defaultValue);
4238 if (tree != NULL)
4239 xmlFreeEnumeration(tree);
4240 break;
4241 }
4242 SKIP_BLANKS;
4243 }
4244 if (check == CUR_PTR) {
4245 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4246 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4247 ctxt->sax->error(ctxt->userData,
4248 "xmlParseAttributeListDecl: detected internal error\n");
4249 if (attrName != NULL)
4250 xmlFree(attrName);
4251 if (defaultValue != NULL)
4252 xmlFree(defaultValue);
4253 if (tree != NULL)
4254 xmlFreeEnumeration(tree);
4255 break;
4256 }
4257 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4258 (ctxt->sax->attributeDecl != NULL))
4259 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4260 type, def, defaultValue, tree);
4261 if (attrName != NULL)
4262 xmlFree(attrName);
4263 if (defaultValue != NULL)
4264 xmlFree(defaultValue);
4265 GROW;
4266 }
4267 if (RAW == '>') {
4268 if (input != ctxt->input) {
4269 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4270 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4271 ctxt->sax->error(ctxt->userData,
4272"Attribute list declaration doesn't start and stop in the same entity\n");
4273 ctxt->wellFormed = 0;
4274 ctxt->disableSAX = 1;
4275 }
4276 NEXT;
4277 }
4278
4279 xmlFree(elemName);
4280 }
4281}
4282
4283/**
4284 * xmlParseElementMixedContentDecl:
4285 * @ctxt: an XML parser context
4286 *
4287 * parse the declaration for a Mixed Element content
4288 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4289 *
4290 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4291 * '(' S? '#PCDATA' S? ')'
4292 *
4293 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4294 *
4295 * [ VC: No Duplicate Types ]
4296 * The same name must not appear more than once in a single
4297 * mixed-content declaration.
4298 *
4299 * returns: the list of the xmlElementContentPtr describing the element choices
4300 */
4301xmlElementContentPtr
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004302xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004303 xmlElementContentPtr ret = NULL, cur = NULL, n;
4304 xmlChar *elem = NULL;
4305
4306 GROW;
4307 if ((RAW == '#') && (NXT(1) == 'P') &&
4308 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4309 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4310 (NXT(6) == 'A')) {
4311 SKIP(7);
4312 SKIP_BLANKS;
4313 SHRINK;
4314 if (RAW == ')') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004315 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4316 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4317 if (ctxt->vctxt.error != NULL)
4318 ctxt->vctxt.error(ctxt->vctxt.userData,
4319"Element content declaration doesn't start and stop in the same entity\n");
4320 ctxt->valid = 0;
4321 }
Owen Taylor3473f882001-02-23 17:55:21 +00004322 NEXT;
4323 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4324 if (RAW == '*') {
4325 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4326 NEXT;
4327 }
4328 return(ret);
4329 }
4330 if ((RAW == '(') || (RAW == '|')) {
4331 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4332 if (ret == NULL) return(NULL);
4333 }
4334 while (RAW == '|') {
4335 NEXT;
4336 if (elem == NULL) {
4337 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4338 if (ret == NULL) return(NULL);
4339 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004340 if (cur != NULL)
4341 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004342 cur = ret;
4343 } else {
4344 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4345 if (n == NULL) return(NULL);
4346 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004347 if (n->c1 != NULL)
4348 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004349 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004350 if (n != NULL)
4351 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004352 cur = n;
4353 xmlFree(elem);
4354 }
4355 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004356 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004357 if (elem == NULL) {
4358 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4359 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4360 ctxt->sax->error(ctxt->userData,
4361 "xmlParseElementMixedContentDecl : Name expected\n");
4362 ctxt->wellFormed = 0;
4363 ctxt->disableSAX = 1;
4364 xmlFreeElementContent(cur);
4365 return(NULL);
4366 }
4367 SKIP_BLANKS;
4368 GROW;
4369 }
4370 if ((RAW == ')') && (NXT(1) == '*')) {
4371 if (elem != NULL) {
4372 cur->c2 = xmlNewElementContent(elem,
4373 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004374 if (cur->c2 != NULL)
4375 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004376 xmlFree(elem);
4377 }
4378 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004379 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4380 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4381 if (ctxt->vctxt.error != NULL)
4382 ctxt->vctxt.error(ctxt->vctxt.userData,
4383"Element content declaration doesn't start and stop in the same entity\n");
4384 ctxt->valid = 0;
4385 }
Owen Taylor3473f882001-02-23 17:55:21 +00004386 SKIP(2);
4387 } else {
4388 if (elem != NULL) xmlFree(elem);
4389 xmlFreeElementContent(ret);
4390 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4391 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4392 ctxt->sax->error(ctxt->userData,
4393 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4394 ctxt->wellFormed = 0;
4395 ctxt->disableSAX = 1;
4396 return(NULL);
4397 }
4398
4399 } else {
4400 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4401 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4402 ctxt->sax->error(ctxt->userData,
4403 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4404 ctxt->wellFormed = 0;
4405 ctxt->disableSAX = 1;
4406 }
4407 return(ret);
4408}
4409
4410/**
4411 * xmlParseElementChildrenContentDecl:
4412 * @ctxt: an XML parser context
4413 *
4414 * parse the declaration for a Mixed Element content
4415 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4416 *
4417 *
4418 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4419 *
4420 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4421 *
4422 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4423 *
4424 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4425 *
4426 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4427 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004428 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004429 * opening or closing parentheses in a choice, seq, or Mixed
4430 * construct is contained in the replacement text for a parameter
4431 * entity, both must be contained in the same replacement text. For
4432 * interoperability, if a parameter-entity reference appears in a
4433 * choice, seq, or Mixed construct, its replacement text should not
4434 * be empty, and neither the first nor last non-blank character of
4435 * the replacement text should be a connector (| or ,).
4436 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004437 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004438 * hierarchy.
4439 */
4440xmlElementContentPtr
Owen Taylor3473f882001-02-23 17:55:21 +00004441xmlParseElementChildrenContentDecl
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004442(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004443 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4444 xmlChar *elem;
4445 xmlChar type = 0;
4446
4447 SKIP_BLANKS;
4448 GROW;
4449 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004450 xmlParserInputPtr input = ctxt->input;
4451
Owen Taylor3473f882001-02-23 17:55:21 +00004452 /* Recurse on first child */
4453 NEXT;
4454 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004455 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004456 SKIP_BLANKS;
4457 GROW;
4458 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004459 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004460 if (elem == NULL) {
4461 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4462 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4463 ctxt->sax->error(ctxt->userData,
4464 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4465 ctxt->wellFormed = 0;
4466 ctxt->disableSAX = 1;
4467 return(NULL);
4468 }
4469 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4470 GROW;
4471 if (RAW == '?') {
4472 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4473 NEXT;
4474 } else if (RAW == '*') {
4475 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4476 NEXT;
4477 } else if (RAW == '+') {
4478 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4479 NEXT;
4480 } else {
4481 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4482 }
4483 xmlFree(elem);
4484 GROW;
4485 }
4486 SKIP_BLANKS;
4487 SHRINK;
4488 while (RAW != ')') {
4489 /*
4490 * Each loop we parse one separator and one element.
4491 */
4492 if (RAW == ',') {
4493 if (type == 0) type = CUR;
4494
4495 /*
4496 * Detect "Name | Name , Name" error
4497 */
4498 else if (type != CUR) {
4499 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4500 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4501 ctxt->sax->error(ctxt->userData,
4502 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4503 type);
4504 ctxt->wellFormed = 0;
4505 ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004506 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004507 xmlFreeElementContent(last);
4508 if (ret != NULL)
4509 xmlFreeElementContent(ret);
4510 return(NULL);
4511 }
4512 NEXT;
4513
4514 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4515 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004516 if ((last != NULL) && (last != ret))
4517 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004518 xmlFreeElementContent(ret);
4519 return(NULL);
4520 }
4521 if (last == NULL) {
4522 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004523 if (ret != NULL)
4524 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004525 ret = cur = op;
4526 } else {
4527 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004528 if (op != NULL)
4529 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004530 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004531 if (last != NULL)
4532 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004533 cur =op;
4534 last = NULL;
4535 }
4536 } else if (RAW == '|') {
4537 if (type == 0) type = CUR;
4538
4539 /*
4540 * Detect "Name , Name | Name" error
4541 */
4542 else if (type != CUR) {
4543 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4544 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4545 ctxt->sax->error(ctxt->userData,
4546 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4547 type);
4548 ctxt->wellFormed = 0;
4549 ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004550 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004551 xmlFreeElementContent(last);
4552 if (ret != NULL)
4553 xmlFreeElementContent(ret);
4554 return(NULL);
4555 }
4556 NEXT;
4557
4558 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4559 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004560 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004561 xmlFreeElementContent(last);
4562 if (ret != NULL)
4563 xmlFreeElementContent(ret);
4564 return(NULL);
4565 }
4566 if (last == NULL) {
4567 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004568 if (ret != NULL)
4569 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004570 ret = cur = op;
4571 } else {
4572 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004573 if (op != NULL)
4574 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004575 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004576 if (last != NULL)
4577 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004578 cur =op;
4579 last = NULL;
4580 }
4581 } else {
4582 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4583 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4584 ctxt->sax->error(ctxt->userData,
4585 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4586 ctxt->wellFormed = 0;
4587 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004588 if (ret != NULL)
4589 xmlFreeElementContent(ret);
4590 return(NULL);
4591 }
4592 GROW;
4593 SKIP_BLANKS;
4594 GROW;
4595 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004596 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004597 /* Recurse on second child */
4598 NEXT;
4599 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004600 last = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004601 SKIP_BLANKS;
4602 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004603 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004604 if (elem == NULL) {
4605 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4606 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4607 ctxt->sax->error(ctxt->userData,
4608 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4609 ctxt->wellFormed = 0;
4610 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004611 if (ret != NULL)
4612 xmlFreeElementContent(ret);
4613 return(NULL);
4614 }
4615 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4616 xmlFree(elem);
4617 if (RAW == '?') {
4618 last->ocur = XML_ELEMENT_CONTENT_OPT;
4619 NEXT;
4620 } else if (RAW == '*') {
4621 last->ocur = XML_ELEMENT_CONTENT_MULT;
4622 NEXT;
4623 } else if (RAW == '+') {
4624 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4625 NEXT;
4626 } else {
4627 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4628 }
4629 }
4630 SKIP_BLANKS;
4631 GROW;
4632 }
4633 if ((cur != NULL) && (last != NULL)) {
4634 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004635 if (last != NULL)
4636 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004637 }
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004638 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4639 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4640 if (ctxt->vctxt.error != NULL)
4641 ctxt->vctxt.error(ctxt->vctxt.userData,
4642"Element content declaration doesn't start and stop in the same entity\n");
4643 ctxt->valid = 0;
4644 }
Owen Taylor3473f882001-02-23 17:55:21 +00004645 NEXT;
4646 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004647 if (ret != NULL)
4648 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004649 NEXT;
4650 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004651 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004652 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004653 cur = ret;
4654 /*
4655 * Some normalization:
4656 * (a | b* | c?)* == (a | b | c)*
4657 */
4658 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4659 if ((cur->c1 != NULL) &&
4660 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4661 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4662 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4663 if ((cur->c2 != NULL) &&
4664 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4665 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4666 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4667 cur = cur->c2;
4668 }
4669 }
Owen Taylor3473f882001-02-23 17:55:21 +00004670 NEXT;
4671 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004672 if (ret != NULL) {
4673 int found = 0;
4674
Daniel Veillarde470df72001-04-18 21:41:07 +00004675 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004676 /*
4677 * Some normalization:
4678 * (a | b*)+ == (a | b)*
4679 * (a | b?)+ == (a | b)*
4680 */
4681 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4682 if ((cur->c1 != NULL) &&
4683 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4684 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4685 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4686 found = 1;
4687 }
4688 if ((cur->c2 != NULL) &&
4689 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4690 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4691 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4692 found = 1;
4693 }
4694 cur = cur->c2;
4695 }
4696 if (found)
4697 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4698 }
Owen Taylor3473f882001-02-23 17:55:21 +00004699 NEXT;
4700 }
4701 return(ret);
4702}
4703
4704/**
4705 * xmlParseElementContentDecl:
4706 * @ctxt: an XML parser context
4707 * @name: the name of the element being defined.
4708 * @result: the Element Content pointer will be stored here if any
4709 *
4710 * parse the declaration for an Element content either Mixed or Children,
4711 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4712 *
4713 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4714 *
4715 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4716 */
4717
4718int
4719xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4720 xmlElementContentPtr *result) {
4721
4722 xmlElementContentPtr tree = NULL;
4723 xmlParserInputPtr input = ctxt->input;
4724 int res;
4725
4726 *result = NULL;
4727
4728 if (RAW != '(') {
4729 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4730 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4731 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004732 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004733 ctxt->wellFormed = 0;
4734 ctxt->disableSAX = 1;
4735 return(-1);
4736 }
4737 NEXT;
4738 GROW;
4739 SKIP_BLANKS;
4740 if ((RAW == '#') && (NXT(1) == 'P') &&
4741 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4742 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4743 (NXT(6) == 'A')) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004744 tree = xmlParseElementMixedContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004745 res = XML_ELEMENT_TYPE_MIXED;
4746 } else {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004747 tree = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004748 res = XML_ELEMENT_TYPE_ELEMENT;
4749 }
Owen Taylor3473f882001-02-23 17:55:21 +00004750 SKIP_BLANKS;
4751 *result = tree;
4752 return(res);
4753}
4754
4755/**
4756 * xmlParseElementDecl:
4757 * @ctxt: an XML parser context
4758 *
4759 * parse an Element declaration.
4760 *
4761 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4762 *
4763 * [ VC: Unique Element Type Declaration ]
4764 * No element type may be declared more than once
4765 *
4766 * Returns the type of the element, or -1 in case of error
4767 */
4768int
4769xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4770 xmlChar *name;
4771 int ret = -1;
4772 xmlElementContentPtr content = NULL;
4773
4774 GROW;
4775 if ((RAW == '<') && (NXT(1) == '!') &&
4776 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4777 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4778 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4779 (NXT(8) == 'T')) {
4780 xmlParserInputPtr input = ctxt->input;
4781
4782 SKIP(9);
4783 if (!IS_BLANK(CUR)) {
4784 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4785 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4786 ctxt->sax->error(ctxt->userData,
4787 "Space required after 'ELEMENT'\n");
4788 ctxt->wellFormed = 0;
4789 ctxt->disableSAX = 1;
4790 }
4791 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004792 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004793 if (name == NULL) {
4794 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4795 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4796 ctxt->sax->error(ctxt->userData,
4797 "xmlParseElementDecl: no name for Element\n");
4798 ctxt->wellFormed = 0;
4799 ctxt->disableSAX = 1;
4800 return(-1);
4801 }
4802 while ((RAW == 0) && (ctxt->inputNr > 1))
4803 xmlPopInput(ctxt);
4804 if (!IS_BLANK(CUR)) {
4805 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4806 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4807 ctxt->sax->error(ctxt->userData,
4808 "Space required after the element name\n");
4809 ctxt->wellFormed = 0;
4810 ctxt->disableSAX = 1;
4811 }
4812 SKIP_BLANKS;
4813 if ((RAW == 'E') && (NXT(1) == 'M') &&
4814 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4815 (NXT(4) == 'Y')) {
4816 SKIP(5);
4817 /*
4818 * Element must always be empty.
4819 */
4820 ret = XML_ELEMENT_TYPE_EMPTY;
4821 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4822 (NXT(2) == 'Y')) {
4823 SKIP(3);
4824 /*
4825 * Element is a generic container.
4826 */
4827 ret = XML_ELEMENT_TYPE_ANY;
4828 } else if (RAW == '(') {
4829 ret = xmlParseElementContentDecl(ctxt, name, &content);
4830 } else {
4831 /*
4832 * [ WFC: PEs in Internal Subset ] error handling.
4833 */
4834 if ((RAW == '%') && (ctxt->external == 0) &&
4835 (ctxt->inputNr == 1)) {
4836 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4837 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4838 ctxt->sax->error(ctxt->userData,
4839 "PEReference: forbidden within markup decl in internal subset\n");
4840 } else {
4841 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4842 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4843 ctxt->sax->error(ctxt->userData,
4844 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4845 }
4846 ctxt->wellFormed = 0;
4847 ctxt->disableSAX = 1;
4848 if (name != NULL) xmlFree(name);
4849 return(-1);
4850 }
4851
4852 SKIP_BLANKS;
4853 /*
4854 * Pop-up of finished entities.
4855 */
4856 while ((RAW == 0) && (ctxt->inputNr > 1))
4857 xmlPopInput(ctxt);
4858 SKIP_BLANKS;
4859
4860 if (RAW != '>') {
4861 ctxt->errNo = XML_ERR_GT_REQUIRED;
4862 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4863 ctxt->sax->error(ctxt->userData,
4864 "xmlParseElementDecl: expected '>' at the end\n");
4865 ctxt->wellFormed = 0;
4866 ctxt->disableSAX = 1;
4867 } else {
4868 if (input != ctxt->input) {
4869 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4870 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4871 ctxt->sax->error(ctxt->userData,
4872"Element declaration doesn't start and stop in the same entity\n");
4873 ctxt->wellFormed = 0;
4874 ctxt->disableSAX = 1;
4875 }
4876
4877 NEXT;
4878 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4879 (ctxt->sax->elementDecl != NULL))
4880 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4881 content);
4882 }
4883 if (content != NULL) {
4884 xmlFreeElementContent(content);
4885 }
4886 if (name != NULL) {
4887 xmlFree(name);
4888 }
4889 }
4890 return(ret);
4891}
4892
4893/**
Owen Taylor3473f882001-02-23 17:55:21 +00004894 * xmlParseConditionalSections
4895 * @ctxt: an XML parser context
4896 *
4897 * [61] conditionalSect ::= includeSect | ignoreSect
4898 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4899 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4900 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4901 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4902 */
4903
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004904static void
Owen Taylor3473f882001-02-23 17:55:21 +00004905xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4906 SKIP(3);
4907 SKIP_BLANKS;
4908 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4909 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4910 (NXT(6) == 'E')) {
4911 SKIP(7);
4912 SKIP_BLANKS;
4913 if (RAW != '[') {
4914 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4915 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4916 ctxt->sax->error(ctxt->userData,
4917 "XML conditional section '[' expected\n");
4918 ctxt->wellFormed = 0;
4919 ctxt->disableSAX = 1;
4920 } else {
4921 NEXT;
4922 }
4923 if (xmlParserDebugEntities) {
4924 if ((ctxt->input != NULL) && (ctxt->input->filename))
4925 xmlGenericError(xmlGenericErrorContext,
4926 "%s(%d): ", ctxt->input->filename,
4927 ctxt->input->line);
4928 xmlGenericError(xmlGenericErrorContext,
4929 "Entering INCLUDE Conditional Section\n");
4930 }
4931
4932 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4933 (NXT(2) != '>'))) {
4934 const xmlChar *check = CUR_PTR;
4935 int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00004936
4937 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4938 xmlParseConditionalSections(ctxt);
4939 } else if (IS_BLANK(CUR)) {
4940 NEXT;
4941 } else if (RAW == '%') {
4942 xmlParsePEReference(ctxt);
4943 } else
4944 xmlParseMarkupDecl(ctxt);
4945
4946 /*
4947 * Pop-up of finished entities.
4948 */
4949 while ((RAW == 0) && (ctxt->inputNr > 1))
4950 xmlPopInput(ctxt);
4951
Daniel Veillardfdc91562002-07-01 21:52:03 +00004952 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004953 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4954 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4955 ctxt->sax->error(ctxt->userData,
4956 "Content error in the external subset\n");
4957 ctxt->wellFormed = 0;
4958 ctxt->disableSAX = 1;
4959 break;
4960 }
4961 }
4962 if (xmlParserDebugEntities) {
4963 if ((ctxt->input != NULL) && (ctxt->input->filename))
4964 xmlGenericError(xmlGenericErrorContext,
4965 "%s(%d): ", ctxt->input->filename,
4966 ctxt->input->line);
4967 xmlGenericError(xmlGenericErrorContext,
4968 "Leaving INCLUDE Conditional Section\n");
4969 }
4970
4971 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4972 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4973 int state;
4974 int instate;
4975 int depth = 0;
4976
4977 SKIP(6);
4978 SKIP_BLANKS;
4979 if (RAW != '[') {
4980 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4981 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4982 ctxt->sax->error(ctxt->userData,
4983 "XML conditional section '[' expected\n");
4984 ctxt->wellFormed = 0;
4985 ctxt->disableSAX = 1;
4986 } else {
4987 NEXT;
4988 }
4989 if (xmlParserDebugEntities) {
4990 if ((ctxt->input != NULL) && (ctxt->input->filename))
4991 xmlGenericError(xmlGenericErrorContext,
4992 "%s(%d): ", ctxt->input->filename,
4993 ctxt->input->line);
4994 xmlGenericError(xmlGenericErrorContext,
4995 "Entering IGNORE Conditional Section\n");
4996 }
4997
4998 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004999 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005000 * But disable SAX event generating DTD building in the meantime
5001 */
5002 state = ctxt->disableSAX;
5003 instate = ctxt->instate;
5004 ctxt->disableSAX = 1;
5005 ctxt->instate = XML_PARSER_IGNORE;
5006
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005007 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005008 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5009 depth++;
5010 SKIP(3);
5011 continue;
5012 }
5013 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5014 if (--depth >= 0) SKIP(3);
5015 continue;
5016 }
5017 NEXT;
5018 continue;
5019 }
5020
5021 ctxt->disableSAX = state;
5022 ctxt->instate = instate;
5023
5024 if (xmlParserDebugEntities) {
5025 if ((ctxt->input != NULL) && (ctxt->input->filename))
5026 xmlGenericError(xmlGenericErrorContext,
5027 "%s(%d): ", ctxt->input->filename,
5028 ctxt->input->line);
5029 xmlGenericError(xmlGenericErrorContext,
5030 "Leaving IGNORE Conditional Section\n");
5031 }
5032
5033 } else {
5034 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5035 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5036 ctxt->sax->error(ctxt->userData,
5037 "XML conditional section INCLUDE or IGNORE keyword expected\n");
5038 ctxt->wellFormed = 0;
5039 ctxt->disableSAX = 1;
5040 }
5041
5042 if (RAW == 0)
5043 SHRINK;
5044
5045 if (RAW == 0) {
5046 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
5047 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5048 ctxt->sax->error(ctxt->userData,
5049 "XML conditional section not closed\n");
5050 ctxt->wellFormed = 0;
5051 ctxt->disableSAX = 1;
5052 } else {
5053 SKIP(3);
5054 }
5055}
5056
5057/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005058 * xmlParseMarkupDecl:
5059 * @ctxt: an XML parser context
5060 *
5061 * parse Markup declarations
5062 *
5063 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5064 * NotationDecl | PI | Comment
5065 *
5066 * [ VC: Proper Declaration/PE Nesting ]
5067 * Parameter-entity replacement text must be properly nested with
5068 * markup declarations. That is to say, if either the first character
5069 * or the last character of a markup declaration (markupdecl above) is
5070 * contained in the replacement text for a parameter-entity reference,
5071 * both must be contained in the same replacement text.
5072 *
5073 * [ WFC: PEs in Internal Subset ]
5074 * In the internal DTD subset, parameter-entity references can occur
5075 * only where markup declarations can occur, not within markup declarations.
5076 * (This does not apply to references that occur in external parameter
5077 * entities or to the external subset.)
5078 */
5079void
5080xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5081 GROW;
5082 xmlParseElementDecl(ctxt);
5083 xmlParseAttributeListDecl(ctxt);
5084 xmlParseEntityDecl(ctxt);
5085 xmlParseNotationDecl(ctxt);
5086 xmlParsePI(ctxt);
5087 xmlParseComment(ctxt);
5088 /*
5089 * This is only for internal subset. On external entities,
5090 * the replacement is done before parsing stage
5091 */
5092 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5093 xmlParsePEReference(ctxt);
5094
5095 /*
5096 * Conditional sections are allowed from entities included
5097 * by PE References in the internal subset.
5098 */
5099 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5100 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5101 xmlParseConditionalSections(ctxt);
5102 }
5103 }
5104
5105 ctxt->instate = XML_PARSER_DTD;
5106}
5107
5108/**
5109 * xmlParseTextDecl:
5110 * @ctxt: an XML parser context
5111 *
5112 * parse an XML declaration header for external entities
5113 *
5114 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5115 *
5116 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5117 */
5118
5119void
5120xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5121 xmlChar *version;
5122
5123 /*
5124 * We know that '<?xml' is here.
5125 */
5126 if ((RAW == '<') && (NXT(1) == '?') &&
5127 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5128 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5129 SKIP(5);
5130 } else {
5131 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
5132 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5133 ctxt->sax->error(ctxt->userData,
5134 "Text declaration '<?xml' required\n");
5135 ctxt->wellFormed = 0;
5136 ctxt->disableSAX = 1;
5137
5138 return;
5139 }
5140
5141 if (!IS_BLANK(CUR)) {
5142 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5143 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5144 ctxt->sax->error(ctxt->userData,
5145 "Space needed after '<?xml'\n");
5146 ctxt->wellFormed = 0;
5147 ctxt->disableSAX = 1;
5148 }
5149 SKIP_BLANKS;
5150
5151 /*
5152 * We may have the VersionInfo here.
5153 */
5154 version = xmlParseVersionInfo(ctxt);
5155 if (version == NULL)
5156 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005157 else {
5158 if (!IS_BLANK(CUR)) {
5159 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5160 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5161 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5162 ctxt->wellFormed = 0;
5163 ctxt->disableSAX = 1;
5164 }
5165 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005166 ctxt->input->version = version;
5167
5168 /*
5169 * We must have the encoding declaration
5170 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005171 xmlParseEncodingDecl(ctxt);
5172 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5173 /*
5174 * The XML REC instructs us to stop parsing right here
5175 */
5176 return;
5177 }
5178
5179 SKIP_BLANKS;
5180 if ((RAW == '?') && (NXT(1) == '>')) {
5181 SKIP(2);
5182 } else if (RAW == '>') {
5183 /* Deprecated old WD ... */
5184 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5185 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5186 ctxt->sax->error(ctxt->userData,
5187 "XML declaration must end-up with '?>'\n");
5188 ctxt->wellFormed = 0;
5189 ctxt->disableSAX = 1;
5190 NEXT;
5191 } else {
5192 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5193 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5194 ctxt->sax->error(ctxt->userData,
5195 "parsing XML declaration: '?>' expected\n");
5196 ctxt->wellFormed = 0;
5197 ctxt->disableSAX = 1;
5198 MOVETO_ENDTAG(CUR_PTR);
5199 NEXT;
5200 }
5201}
5202
5203/**
Owen Taylor3473f882001-02-23 17:55:21 +00005204 * xmlParseExternalSubset:
5205 * @ctxt: an XML parser context
5206 * @ExternalID: the external identifier
5207 * @SystemID: the system identifier (or URL)
5208 *
5209 * parse Markup declarations from an external subset
5210 *
5211 * [30] extSubset ::= textDecl? extSubsetDecl
5212 *
5213 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5214 */
5215void
5216xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5217 const xmlChar *SystemID) {
5218 GROW;
5219 if ((RAW == '<') && (NXT(1) == '?') &&
5220 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5221 (NXT(4) == 'l')) {
5222 xmlParseTextDecl(ctxt);
5223 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5224 /*
5225 * The XML REC instructs us to stop parsing right here
5226 */
5227 ctxt->instate = XML_PARSER_EOF;
5228 return;
5229 }
5230 }
5231 if (ctxt->myDoc == NULL) {
5232 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5233 }
5234 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5235 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5236
5237 ctxt->instate = XML_PARSER_DTD;
5238 ctxt->external = 1;
5239 while (((RAW == '<') && (NXT(1) == '?')) ||
5240 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005241 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005242 const xmlChar *check = CUR_PTR;
5243 int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005244
5245 GROW;
5246 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5247 xmlParseConditionalSections(ctxt);
5248 } else if (IS_BLANK(CUR)) {
5249 NEXT;
5250 } else if (RAW == '%') {
5251 xmlParsePEReference(ctxt);
5252 } else
5253 xmlParseMarkupDecl(ctxt);
5254
5255 /*
5256 * Pop-up of finished entities.
5257 */
5258 while ((RAW == 0) && (ctxt->inputNr > 1))
5259 xmlPopInput(ctxt);
5260
Daniel Veillardfdc91562002-07-01 21:52:03 +00005261 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005262 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5263 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5264 ctxt->sax->error(ctxt->userData,
5265 "Content error in the external subset\n");
5266 ctxt->wellFormed = 0;
5267 ctxt->disableSAX = 1;
5268 break;
5269 }
5270 }
5271
5272 if (RAW != 0) {
5273 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5274 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5275 ctxt->sax->error(ctxt->userData,
5276 "Extra content at the end of the document\n");
5277 ctxt->wellFormed = 0;
5278 ctxt->disableSAX = 1;
5279 }
5280
5281}
5282
5283/**
5284 * xmlParseReference:
5285 * @ctxt: an XML parser context
5286 *
5287 * parse and handle entity references in content, depending on the SAX
5288 * interface, this may end-up in a call to character() if this is a
5289 * CharRef, a predefined entity, if there is no reference() callback.
5290 * or if the parser was asked to switch to that mode.
5291 *
5292 * [67] Reference ::= EntityRef | CharRef
5293 */
5294void
5295xmlParseReference(xmlParserCtxtPtr ctxt) {
5296 xmlEntityPtr ent;
5297 xmlChar *val;
5298 if (RAW != '&') return;
5299
5300 if (NXT(1) == '#') {
5301 int i = 0;
5302 xmlChar out[10];
5303 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005304 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005305
5306 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5307 /*
5308 * So we are using non-UTF-8 buffers
5309 * Check that the char fit on 8bits, if not
5310 * generate a CharRef.
5311 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005312 if (value <= 0xFF) {
5313 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005314 out[1] = 0;
5315 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5316 (!ctxt->disableSAX))
5317 ctxt->sax->characters(ctxt->userData, out, 1);
5318 } else {
5319 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005320 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005321 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005322 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005323 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5324 (!ctxt->disableSAX))
5325 ctxt->sax->reference(ctxt->userData, out);
5326 }
5327 } else {
5328 /*
5329 * Just encode the value in UTF-8
5330 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005331 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005332 out[i] = 0;
5333 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5334 (!ctxt->disableSAX))
5335 ctxt->sax->characters(ctxt->userData, out, i);
5336 }
5337 } else {
5338 ent = xmlParseEntityRef(ctxt);
5339 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005340 if (!ctxt->wellFormed)
5341 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005342 if ((ent->name != NULL) &&
5343 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5344 xmlNodePtr list = NULL;
5345 int ret;
5346
5347
5348 /*
5349 * The first reference to the entity trigger a parsing phase
5350 * where the ent->children is filled with the result from
5351 * the parsing.
5352 */
5353 if (ent->children == NULL) {
5354 xmlChar *value;
5355 value = ent->content;
5356
5357 /*
5358 * Check that this entity is well formed
5359 */
5360 if ((value != NULL) &&
5361 (value[1] == 0) && (value[0] == '<') &&
5362 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5363 /*
5364 * DONE: get definite answer on this !!!
5365 * Lots of entity decls are used to declare a single
5366 * char
5367 * <!ENTITY lt "<">
5368 * Which seems to be valid since
5369 * 2.4: The ampersand character (&) and the left angle
5370 * bracket (<) may appear in their literal form only
5371 * when used ... They are also legal within the literal
5372 * entity value of an internal entity declaration;i
5373 * see "4.3.2 Well-Formed Parsed Entities".
5374 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5375 * Looking at the OASIS test suite and James Clark
5376 * tests, this is broken. However the XML REC uses
5377 * it. Is the XML REC not well-formed ????
5378 * This is a hack to avoid this problem
5379 *
5380 * ANSWER: since lt gt amp .. are already defined,
5381 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005382 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005383 * is lousy but acceptable.
5384 */
5385 list = xmlNewDocText(ctxt->myDoc, value);
5386 if (list != NULL) {
5387 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5388 (ent->children == NULL)) {
5389 ent->children = list;
5390 ent->last = list;
5391 list->parent = (xmlNodePtr) ent;
5392 } else {
5393 xmlFreeNodeList(list);
5394 }
5395 } else if (list != NULL) {
5396 xmlFreeNodeList(list);
5397 }
5398 } else {
5399 /*
5400 * 4.3.2: An internal general parsed entity is well-formed
5401 * if its replacement text matches the production labeled
5402 * content.
5403 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005404
5405 void *user_data;
5406 /*
5407 * This is a bit hackish but this seems the best
5408 * way to make sure both SAX and DOM entity support
5409 * behaves okay.
5410 */
5411 if (ctxt->userData == ctxt)
5412 user_data = NULL;
5413 else
5414 user_data = ctxt->userData;
5415
Owen Taylor3473f882001-02-23 17:55:21 +00005416 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5417 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005418 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5419 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005420 ctxt->depth--;
5421 } else if (ent->etype ==
5422 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5423 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005424 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005425 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005426 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005427 ctxt->depth--;
5428 } else {
5429 ret = -1;
5430 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5431 ctxt->sax->error(ctxt->userData,
5432 "Internal: invalid entity type\n");
5433 }
5434 if (ret == XML_ERR_ENTITY_LOOP) {
5435 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5436 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5437 ctxt->sax->error(ctxt->userData,
5438 "Detected entity reference loop\n");
5439 ctxt->wellFormed = 0;
5440 ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005441 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005442 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005443 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5444 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005445 (ent->children == NULL)) {
5446 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005447 if (ctxt->replaceEntities) {
5448 /*
5449 * Prune it directly in the generated document
5450 * except for single text nodes.
5451 */
5452 if ((list->type == XML_TEXT_NODE) &&
5453 (list->next == NULL)) {
5454 list->parent = (xmlNodePtr) ent;
5455 list = NULL;
5456 } else {
5457 while (list != NULL) {
5458 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005459 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005460 if (list->next == NULL)
5461 ent->last = list;
5462 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005463 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005464 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005465 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5466 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005467 }
5468 } else {
5469 while (list != NULL) {
5470 list->parent = (xmlNodePtr) ent;
5471 if (list->next == NULL)
5472 ent->last = list;
5473 list = list->next;
5474 }
Owen Taylor3473f882001-02-23 17:55:21 +00005475 }
5476 } else {
5477 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005478 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005479 }
5480 } else if (ret > 0) {
5481 ctxt->errNo = ret;
5482 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5483 ctxt->sax->error(ctxt->userData,
5484 "Entity value required\n");
5485 ctxt->wellFormed = 0;
5486 ctxt->disableSAX = 1;
5487 } else if (list != NULL) {
5488 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005489 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005490 }
5491 }
5492 }
5493 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5494 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5495 /*
5496 * Create a node.
5497 */
5498 ctxt->sax->reference(ctxt->userData, ent->name);
5499 return;
5500 } else if (ctxt->replaceEntities) {
5501 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5502 /*
5503 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005504 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005505 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005506 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005507 if (list == NULL) {
Daniel Veillard8107a222002-01-13 14:10:10 +00005508 xmlNodePtr new = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005509 cur = ent->children;
5510 while (cur != NULL) {
5511 new = xmlCopyNode(cur, 1);
Daniel Veillard8107a222002-01-13 14:10:10 +00005512 if (firstChild == NULL){
5513 firstChild = new;
5514 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005515 xmlAddChild(ctxt->node, new);
5516 if (cur == ent->last)
5517 break;
5518 cur = cur->next;
5519 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005520 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5521 xmlAddEntityReference(ent, firstChild, new);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005522 } else {
5523 /*
5524 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005525 * node with a possible previous text one which
5526 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005527 */
5528 if (ent->children->type == XML_TEXT_NODE)
5529 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5530 if ((ent->last != ent->children) &&
5531 (ent->last->type == XML_TEXT_NODE))
5532 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5533 xmlAddChildList(ctxt->node, ent->children);
5534 }
5535
Owen Taylor3473f882001-02-23 17:55:21 +00005536 /*
5537 * This is to avoid a nasty side effect, see
5538 * characters() in SAX.c
5539 */
5540 ctxt->nodemem = 0;
5541 ctxt->nodelen = 0;
5542 return;
5543 } else {
5544 /*
5545 * Probably running in SAX mode
5546 */
5547 xmlParserInputPtr input;
5548
5549 input = xmlNewEntityInputStream(ctxt, ent);
5550 xmlPushInput(ctxt, input);
5551 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5552 (RAW == '<') && (NXT(1) == '?') &&
5553 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5554 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5555 xmlParseTextDecl(ctxt);
5556 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5557 /*
5558 * The XML REC instructs us to stop parsing right here
5559 */
5560 ctxt->instate = XML_PARSER_EOF;
5561 return;
5562 }
5563 if (input->standalone == 1) {
5564 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5565 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5566 ctxt->sax->error(ctxt->userData,
5567 "external parsed entities cannot be standalone\n");
5568 ctxt->wellFormed = 0;
5569 ctxt->disableSAX = 1;
5570 }
5571 }
5572 return;
5573 }
5574 }
5575 } else {
5576 val = ent->content;
5577 if (val == NULL) return;
5578 /*
5579 * inline the entity.
5580 */
5581 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5582 (!ctxt->disableSAX))
5583 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5584 }
5585 }
5586}
5587
5588/**
5589 * xmlParseEntityRef:
5590 * @ctxt: an XML parser context
5591 *
5592 * parse ENTITY references declarations
5593 *
5594 * [68] EntityRef ::= '&' Name ';'
5595 *
5596 * [ WFC: Entity Declared ]
5597 * In a document without any DTD, a document with only an internal DTD
5598 * subset which contains no parameter entity references, or a document
5599 * with "standalone='yes'", the Name given in the entity reference
5600 * must match that in an entity declaration, except that well-formed
5601 * documents need not declare any of the following entities: amp, lt,
5602 * gt, apos, quot. The declaration of a parameter entity must precede
5603 * any reference to it. Similarly, the declaration of a general entity
5604 * must precede any reference to it which appears in a default value in an
5605 * attribute-list declaration. Note that if entities are declared in the
5606 * external subset or in external parameter entities, a non-validating
5607 * processor is not obligated to read and process their declarations;
5608 * for such documents, the rule that an entity must be declared is a
5609 * well-formedness constraint only if standalone='yes'.
5610 *
5611 * [ WFC: Parsed Entity ]
5612 * An entity reference must not contain the name of an unparsed entity
5613 *
5614 * Returns the xmlEntityPtr if found, or NULL otherwise.
5615 */
5616xmlEntityPtr
5617xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5618 xmlChar *name;
5619 xmlEntityPtr ent = NULL;
5620
5621 GROW;
5622
5623 if (RAW == '&') {
5624 NEXT;
5625 name = xmlParseName(ctxt);
5626 if (name == NULL) {
5627 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5628 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5629 ctxt->sax->error(ctxt->userData,
5630 "xmlParseEntityRef: no name\n");
5631 ctxt->wellFormed = 0;
5632 ctxt->disableSAX = 1;
5633 } else {
5634 if (RAW == ';') {
5635 NEXT;
5636 /*
5637 * Ask first SAX for entity resolution, otherwise try the
5638 * predefined set.
5639 */
5640 if (ctxt->sax != NULL) {
5641 if (ctxt->sax->getEntity != NULL)
5642 ent = ctxt->sax->getEntity(ctxt->userData, name);
5643 if (ent == NULL)
5644 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005645 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5646 ent = getEntity(ctxt, name);
5647 }
Owen Taylor3473f882001-02-23 17:55:21 +00005648 }
5649 /*
5650 * [ WFC: Entity Declared ]
5651 * In a document without any DTD, a document with only an
5652 * internal DTD subset which contains no parameter entity
5653 * references, or a document with "standalone='yes'", the
5654 * Name given in the entity reference must match that in an
5655 * entity declaration, except that well-formed documents
5656 * need not declare any of the following entities: amp, lt,
5657 * gt, apos, quot.
5658 * The declaration of a parameter entity must precede any
5659 * reference to it.
5660 * Similarly, the declaration of a general entity must
5661 * precede any reference to it which appears in a default
5662 * value in an attribute-list declaration. Note that if
5663 * entities are declared in the external subset or in
5664 * external parameter entities, a non-validating processor
5665 * is not obligated to read and process their declarations;
5666 * for such documents, the rule that an entity must be
5667 * declared is a well-formedness constraint only if
5668 * standalone='yes'.
5669 */
5670 if (ent == NULL) {
5671 if ((ctxt->standalone == 1) ||
5672 ((ctxt->hasExternalSubset == 0) &&
5673 (ctxt->hasPErefs == 0))) {
5674 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5675 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5676 ctxt->sax->error(ctxt->userData,
5677 "Entity '%s' not defined\n", name);
5678 ctxt->wellFormed = 0;
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005679 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005680 ctxt->disableSAX = 1;
5681 } else {
5682 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005683 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005684 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005685 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005686 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005687 }
5688 }
5689
5690 /*
5691 * [ WFC: Parsed Entity ]
5692 * An entity reference must not contain the name of an
5693 * unparsed entity
5694 */
5695 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5696 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5697 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5698 ctxt->sax->error(ctxt->userData,
5699 "Entity reference to unparsed entity %s\n", name);
5700 ctxt->wellFormed = 0;
5701 ctxt->disableSAX = 1;
5702 }
5703
5704 /*
5705 * [ WFC: No External Entity References ]
5706 * Attribute values cannot contain direct or indirect
5707 * entity references to external entities.
5708 */
5709 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5710 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5711 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5712 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5713 ctxt->sax->error(ctxt->userData,
5714 "Attribute references external entity '%s'\n", name);
5715 ctxt->wellFormed = 0;
5716 ctxt->disableSAX = 1;
5717 }
5718 /*
5719 * [ WFC: No < in Attribute Values ]
5720 * The replacement text of any entity referred to directly or
5721 * indirectly in an attribute value (other than "&lt;") must
5722 * not contain a <.
5723 */
5724 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5725 (ent != NULL) &&
5726 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5727 (ent->content != NULL) &&
5728 (xmlStrchr(ent->content, '<'))) {
5729 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5730 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5731 ctxt->sax->error(ctxt->userData,
5732 "'<' in entity '%s' is not allowed in attributes values\n", name);
5733 ctxt->wellFormed = 0;
5734 ctxt->disableSAX = 1;
5735 }
5736
5737 /*
5738 * Internal check, no parameter entities here ...
5739 */
5740 else {
5741 switch (ent->etype) {
5742 case XML_INTERNAL_PARAMETER_ENTITY:
5743 case XML_EXTERNAL_PARAMETER_ENTITY:
5744 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5745 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5746 ctxt->sax->error(ctxt->userData,
5747 "Attempt to reference the parameter entity '%s'\n", name);
5748 ctxt->wellFormed = 0;
5749 ctxt->disableSAX = 1;
5750 break;
5751 default:
5752 break;
5753 }
5754 }
5755
5756 /*
5757 * [ WFC: No Recursion ]
5758 * A parsed entity must not contain a recursive reference
5759 * to itself, either directly or indirectly.
5760 * Done somewhere else
5761 */
5762
5763 } else {
5764 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5765 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5766 ctxt->sax->error(ctxt->userData,
5767 "xmlParseEntityRef: expecting ';'\n");
5768 ctxt->wellFormed = 0;
5769 ctxt->disableSAX = 1;
5770 }
5771 xmlFree(name);
5772 }
5773 }
5774 return(ent);
5775}
5776
5777/**
5778 * xmlParseStringEntityRef:
5779 * @ctxt: an XML parser context
5780 * @str: a pointer to an index in the string
5781 *
5782 * parse ENTITY references declarations, but this version parses it from
5783 * a string value.
5784 *
5785 * [68] EntityRef ::= '&' Name ';'
5786 *
5787 * [ WFC: Entity Declared ]
5788 * In a document without any DTD, a document with only an internal DTD
5789 * subset which contains no parameter entity references, or a document
5790 * with "standalone='yes'", the Name given in the entity reference
5791 * must match that in an entity declaration, except that well-formed
5792 * documents need not declare any of the following entities: amp, lt,
5793 * gt, apos, quot. The declaration of a parameter entity must precede
5794 * any reference to it. Similarly, the declaration of a general entity
5795 * must precede any reference to it which appears in a default value in an
5796 * attribute-list declaration. Note that if entities are declared in the
5797 * external subset or in external parameter entities, a non-validating
5798 * processor is not obligated to read and process their declarations;
5799 * for such documents, the rule that an entity must be declared is a
5800 * well-formedness constraint only if standalone='yes'.
5801 *
5802 * [ WFC: Parsed Entity ]
5803 * An entity reference must not contain the name of an unparsed entity
5804 *
5805 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5806 * is updated to the current location in the string.
5807 */
5808xmlEntityPtr
5809xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5810 xmlChar *name;
5811 const xmlChar *ptr;
5812 xmlChar cur;
5813 xmlEntityPtr ent = NULL;
5814
5815 if ((str == NULL) || (*str == NULL))
5816 return(NULL);
5817 ptr = *str;
5818 cur = *ptr;
5819 if (cur == '&') {
5820 ptr++;
5821 cur = *ptr;
5822 name = xmlParseStringName(ctxt, &ptr);
5823 if (name == NULL) {
5824 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5825 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5826 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005827 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005828 ctxt->wellFormed = 0;
5829 ctxt->disableSAX = 1;
5830 } else {
5831 if (*ptr == ';') {
5832 ptr++;
5833 /*
5834 * Ask first SAX for entity resolution, otherwise try the
5835 * predefined set.
5836 */
5837 if (ctxt->sax != NULL) {
5838 if (ctxt->sax->getEntity != NULL)
5839 ent = ctxt->sax->getEntity(ctxt->userData, name);
5840 if (ent == NULL)
5841 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005842 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5843 ent = getEntity(ctxt, name);
5844 }
Owen Taylor3473f882001-02-23 17:55:21 +00005845 }
5846 /*
5847 * [ WFC: Entity Declared ]
5848 * In a document without any DTD, a document with only an
5849 * internal DTD subset which contains no parameter entity
5850 * references, or a document with "standalone='yes'", the
5851 * Name given in the entity reference must match that in an
5852 * entity declaration, except that well-formed documents
5853 * need not declare any of the following entities: amp, lt,
5854 * gt, apos, quot.
5855 * The declaration of a parameter entity must precede any
5856 * reference to it.
5857 * Similarly, the declaration of a general entity must
5858 * precede any reference to it which appears in a default
5859 * value in an attribute-list declaration. Note that if
5860 * entities are declared in the external subset or in
5861 * external parameter entities, a non-validating processor
5862 * is not obligated to read and process their declarations;
5863 * for such documents, the rule that an entity must be
5864 * declared is a well-formedness constraint only if
5865 * standalone='yes'.
5866 */
5867 if (ent == NULL) {
5868 if ((ctxt->standalone == 1) ||
5869 ((ctxt->hasExternalSubset == 0) &&
5870 (ctxt->hasPErefs == 0))) {
5871 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5872 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5873 ctxt->sax->error(ctxt->userData,
5874 "Entity '%s' not defined\n", name);
5875 ctxt->wellFormed = 0;
5876 ctxt->disableSAX = 1;
5877 } else {
5878 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5879 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5880 ctxt->sax->warning(ctxt->userData,
5881 "Entity '%s' not defined\n", name);
5882 }
5883 }
5884
5885 /*
5886 * [ WFC: Parsed Entity ]
5887 * An entity reference must not contain the name of an
5888 * unparsed entity
5889 */
5890 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5891 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5892 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5893 ctxt->sax->error(ctxt->userData,
5894 "Entity reference to unparsed entity %s\n", name);
5895 ctxt->wellFormed = 0;
5896 ctxt->disableSAX = 1;
5897 }
5898
5899 /*
5900 * [ WFC: No External Entity References ]
5901 * Attribute values cannot contain direct or indirect
5902 * entity references to external entities.
5903 */
5904 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5905 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5906 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5907 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5908 ctxt->sax->error(ctxt->userData,
5909 "Attribute references external entity '%s'\n", name);
5910 ctxt->wellFormed = 0;
5911 ctxt->disableSAX = 1;
5912 }
5913 /*
5914 * [ WFC: No < in Attribute Values ]
5915 * The replacement text of any entity referred to directly or
5916 * indirectly in an attribute value (other than "&lt;") must
5917 * not contain a <.
5918 */
5919 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5920 (ent != NULL) &&
5921 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5922 (ent->content != NULL) &&
5923 (xmlStrchr(ent->content, '<'))) {
5924 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5925 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5926 ctxt->sax->error(ctxt->userData,
5927 "'<' in entity '%s' is not allowed in attributes values\n", name);
5928 ctxt->wellFormed = 0;
5929 ctxt->disableSAX = 1;
5930 }
5931
5932 /*
5933 * Internal check, no parameter entities here ...
5934 */
5935 else {
5936 switch (ent->etype) {
5937 case XML_INTERNAL_PARAMETER_ENTITY:
5938 case XML_EXTERNAL_PARAMETER_ENTITY:
5939 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5940 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5941 ctxt->sax->error(ctxt->userData,
5942 "Attempt to reference the parameter entity '%s'\n", name);
5943 ctxt->wellFormed = 0;
5944 ctxt->disableSAX = 1;
5945 break;
5946 default:
5947 break;
5948 }
5949 }
5950
5951 /*
5952 * [ WFC: No Recursion ]
5953 * A parsed entity must not contain a recursive reference
5954 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005955 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00005956 */
5957
5958 } else {
5959 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5960 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5961 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005962 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005963 ctxt->wellFormed = 0;
5964 ctxt->disableSAX = 1;
5965 }
5966 xmlFree(name);
5967 }
5968 }
5969 *str = ptr;
5970 return(ent);
5971}
5972
5973/**
5974 * xmlParsePEReference:
5975 * @ctxt: an XML parser context
5976 *
5977 * parse PEReference declarations
5978 * The entity content is handled directly by pushing it's content as
5979 * a new input stream.
5980 *
5981 * [69] PEReference ::= '%' Name ';'
5982 *
5983 * [ WFC: No Recursion ]
5984 * A parsed entity must not contain a recursive
5985 * reference to itself, either directly or indirectly.
5986 *
5987 * [ WFC: Entity Declared ]
5988 * In a document without any DTD, a document with only an internal DTD
5989 * subset which contains no parameter entity references, or a document
5990 * with "standalone='yes'", ... ... The declaration of a parameter
5991 * entity must precede any reference to it...
5992 *
5993 * [ VC: Entity Declared ]
5994 * In a document with an external subset or external parameter entities
5995 * with "standalone='no'", ... ... The declaration of a parameter entity
5996 * must precede any reference to it...
5997 *
5998 * [ WFC: In DTD ]
5999 * Parameter-entity references may only appear in the DTD.
6000 * NOTE: misleading but this is handled.
6001 */
6002void
6003xmlParsePEReference(xmlParserCtxtPtr ctxt) {
6004 xmlChar *name;
6005 xmlEntityPtr entity = NULL;
6006 xmlParserInputPtr input;
6007
6008 if (RAW == '%') {
6009 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006010 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006011 if (name == NULL) {
6012 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6013 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6014 ctxt->sax->error(ctxt->userData,
6015 "xmlParsePEReference: no name\n");
6016 ctxt->wellFormed = 0;
6017 ctxt->disableSAX = 1;
6018 } else {
6019 if (RAW == ';') {
6020 NEXT;
6021 if ((ctxt->sax != NULL) &&
6022 (ctxt->sax->getParameterEntity != NULL))
6023 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6024 name);
6025 if (entity == NULL) {
6026 /*
6027 * [ WFC: Entity Declared ]
6028 * In a document without any DTD, a document with only an
6029 * internal DTD subset which contains no parameter entity
6030 * references, or a document with "standalone='yes'", ...
6031 * ... The declaration of a parameter entity must precede
6032 * any reference to it...
6033 */
6034 if ((ctxt->standalone == 1) ||
6035 ((ctxt->hasExternalSubset == 0) &&
6036 (ctxt->hasPErefs == 0))) {
6037 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6038 if ((!ctxt->disableSAX) &&
6039 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6040 ctxt->sax->error(ctxt->userData,
6041 "PEReference: %%%s; not found\n", name);
6042 ctxt->wellFormed = 0;
6043 ctxt->disableSAX = 1;
6044 } else {
6045 /*
6046 * [ VC: Entity Declared ]
6047 * In a document with an external subset or external
6048 * parameter entities with "standalone='no'", ...
6049 * ... The declaration of a parameter entity must precede
6050 * any reference to it...
6051 */
6052 if ((!ctxt->disableSAX) &&
6053 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6054 ctxt->sax->warning(ctxt->userData,
6055 "PEReference: %%%s; not found\n", name);
6056 ctxt->valid = 0;
6057 }
6058 } else {
6059 /*
6060 * Internal checking in case the entity quest barfed
6061 */
6062 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6063 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6064 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6065 ctxt->sax->warning(ctxt->userData,
6066 "Internal: %%%s; is not a parameter entity\n", name);
Daniel Veillardf5582f12002-06-11 10:08:16 +00006067 } else if (ctxt->input->free != deallocblankswrapper) {
6068 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
6069 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00006070 } else {
6071 /*
6072 * TODO !!!
6073 * handle the extra spaces added before and after
6074 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6075 */
6076 input = xmlNewEntityInputStream(ctxt, entity);
6077 xmlPushInput(ctxt, input);
6078 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6079 (RAW == '<') && (NXT(1) == '?') &&
6080 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6081 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6082 xmlParseTextDecl(ctxt);
6083 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6084 /*
6085 * The XML REC instructs us to stop parsing
6086 * right here
6087 */
6088 ctxt->instate = XML_PARSER_EOF;
6089 xmlFree(name);
6090 return;
6091 }
6092 }
Owen Taylor3473f882001-02-23 17:55:21 +00006093 }
6094 }
6095 ctxt->hasPErefs = 1;
6096 } else {
6097 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6098 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6099 ctxt->sax->error(ctxt->userData,
6100 "xmlParsePEReference: expecting ';'\n");
6101 ctxt->wellFormed = 0;
6102 ctxt->disableSAX = 1;
6103 }
6104 xmlFree(name);
6105 }
6106 }
6107}
6108
6109/**
6110 * xmlParseStringPEReference:
6111 * @ctxt: an XML parser context
6112 * @str: a pointer to an index in the string
6113 *
6114 * parse PEReference declarations
6115 *
6116 * [69] PEReference ::= '%' Name ';'
6117 *
6118 * [ WFC: No Recursion ]
6119 * A parsed entity must not contain a recursive
6120 * reference to itself, either directly or indirectly.
6121 *
6122 * [ WFC: Entity Declared ]
6123 * In a document without any DTD, a document with only an internal DTD
6124 * subset which contains no parameter entity references, or a document
6125 * with "standalone='yes'", ... ... The declaration of a parameter
6126 * entity must precede any reference to it...
6127 *
6128 * [ VC: Entity Declared ]
6129 * In a document with an external subset or external parameter entities
6130 * with "standalone='no'", ... ... The declaration of a parameter entity
6131 * must precede any reference to it...
6132 *
6133 * [ WFC: In DTD ]
6134 * Parameter-entity references may only appear in the DTD.
6135 * NOTE: misleading but this is handled.
6136 *
6137 * Returns the string of the entity content.
6138 * str is updated to the current value of the index
6139 */
6140xmlEntityPtr
6141xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6142 const xmlChar *ptr;
6143 xmlChar cur;
6144 xmlChar *name;
6145 xmlEntityPtr entity = NULL;
6146
6147 if ((str == NULL) || (*str == NULL)) return(NULL);
6148 ptr = *str;
6149 cur = *ptr;
6150 if (cur == '%') {
6151 ptr++;
6152 cur = *ptr;
6153 name = xmlParseStringName(ctxt, &ptr);
6154 if (name == NULL) {
6155 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6156 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6157 ctxt->sax->error(ctxt->userData,
6158 "xmlParseStringPEReference: no name\n");
6159 ctxt->wellFormed = 0;
6160 ctxt->disableSAX = 1;
6161 } else {
6162 cur = *ptr;
6163 if (cur == ';') {
6164 ptr++;
6165 cur = *ptr;
6166 if ((ctxt->sax != NULL) &&
6167 (ctxt->sax->getParameterEntity != NULL))
6168 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6169 name);
6170 if (entity == NULL) {
6171 /*
6172 * [ WFC: Entity Declared ]
6173 * In a document without any DTD, a document with only an
6174 * internal DTD subset which contains no parameter entity
6175 * references, or a document with "standalone='yes'", ...
6176 * ... The declaration of a parameter entity must precede
6177 * any reference to it...
6178 */
6179 if ((ctxt->standalone == 1) ||
6180 ((ctxt->hasExternalSubset == 0) &&
6181 (ctxt->hasPErefs == 0))) {
6182 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6183 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6184 ctxt->sax->error(ctxt->userData,
6185 "PEReference: %%%s; not found\n", name);
6186 ctxt->wellFormed = 0;
6187 ctxt->disableSAX = 1;
6188 } else {
6189 /*
6190 * [ VC: Entity Declared ]
6191 * In a document with an external subset or external
6192 * parameter entities with "standalone='no'", ...
6193 * ... The declaration of a parameter entity must
6194 * precede any reference to it...
6195 */
6196 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6197 ctxt->sax->warning(ctxt->userData,
6198 "PEReference: %%%s; not found\n", name);
6199 ctxt->valid = 0;
6200 }
6201 } else {
6202 /*
6203 * Internal checking in case the entity quest barfed
6204 */
6205 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6206 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6207 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6208 ctxt->sax->warning(ctxt->userData,
6209 "Internal: %%%s; is not a parameter entity\n", name);
6210 }
6211 }
6212 ctxt->hasPErefs = 1;
6213 } else {
6214 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6215 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6216 ctxt->sax->error(ctxt->userData,
6217 "xmlParseStringPEReference: expecting ';'\n");
6218 ctxt->wellFormed = 0;
6219 ctxt->disableSAX = 1;
6220 }
6221 xmlFree(name);
6222 }
6223 }
6224 *str = ptr;
6225 return(entity);
6226}
6227
6228/**
6229 * xmlParseDocTypeDecl:
6230 * @ctxt: an XML parser context
6231 *
6232 * parse a DOCTYPE declaration
6233 *
6234 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6235 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6236 *
6237 * [ VC: Root Element Type ]
6238 * The Name in the document type declaration must match the element
6239 * type of the root element.
6240 */
6241
6242void
6243xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6244 xmlChar *name = NULL;
6245 xmlChar *ExternalID = NULL;
6246 xmlChar *URI = NULL;
6247
6248 /*
6249 * We know that '<!DOCTYPE' has been detected.
6250 */
6251 SKIP(9);
6252
6253 SKIP_BLANKS;
6254
6255 /*
6256 * Parse the DOCTYPE name.
6257 */
6258 name = xmlParseName(ctxt);
6259 if (name == NULL) {
6260 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6261 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6262 ctxt->sax->error(ctxt->userData,
6263 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6264 ctxt->wellFormed = 0;
6265 ctxt->disableSAX = 1;
6266 }
6267 ctxt->intSubName = name;
6268
6269 SKIP_BLANKS;
6270
6271 /*
6272 * Check for SystemID and ExternalID
6273 */
6274 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6275
6276 if ((URI != NULL) || (ExternalID != NULL)) {
6277 ctxt->hasExternalSubset = 1;
6278 }
6279 ctxt->extSubURI = URI;
6280 ctxt->extSubSystem = ExternalID;
6281
6282 SKIP_BLANKS;
6283
6284 /*
6285 * Create and update the internal subset.
6286 */
6287 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6288 (!ctxt->disableSAX))
6289 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6290
6291 /*
6292 * Is there any internal subset declarations ?
6293 * they are handled separately in xmlParseInternalSubset()
6294 */
6295 if (RAW == '[')
6296 return;
6297
6298 /*
6299 * We should be at the end of the DOCTYPE declaration.
6300 */
6301 if (RAW != '>') {
6302 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6303 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006304 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006305 ctxt->wellFormed = 0;
6306 ctxt->disableSAX = 1;
6307 }
6308 NEXT;
6309}
6310
6311/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006312 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006313 * @ctxt: an XML parser context
6314 *
6315 * parse the internal subset declaration
6316 *
6317 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6318 */
6319
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006320static void
Owen Taylor3473f882001-02-23 17:55:21 +00006321xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6322 /*
6323 * Is there any DTD definition ?
6324 */
6325 if (RAW == '[') {
6326 ctxt->instate = XML_PARSER_DTD;
6327 NEXT;
6328 /*
6329 * Parse the succession of Markup declarations and
6330 * PEReferences.
6331 * Subsequence (markupdecl | PEReference | S)*
6332 */
6333 while (RAW != ']') {
6334 const xmlChar *check = CUR_PTR;
6335 int cons = ctxt->input->consumed;
6336
6337 SKIP_BLANKS;
6338 xmlParseMarkupDecl(ctxt);
6339 xmlParsePEReference(ctxt);
6340
6341 /*
6342 * Pop-up of finished entities.
6343 */
6344 while ((RAW == 0) && (ctxt->inputNr > 1))
6345 xmlPopInput(ctxt);
6346
6347 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6348 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6350 ctxt->sax->error(ctxt->userData,
6351 "xmlParseInternalSubset: error detected in Markup declaration\n");
6352 ctxt->wellFormed = 0;
6353 ctxt->disableSAX = 1;
6354 break;
6355 }
6356 }
6357 if (RAW == ']') {
6358 NEXT;
6359 SKIP_BLANKS;
6360 }
6361 }
6362
6363 /*
6364 * We should be at the end of the DOCTYPE declaration.
6365 */
6366 if (RAW != '>') {
6367 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6368 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006369 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006370 ctxt->wellFormed = 0;
6371 ctxt->disableSAX = 1;
6372 }
6373 NEXT;
6374}
6375
6376/**
6377 * xmlParseAttribute:
6378 * @ctxt: an XML parser context
6379 * @value: a xmlChar ** used to store the value of the attribute
6380 *
6381 * parse an attribute
6382 *
6383 * [41] Attribute ::= Name Eq AttValue
6384 *
6385 * [ WFC: No External Entity References ]
6386 * Attribute values cannot contain direct or indirect entity references
6387 * to external entities.
6388 *
6389 * [ WFC: No < in Attribute Values ]
6390 * The replacement text of any entity referred to directly or indirectly in
6391 * an attribute value (other than "&lt;") must not contain a <.
6392 *
6393 * [ VC: Attribute Value Type ]
6394 * The attribute must have been declared; the value must be of the type
6395 * declared for it.
6396 *
6397 * [25] Eq ::= S? '=' S?
6398 *
6399 * With namespace:
6400 *
6401 * [NS 11] Attribute ::= QName Eq AttValue
6402 *
6403 * Also the case QName == xmlns:??? is handled independently as a namespace
6404 * definition.
6405 *
6406 * Returns the attribute name, and the value in *value.
6407 */
6408
6409xmlChar *
6410xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6411 xmlChar *name, *val;
6412
6413 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006414 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006415 name = xmlParseName(ctxt);
6416 if (name == NULL) {
6417 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6418 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6419 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6420 ctxt->wellFormed = 0;
6421 ctxt->disableSAX = 1;
6422 return(NULL);
6423 }
6424
6425 /*
6426 * read the value
6427 */
6428 SKIP_BLANKS;
6429 if (RAW == '=') {
6430 NEXT;
6431 SKIP_BLANKS;
6432 val = xmlParseAttValue(ctxt);
6433 ctxt->instate = XML_PARSER_CONTENT;
6434 } else {
6435 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6436 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6437 ctxt->sax->error(ctxt->userData,
6438 "Specification mandate value for attribute %s\n", name);
6439 ctxt->wellFormed = 0;
6440 ctxt->disableSAX = 1;
6441 xmlFree(name);
6442 return(NULL);
6443 }
6444
6445 /*
6446 * Check that xml:lang conforms to the specification
6447 * No more registered as an error, just generate a warning now
6448 * since this was deprecated in XML second edition
6449 */
6450 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6451 if (!xmlCheckLanguageID(val)) {
6452 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6453 ctxt->sax->warning(ctxt->userData,
6454 "Malformed value for xml:lang : %s\n", val);
6455 }
6456 }
6457
6458 /*
6459 * Check that xml:space conforms to the specification
6460 */
6461 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6462 if (xmlStrEqual(val, BAD_CAST "default"))
6463 *(ctxt->space) = 0;
6464 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6465 *(ctxt->space) = 1;
6466 else {
6467 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6468 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6469 ctxt->sax->error(ctxt->userData,
6470"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6471 val);
6472 ctxt->wellFormed = 0;
6473 ctxt->disableSAX = 1;
6474 }
6475 }
6476
6477 *value = val;
6478 return(name);
6479}
6480
6481/**
6482 * xmlParseStartTag:
6483 * @ctxt: an XML parser context
6484 *
6485 * parse a start of tag either for rule element or
6486 * EmptyElement. In both case we don't parse the tag closing chars.
6487 *
6488 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6489 *
6490 * [ WFC: Unique Att Spec ]
6491 * No attribute name may appear more than once in the same start-tag or
6492 * empty-element tag.
6493 *
6494 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6495 *
6496 * [ WFC: Unique Att Spec ]
6497 * No attribute name may appear more than once in the same start-tag or
6498 * empty-element tag.
6499 *
6500 * With namespace:
6501 *
6502 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6503 *
6504 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6505 *
6506 * Returns the element name parsed
6507 */
6508
6509xmlChar *
6510xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6511 xmlChar *name;
6512 xmlChar *attname;
6513 xmlChar *attvalue;
6514 const xmlChar **atts = NULL;
6515 int nbatts = 0;
6516 int maxatts = 0;
6517 int i;
6518
6519 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006520 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006521
6522 name = xmlParseName(ctxt);
6523 if (name == NULL) {
6524 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6525 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6526 ctxt->sax->error(ctxt->userData,
6527 "xmlParseStartTag: invalid element name\n");
6528 ctxt->wellFormed = 0;
6529 ctxt->disableSAX = 1;
6530 return(NULL);
6531 }
6532
6533 /*
6534 * Now parse the attributes, it ends up with the ending
6535 *
6536 * (S Attribute)* S?
6537 */
6538 SKIP_BLANKS;
6539 GROW;
6540
Daniel Veillard21a0f912001-02-25 19:54:14 +00006541 while ((RAW != '>') &&
6542 ((RAW != '/') || (NXT(1) != '>')) &&
6543 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006544 const xmlChar *q = CUR_PTR;
6545 int cons = ctxt->input->consumed;
6546
6547 attname = xmlParseAttribute(ctxt, &attvalue);
6548 if ((attname != NULL) && (attvalue != NULL)) {
6549 /*
6550 * [ WFC: Unique Att Spec ]
6551 * No attribute name may appear more than once in the same
6552 * start-tag or empty-element tag.
6553 */
6554 for (i = 0; i < nbatts;i += 2) {
6555 if (xmlStrEqual(atts[i], attname)) {
6556 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6557 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6558 ctxt->sax->error(ctxt->userData,
6559 "Attribute %s redefined\n",
6560 attname);
6561 ctxt->wellFormed = 0;
6562 ctxt->disableSAX = 1;
6563 xmlFree(attname);
6564 xmlFree(attvalue);
6565 goto failed;
6566 }
6567 }
6568
6569 /*
6570 * Add the pair to atts
6571 */
6572 if (atts == NULL) {
6573 maxatts = 10;
6574 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6575 if (atts == NULL) {
6576 xmlGenericError(xmlGenericErrorContext,
6577 "malloc of %ld byte failed\n",
6578 maxatts * (long)sizeof(xmlChar *));
6579 return(NULL);
6580 }
6581 } else if (nbatts + 4 > maxatts) {
6582 maxatts *= 2;
6583 atts = (const xmlChar **) xmlRealloc((void *) atts,
6584 maxatts * sizeof(xmlChar *));
6585 if (atts == NULL) {
6586 xmlGenericError(xmlGenericErrorContext,
6587 "realloc of %ld byte failed\n",
6588 maxatts * (long)sizeof(xmlChar *));
6589 return(NULL);
6590 }
6591 }
6592 atts[nbatts++] = attname;
6593 atts[nbatts++] = attvalue;
6594 atts[nbatts] = NULL;
6595 atts[nbatts + 1] = NULL;
6596 } else {
6597 if (attname != NULL)
6598 xmlFree(attname);
6599 if (attvalue != NULL)
6600 xmlFree(attvalue);
6601 }
6602
6603failed:
6604
6605 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6606 break;
6607 if (!IS_BLANK(RAW)) {
6608 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6609 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6610 ctxt->sax->error(ctxt->userData,
6611 "attributes construct error\n");
6612 ctxt->wellFormed = 0;
6613 ctxt->disableSAX = 1;
6614 }
6615 SKIP_BLANKS;
6616 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6617 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6618 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6619 ctxt->sax->error(ctxt->userData,
6620 "xmlParseStartTag: problem parsing attributes\n");
6621 ctxt->wellFormed = 0;
6622 ctxt->disableSAX = 1;
6623 break;
6624 }
6625 GROW;
6626 }
6627
6628 /*
6629 * SAX: Start of Element !
6630 */
6631 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6632 (!ctxt->disableSAX))
6633 ctxt->sax->startElement(ctxt->userData, name, atts);
6634
6635 if (atts != NULL) {
6636 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6637 xmlFree((void *) atts);
6638 }
6639 return(name);
6640}
6641
6642/**
6643 * xmlParseEndTag:
6644 * @ctxt: an XML parser context
6645 *
6646 * parse an end of tag
6647 *
6648 * [42] ETag ::= '</' Name S? '>'
6649 *
6650 * With namespace
6651 *
6652 * [NS 9] ETag ::= '</' QName S? '>'
6653 */
6654
6655void
6656xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6657 xmlChar *name;
6658 xmlChar *oldname;
6659
6660 GROW;
6661 if ((RAW != '<') || (NXT(1) != '/')) {
6662 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6663 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6664 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6665 ctxt->wellFormed = 0;
6666 ctxt->disableSAX = 1;
6667 return;
6668 }
6669 SKIP(2);
6670
Daniel Veillard46de64e2002-05-29 08:21:33 +00006671 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006672
6673 /*
6674 * We should definitely be at the ending "S? '>'" part
6675 */
6676 GROW;
6677 SKIP_BLANKS;
6678 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6679 ctxt->errNo = XML_ERR_GT_REQUIRED;
6680 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6681 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6682 ctxt->wellFormed = 0;
6683 ctxt->disableSAX = 1;
6684 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006685 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006686
6687 /*
6688 * [ WFC: Element Type Match ]
6689 * The Name in an element's end-tag must match the element type in the
6690 * start-tag.
6691 *
6692 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006693 if (name != (xmlChar*)1) {
Owen Taylor3473f882001-02-23 17:55:21 +00006694 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6695 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00006696 if (name != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00006697 ctxt->sax->error(ctxt->userData,
6698 "Opening and ending tag mismatch: %s and %s\n",
6699 ctxt->name, name);
Daniel Veillard46de64e2002-05-29 08:21:33 +00006700 xmlFree(name);
6701 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006702 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006703 "Ending tag error for: %s\n", ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006704 }
6705
6706 }
6707 ctxt->wellFormed = 0;
6708 ctxt->disableSAX = 1;
6709 }
6710
6711 /*
6712 * SAX: End of Tag
6713 */
6714 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6715 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006716 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006717
Owen Taylor3473f882001-02-23 17:55:21 +00006718 oldname = namePop(ctxt);
6719 spacePop(ctxt);
6720 if (oldname != NULL) {
6721#ifdef DEBUG_STACK
6722 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6723#endif
6724 xmlFree(oldname);
6725 }
6726 return;
6727}
6728
6729/**
6730 * xmlParseCDSect:
6731 * @ctxt: an XML parser context
6732 *
6733 * Parse escaped pure raw content.
6734 *
6735 * [18] CDSect ::= CDStart CData CDEnd
6736 *
6737 * [19] CDStart ::= '<![CDATA['
6738 *
6739 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6740 *
6741 * [21] CDEnd ::= ']]>'
6742 */
6743void
6744xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6745 xmlChar *buf = NULL;
6746 int len = 0;
6747 int size = XML_PARSER_BUFFER_SIZE;
6748 int r, rl;
6749 int s, sl;
6750 int cur, l;
6751 int count = 0;
6752
6753 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6754 (NXT(2) == '[') && (NXT(3) == 'C') &&
6755 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6756 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6757 (NXT(8) == '[')) {
6758 SKIP(9);
6759 } else
6760 return;
6761
6762 ctxt->instate = XML_PARSER_CDATA_SECTION;
6763 r = CUR_CHAR(rl);
6764 if (!IS_CHAR(r)) {
6765 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6766 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6767 ctxt->sax->error(ctxt->userData,
6768 "CData section not finished\n");
6769 ctxt->wellFormed = 0;
6770 ctxt->disableSAX = 1;
6771 ctxt->instate = XML_PARSER_CONTENT;
6772 return;
6773 }
6774 NEXTL(rl);
6775 s = CUR_CHAR(sl);
6776 if (!IS_CHAR(s)) {
6777 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6778 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6779 ctxt->sax->error(ctxt->userData,
6780 "CData section not finished\n");
6781 ctxt->wellFormed = 0;
6782 ctxt->disableSAX = 1;
6783 ctxt->instate = XML_PARSER_CONTENT;
6784 return;
6785 }
6786 NEXTL(sl);
6787 cur = CUR_CHAR(l);
6788 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6789 if (buf == NULL) {
6790 xmlGenericError(xmlGenericErrorContext,
6791 "malloc of %d byte failed\n", size);
6792 return;
6793 }
6794 while (IS_CHAR(cur) &&
6795 ((r != ']') || (s != ']') || (cur != '>'))) {
6796 if (len + 5 >= size) {
6797 size *= 2;
6798 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6799 if (buf == NULL) {
6800 xmlGenericError(xmlGenericErrorContext,
6801 "realloc of %d byte failed\n", size);
6802 return;
6803 }
6804 }
6805 COPY_BUF(rl,buf,len,r);
6806 r = s;
6807 rl = sl;
6808 s = cur;
6809 sl = l;
6810 count++;
6811 if (count > 50) {
6812 GROW;
6813 count = 0;
6814 }
6815 NEXTL(l);
6816 cur = CUR_CHAR(l);
6817 }
6818 buf[len] = 0;
6819 ctxt->instate = XML_PARSER_CONTENT;
6820 if (cur != '>') {
6821 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6822 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6823 ctxt->sax->error(ctxt->userData,
6824 "CData section not finished\n%.50s\n", buf);
6825 ctxt->wellFormed = 0;
6826 ctxt->disableSAX = 1;
6827 xmlFree(buf);
6828 return;
6829 }
6830 NEXTL(l);
6831
6832 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006833 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00006834 */
6835 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6836 if (ctxt->sax->cdataBlock != NULL)
6837 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006838 else if (ctxt->sax->characters != NULL)
6839 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006840 }
6841 xmlFree(buf);
6842}
6843
6844/**
6845 * xmlParseContent:
6846 * @ctxt: an XML parser context
6847 *
6848 * Parse a content:
6849 *
6850 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6851 */
6852
6853void
6854xmlParseContent(xmlParserCtxtPtr ctxt) {
6855 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00006856 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00006857 ((RAW != '<') || (NXT(1) != '/'))) {
6858 const xmlChar *test = CUR_PTR;
6859 int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006860 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006861
6862 /*
Owen Taylor3473f882001-02-23 17:55:21 +00006863 * First case : a Processing Instruction.
6864 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00006865 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006866 xmlParsePI(ctxt);
6867 }
6868
6869 /*
6870 * Second case : a CDSection
6871 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006872 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006873 (NXT(2) == '[') && (NXT(3) == 'C') &&
6874 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6875 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6876 (NXT(8) == '[')) {
6877 xmlParseCDSect(ctxt);
6878 }
6879
6880 /*
6881 * Third case : a comment
6882 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006883 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006884 (NXT(2) == '-') && (NXT(3) == '-')) {
6885 xmlParseComment(ctxt);
6886 ctxt->instate = XML_PARSER_CONTENT;
6887 }
6888
6889 /*
6890 * Fourth case : a sub-element.
6891 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006892 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006893 xmlParseElement(ctxt);
6894 }
6895
6896 /*
6897 * Fifth case : a reference. If if has not been resolved,
6898 * parsing returns it's Name, create the node
6899 */
6900
Daniel Veillard21a0f912001-02-25 19:54:14 +00006901 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006902 xmlParseReference(ctxt);
6903 }
6904
6905 /*
6906 * Last case, text. Note that References are handled directly.
6907 */
6908 else {
6909 xmlParseCharData(ctxt, 0);
6910 }
6911
6912 GROW;
6913 /*
6914 * Pop-up of finished entities.
6915 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00006916 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00006917 xmlPopInput(ctxt);
6918 SHRINK;
6919
Daniel Veillardfdc91562002-07-01 21:52:03 +00006920 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006921 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6922 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6923 ctxt->sax->error(ctxt->userData,
6924 "detected an error in element content\n");
6925 ctxt->wellFormed = 0;
6926 ctxt->disableSAX = 1;
6927 ctxt->instate = XML_PARSER_EOF;
6928 break;
6929 }
6930 }
6931}
6932
6933/**
6934 * xmlParseElement:
6935 * @ctxt: an XML parser context
6936 *
6937 * parse an XML element, this is highly recursive
6938 *
6939 * [39] element ::= EmptyElemTag | STag content ETag
6940 *
6941 * [ WFC: Element Type Match ]
6942 * The Name in an element's end-tag must match the element type in the
6943 * start-tag.
6944 *
6945 * [ VC: Element Valid ]
6946 * An element is valid if there is a declaration matching elementdecl
6947 * where the Name matches the element type and one of the following holds:
6948 * - The declaration matches EMPTY and the element has no content.
6949 * - The declaration matches children and the sequence of child elements
6950 * belongs to the language generated by the regular expression in the
6951 * content model, with optional white space (characters matching the
6952 * nonterminal S) between each pair of child elements.
6953 * - The declaration matches Mixed and the content consists of character
6954 * data and child elements whose types match names in the content model.
6955 * - The declaration matches ANY, and the types of any child elements have
6956 * been declared.
6957 */
6958
6959void
6960xmlParseElement(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00006961 xmlChar *name;
6962 xmlChar *oldname;
6963 xmlParserNodeInfo node_info;
6964 xmlNodePtr ret;
6965
6966 /* Capture start position */
6967 if (ctxt->record_info) {
6968 node_info.begin_pos = ctxt->input->consumed +
6969 (CUR_PTR - ctxt->input->base);
6970 node_info.begin_line = ctxt->input->line;
6971 }
6972
6973 if (ctxt->spaceNr == 0)
6974 spacePush(ctxt, -1);
6975 else
6976 spacePush(ctxt, *ctxt->space);
6977
6978 name = xmlParseStartTag(ctxt);
6979 if (name == NULL) {
6980 spacePop(ctxt);
6981 return;
6982 }
6983 namePush(ctxt, name);
6984 ret = ctxt->node;
6985
6986 /*
6987 * [ VC: Root Element Type ]
6988 * The Name in the document type declaration must match the element
6989 * type of the root element.
6990 */
6991 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6992 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6993 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6994
6995 /*
6996 * Check for an Empty Element.
6997 */
6998 if ((RAW == '/') && (NXT(1) == '>')) {
6999 SKIP(2);
7000 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7001 (!ctxt->disableSAX))
7002 ctxt->sax->endElement(ctxt->userData, name);
7003 oldname = namePop(ctxt);
7004 spacePop(ctxt);
7005 if (oldname != NULL) {
7006#ifdef DEBUG_STACK
7007 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7008#endif
7009 xmlFree(oldname);
7010 }
7011 if ( ret != NULL && ctxt->record_info ) {
7012 node_info.end_pos = ctxt->input->consumed +
7013 (CUR_PTR - ctxt->input->base);
7014 node_info.end_line = ctxt->input->line;
7015 node_info.node = ret;
7016 xmlParserAddNodeInfo(ctxt, &node_info);
7017 }
7018 return;
7019 }
7020 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00007021 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007022 } else {
7023 ctxt->errNo = XML_ERR_GT_REQUIRED;
7024 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7025 ctxt->sax->error(ctxt->userData,
Daniel Veillard38d80e22002-06-11 07:24:56 +00007026 "Couldn't find end of Start Tag %s\n",
7027 name);
Owen Taylor3473f882001-02-23 17:55:21 +00007028 ctxt->wellFormed = 0;
7029 ctxt->disableSAX = 1;
7030
7031 /*
7032 * end of parsing of this node.
7033 */
7034 nodePop(ctxt);
7035 oldname = namePop(ctxt);
7036 spacePop(ctxt);
7037 if (oldname != NULL) {
7038#ifdef DEBUG_STACK
7039 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7040#endif
7041 xmlFree(oldname);
7042 }
7043
7044 /*
7045 * Capture end position and add node
7046 */
7047 if ( ret != NULL && ctxt->record_info ) {
7048 node_info.end_pos = ctxt->input->consumed +
7049 (CUR_PTR - ctxt->input->base);
7050 node_info.end_line = ctxt->input->line;
7051 node_info.node = ret;
7052 xmlParserAddNodeInfo(ctxt, &node_info);
7053 }
7054 return;
7055 }
7056
7057 /*
7058 * Parse the content of the element:
7059 */
7060 xmlParseContent(ctxt);
7061 if (!IS_CHAR(RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00007062 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00007063 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7064 ctxt->sax->error(ctxt->userData,
Daniel Veillard38d80e22002-06-11 07:24:56 +00007065 "Premature end of data in tag %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007066 ctxt->wellFormed = 0;
7067 ctxt->disableSAX = 1;
7068
7069 /*
7070 * end of parsing of this node.
7071 */
7072 nodePop(ctxt);
7073 oldname = namePop(ctxt);
7074 spacePop(ctxt);
7075 if (oldname != NULL) {
7076#ifdef DEBUG_STACK
7077 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7078#endif
7079 xmlFree(oldname);
7080 }
7081 return;
7082 }
7083
7084 /*
7085 * parse the end of tag: '</' should be here.
7086 */
7087 xmlParseEndTag(ctxt);
7088
7089 /*
7090 * Capture end position and add node
7091 */
7092 if ( ret != NULL && ctxt->record_info ) {
7093 node_info.end_pos = ctxt->input->consumed +
7094 (CUR_PTR - ctxt->input->base);
7095 node_info.end_line = ctxt->input->line;
7096 node_info.node = ret;
7097 xmlParserAddNodeInfo(ctxt, &node_info);
7098 }
7099}
7100
7101/**
7102 * xmlParseVersionNum:
7103 * @ctxt: an XML parser context
7104 *
7105 * parse the XML version value.
7106 *
7107 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7108 *
7109 * Returns the string giving the XML version number, or NULL
7110 */
7111xmlChar *
7112xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7113 xmlChar *buf = NULL;
7114 int len = 0;
7115 int size = 10;
7116 xmlChar cur;
7117
7118 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7119 if (buf == NULL) {
7120 xmlGenericError(xmlGenericErrorContext,
7121 "malloc of %d byte failed\n", size);
7122 return(NULL);
7123 }
7124 cur = CUR;
7125 while (((cur >= 'a') && (cur <= 'z')) ||
7126 ((cur >= 'A') && (cur <= 'Z')) ||
7127 ((cur >= '0') && (cur <= '9')) ||
7128 (cur == '_') || (cur == '.') ||
7129 (cur == ':') || (cur == '-')) {
7130 if (len + 1 >= size) {
7131 size *= 2;
7132 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7133 if (buf == NULL) {
7134 xmlGenericError(xmlGenericErrorContext,
7135 "realloc of %d byte failed\n", size);
7136 return(NULL);
7137 }
7138 }
7139 buf[len++] = cur;
7140 NEXT;
7141 cur=CUR;
7142 }
7143 buf[len] = 0;
7144 return(buf);
7145}
7146
7147/**
7148 * xmlParseVersionInfo:
7149 * @ctxt: an XML parser context
7150 *
7151 * parse the XML version.
7152 *
7153 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7154 *
7155 * [25] Eq ::= S? '=' S?
7156 *
7157 * Returns the version string, e.g. "1.0"
7158 */
7159
7160xmlChar *
7161xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7162 xmlChar *version = NULL;
7163 const xmlChar *q;
7164
7165 if ((RAW == 'v') && (NXT(1) == 'e') &&
7166 (NXT(2) == 'r') && (NXT(3) == 's') &&
7167 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7168 (NXT(6) == 'n')) {
7169 SKIP(7);
7170 SKIP_BLANKS;
7171 if (RAW != '=') {
7172 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7173 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7174 ctxt->sax->error(ctxt->userData,
7175 "xmlParseVersionInfo : expected '='\n");
7176 ctxt->wellFormed = 0;
7177 ctxt->disableSAX = 1;
7178 return(NULL);
7179 }
7180 NEXT;
7181 SKIP_BLANKS;
7182 if (RAW == '"') {
7183 NEXT;
7184 q = CUR_PTR;
7185 version = xmlParseVersionNum(ctxt);
7186 if (RAW != '"') {
7187 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7188 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7189 ctxt->sax->error(ctxt->userData,
7190 "String not closed\n%.50s\n", q);
7191 ctxt->wellFormed = 0;
7192 ctxt->disableSAX = 1;
7193 } else
7194 NEXT;
7195 } else if (RAW == '\''){
7196 NEXT;
7197 q = CUR_PTR;
7198 version = xmlParseVersionNum(ctxt);
7199 if (RAW != '\'') {
7200 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7201 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7202 ctxt->sax->error(ctxt->userData,
7203 "String not closed\n%.50s\n", q);
7204 ctxt->wellFormed = 0;
7205 ctxt->disableSAX = 1;
7206 } else
7207 NEXT;
7208 } else {
7209 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7210 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7211 ctxt->sax->error(ctxt->userData,
7212 "xmlParseVersionInfo : expected ' or \"\n");
7213 ctxt->wellFormed = 0;
7214 ctxt->disableSAX = 1;
7215 }
7216 }
7217 return(version);
7218}
7219
7220/**
7221 * xmlParseEncName:
7222 * @ctxt: an XML parser context
7223 *
7224 * parse the XML encoding name
7225 *
7226 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7227 *
7228 * Returns the encoding name value or NULL
7229 */
7230xmlChar *
7231xmlParseEncName(xmlParserCtxtPtr ctxt) {
7232 xmlChar *buf = NULL;
7233 int len = 0;
7234 int size = 10;
7235 xmlChar cur;
7236
7237 cur = CUR;
7238 if (((cur >= 'a') && (cur <= 'z')) ||
7239 ((cur >= 'A') && (cur <= 'Z'))) {
7240 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7241 if (buf == NULL) {
7242 xmlGenericError(xmlGenericErrorContext,
7243 "malloc of %d byte failed\n", size);
7244 return(NULL);
7245 }
7246
7247 buf[len++] = cur;
7248 NEXT;
7249 cur = CUR;
7250 while (((cur >= 'a') && (cur <= 'z')) ||
7251 ((cur >= 'A') && (cur <= 'Z')) ||
7252 ((cur >= '0') && (cur <= '9')) ||
7253 (cur == '.') || (cur == '_') ||
7254 (cur == '-')) {
7255 if (len + 1 >= size) {
7256 size *= 2;
7257 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7258 if (buf == NULL) {
7259 xmlGenericError(xmlGenericErrorContext,
7260 "realloc of %d byte failed\n", size);
7261 return(NULL);
7262 }
7263 }
7264 buf[len++] = cur;
7265 NEXT;
7266 cur = CUR;
7267 if (cur == 0) {
7268 SHRINK;
7269 GROW;
7270 cur = CUR;
7271 }
7272 }
7273 buf[len] = 0;
7274 } else {
7275 ctxt->errNo = XML_ERR_ENCODING_NAME;
7276 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7277 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7278 ctxt->wellFormed = 0;
7279 ctxt->disableSAX = 1;
7280 }
7281 return(buf);
7282}
7283
7284/**
7285 * xmlParseEncodingDecl:
7286 * @ctxt: an XML parser context
7287 *
7288 * parse the XML encoding declaration
7289 *
7290 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7291 *
7292 * this setups the conversion filters.
7293 *
7294 * Returns the encoding value or NULL
7295 */
7296
7297xmlChar *
7298xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7299 xmlChar *encoding = NULL;
7300 const xmlChar *q;
7301
7302 SKIP_BLANKS;
7303 if ((RAW == 'e') && (NXT(1) == 'n') &&
7304 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7305 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7306 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7307 SKIP(8);
7308 SKIP_BLANKS;
7309 if (RAW != '=') {
7310 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7311 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7312 ctxt->sax->error(ctxt->userData,
7313 "xmlParseEncodingDecl : expected '='\n");
7314 ctxt->wellFormed = 0;
7315 ctxt->disableSAX = 1;
7316 return(NULL);
7317 }
7318 NEXT;
7319 SKIP_BLANKS;
7320 if (RAW == '"') {
7321 NEXT;
7322 q = CUR_PTR;
7323 encoding = xmlParseEncName(ctxt);
7324 if (RAW != '"') {
7325 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7326 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7327 ctxt->sax->error(ctxt->userData,
7328 "String not closed\n%.50s\n", q);
7329 ctxt->wellFormed = 0;
7330 ctxt->disableSAX = 1;
7331 } else
7332 NEXT;
7333 } else if (RAW == '\''){
7334 NEXT;
7335 q = CUR_PTR;
7336 encoding = xmlParseEncName(ctxt);
7337 if (RAW != '\'') {
7338 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7339 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7340 ctxt->sax->error(ctxt->userData,
7341 "String not closed\n%.50s\n", q);
7342 ctxt->wellFormed = 0;
7343 ctxt->disableSAX = 1;
7344 } else
7345 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00007346 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007347 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7348 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7349 ctxt->sax->error(ctxt->userData,
7350 "xmlParseEncodingDecl : expected ' or \"\n");
7351 ctxt->wellFormed = 0;
7352 ctxt->disableSAX = 1;
7353 }
7354 if (encoding != NULL) {
7355 xmlCharEncoding enc;
7356 xmlCharEncodingHandlerPtr handler;
7357
7358 if (ctxt->input->encoding != NULL)
7359 xmlFree((xmlChar *) ctxt->input->encoding);
7360 ctxt->input->encoding = encoding;
7361
7362 enc = xmlParseCharEncoding((const char *) encoding);
7363 /*
7364 * registered set of known encodings
7365 */
7366 if (enc != XML_CHAR_ENCODING_ERROR) {
7367 xmlSwitchEncoding(ctxt, enc);
7368 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
Daniel Veillard46d6c442002-04-09 16:10:39 +00007369 ctxt->input->encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007370 xmlFree(encoding);
7371 return(NULL);
7372 }
7373 } else {
7374 /*
7375 * fallback for unknown encodings
7376 */
7377 handler = xmlFindCharEncodingHandler((const char *) encoding);
7378 if (handler != NULL) {
7379 xmlSwitchToEncoding(ctxt, handler);
7380 } else {
7381 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7382 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7383 ctxt->sax->error(ctxt->userData,
7384 "Unsupported encoding %s\n", encoding);
7385 return(NULL);
7386 }
7387 }
7388 }
7389 }
7390 return(encoding);
7391}
7392
7393/**
7394 * xmlParseSDDecl:
7395 * @ctxt: an XML parser context
7396 *
7397 * parse the XML standalone declaration
7398 *
7399 * [32] SDDecl ::= S 'standalone' Eq
7400 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7401 *
7402 * [ VC: Standalone Document Declaration ]
7403 * TODO The standalone document declaration must have the value "no"
7404 * if any external markup declarations contain declarations of:
7405 * - attributes with default values, if elements to which these
7406 * attributes apply appear in the document without specifications
7407 * of values for these attributes, or
7408 * - entities (other than amp, lt, gt, apos, quot), if references
7409 * to those entities appear in the document, or
7410 * - attributes with values subject to normalization, where the
7411 * attribute appears in the document with a value which will change
7412 * as a result of normalization, or
7413 * - element types with element content, if white space occurs directly
7414 * within any instance of those types.
7415 *
7416 * Returns 1 if standalone, 0 otherwise
7417 */
7418
7419int
7420xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7421 int standalone = -1;
7422
7423 SKIP_BLANKS;
7424 if ((RAW == 's') && (NXT(1) == 't') &&
7425 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7426 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7427 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7428 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7429 SKIP(10);
7430 SKIP_BLANKS;
7431 if (RAW != '=') {
7432 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7433 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7434 ctxt->sax->error(ctxt->userData,
7435 "XML standalone declaration : expected '='\n");
7436 ctxt->wellFormed = 0;
7437 ctxt->disableSAX = 1;
7438 return(standalone);
7439 }
7440 NEXT;
7441 SKIP_BLANKS;
7442 if (RAW == '\''){
7443 NEXT;
7444 if ((RAW == 'n') && (NXT(1) == 'o')) {
7445 standalone = 0;
7446 SKIP(2);
7447 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7448 (NXT(2) == 's')) {
7449 standalone = 1;
7450 SKIP(3);
7451 } else {
7452 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7453 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7454 ctxt->sax->error(ctxt->userData,
7455 "standalone accepts only 'yes' or 'no'\n");
7456 ctxt->wellFormed = 0;
7457 ctxt->disableSAX = 1;
7458 }
7459 if (RAW != '\'') {
7460 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7461 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7462 ctxt->sax->error(ctxt->userData, "String not closed\n");
7463 ctxt->wellFormed = 0;
7464 ctxt->disableSAX = 1;
7465 } else
7466 NEXT;
7467 } else if (RAW == '"'){
7468 NEXT;
7469 if ((RAW == 'n') && (NXT(1) == 'o')) {
7470 standalone = 0;
7471 SKIP(2);
7472 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7473 (NXT(2) == 's')) {
7474 standalone = 1;
7475 SKIP(3);
7476 } else {
7477 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7478 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7479 ctxt->sax->error(ctxt->userData,
7480 "standalone accepts only 'yes' or 'no'\n");
7481 ctxt->wellFormed = 0;
7482 ctxt->disableSAX = 1;
7483 }
7484 if (RAW != '"') {
7485 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7486 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7487 ctxt->sax->error(ctxt->userData, "String not closed\n");
7488 ctxt->wellFormed = 0;
7489 ctxt->disableSAX = 1;
7490 } else
7491 NEXT;
7492 } else {
7493 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7494 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7495 ctxt->sax->error(ctxt->userData,
7496 "Standalone value not found\n");
7497 ctxt->wellFormed = 0;
7498 ctxt->disableSAX = 1;
7499 }
7500 }
7501 return(standalone);
7502}
7503
7504/**
7505 * xmlParseXMLDecl:
7506 * @ctxt: an XML parser context
7507 *
7508 * parse an XML declaration header
7509 *
7510 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7511 */
7512
7513void
7514xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7515 xmlChar *version;
7516
7517 /*
7518 * We know that '<?xml' is here.
7519 */
7520 SKIP(5);
7521
7522 if (!IS_BLANK(RAW)) {
7523 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7524 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7525 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7526 ctxt->wellFormed = 0;
7527 ctxt->disableSAX = 1;
7528 }
7529 SKIP_BLANKS;
7530
7531 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007532 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007533 */
7534 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007535 if (version == NULL) {
7536 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7537 ctxt->sax->error(ctxt->userData,
7538 "Malformed declaration expecting version\n");
7539 ctxt->wellFormed = 0;
7540 ctxt->disableSAX = 1;
7541 } else {
7542 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7543 /*
7544 * TODO: Blueberry should be detected here
7545 */
7546 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7547 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7548 version);
7549 }
7550 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007551 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007552 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007553 }
Owen Taylor3473f882001-02-23 17:55:21 +00007554
7555 /*
7556 * We may have the encoding declaration
7557 */
7558 if (!IS_BLANK(RAW)) {
7559 if ((RAW == '?') && (NXT(1) == '>')) {
7560 SKIP(2);
7561 return;
7562 }
7563 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7564 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7565 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7566 ctxt->wellFormed = 0;
7567 ctxt->disableSAX = 1;
7568 }
7569 xmlParseEncodingDecl(ctxt);
7570 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7571 /*
7572 * The XML REC instructs us to stop parsing right here
7573 */
7574 return;
7575 }
7576
7577 /*
7578 * We may have the standalone status.
7579 */
7580 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7581 if ((RAW == '?') && (NXT(1) == '>')) {
7582 SKIP(2);
7583 return;
7584 }
7585 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7586 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7587 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7588 ctxt->wellFormed = 0;
7589 ctxt->disableSAX = 1;
7590 }
7591 SKIP_BLANKS;
7592 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7593
7594 SKIP_BLANKS;
7595 if ((RAW == '?') && (NXT(1) == '>')) {
7596 SKIP(2);
7597 } else if (RAW == '>') {
7598 /* Deprecated old WD ... */
7599 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7600 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7601 ctxt->sax->error(ctxt->userData,
7602 "XML declaration must end-up with '?>'\n");
7603 ctxt->wellFormed = 0;
7604 ctxt->disableSAX = 1;
7605 NEXT;
7606 } else {
7607 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7608 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7609 ctxt->sax->error(ctxt->userData,
7610 "parsing XML declaration: '?>' expected\n");
7611 ctxt->wellFormed = 0;
7612 ctxt->disableSAX = 1;
7613 MOVETO_ENDTAG(CUR_PTR);
7614 NEXT;
7615 }
7616}
7617
7618/**
7619 * xmlParseMisc:
7620 * @ctxt: an XML parser context
7621 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007622 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007623 *
7624 * [27] Misc ::= Comment | PI | S
7625 */
7626
7627void
7628xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00007629 while (((RAW == '<') && (NXT(1) == '?')) ||
7630 ((RAW == '<') && (NXT(1) == '!') &&
7631 (NXT(2) == '-') && (NXT(3) == '-')) ||
7632 IS_BLANK(CUR)) {
7633 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007634 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00007635 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007636 NEXT;
7637 } else
7638 xmlParseComment(ctxt);
7639 }
7640}
7641
7642/**
7643 * xmlParseDocument:
7644 * @ctxt: an XML parser context
7645 *
7646 * parse an XML document (and build a tree if using the standard SAX
7647 * interface).
7648 *
7649 * [1] document ::= prolog element Misc*
7650 *
7651 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7652 *
7653 * Returns 0, -1 in case of error. the parser context is augmented
7654 * as a result of the parsing.
7655 */
7656
7657int
7658xmlParseDocument(xmlParserCtxtPtr ctxt) {
7659 xmlChar start[4];
7660 xmlCharEncoding enc;
7661
7662 xmlInitParser();
7663
7664 GROW;
7665
7666 /*
7667 * SAX: beginning of the document processing.
7668 */
7669 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7670 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7671
Daniel Veillard50f34372001-08-03 12:06:36 +00007672 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007673 /*
7674 * Get the 4 first bytes and decode the charset
7675 * if enc != XML_CHAR_ENCODING_NONE
7676 * plug some encoding conversion routines.
7677 */
7678 start[0] = RAW;
7679 start[1] = NXT(1);
7680 start[2] = NXT(2);
7681 start[3] = NXT(3);
7682 enc = xmlDetectCharEncoding(start, 4);
7683 if (enc != XML_CHAR_ENCODING_NONE) {
7684 xmlSwitchEncoding(ctxt, enc);
7685 }
Owen Taylor3473f882001-02-23 17:55:21 +00007686 }
7687
7688
7689 if (CUR == 0) {
7690 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7691 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7692 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7693 ctxt->wellFormed = 0;
7694 ctxt->disableSAX = 1;
7695 }
7696
7697 /*
7698 * Check for the XMLDecl in the Prolog.
7699 */
7700 GROW;
7701 if ((RAW == '<') && (NXT(1) == '?') &&
7702 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7703 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7704
7705 /*
7706 * Note that we will switch encoding on the fly.
7707 */
7708 xmlParseXMLDecl(ctxt);
7709 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7710 /*
7711 * The XML REC instructs us to stop parsing right here
7712 */
7713 return(-1);
7714 }
7715 ctxt->standalone = ctxt->input->standalone;
7716 SKIP_BLANKS;
7717 } else {
7718 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7719 }
7720 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7721 ctxt->sax->startDocument(ctxt->userData);
7722
7723 /*
7724 * The Misc part of the Prolog
7725 */
7726 GROW;
7727 xmlParseMisc(ctxt);
7728
7729 /*
7730 * Then possibly doc type declaration(s) and more Misc
7731 * (doctypedecl Misc*)?
7732 */
7733 GROW;
7734 if ((RAW == '<') && (NXT(1) == '!') &&
7735 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7736 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7737 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7738 (NXT(8) == 'E')) {
7739
7740 ctxt->inSubset = 1;
7741 xmlParseDocTypeDecl(ctxt);
7742 if (RAW == '[') {
7743 ctxt->instate = XML_PARSER_DTD;
7744 xmlParseInternalSubset(ctxt);
7745 }
7746
7747 /*
7748 * Create and update the external subset.
7749 */
7750 ctxt->inSubset = 2;
7751 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7752 (!ctxt->disableSAX))
7753 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7754 ctxt->extSubSystem, ctxt->extSubURI);
7755 ctxt->inSubset = 0;
7756
7757
7758 ctxt->instate = XML_PARSER_PROLOG;
7759 xmlParseMisc(ctxt);
7760 }
7761
7762 /*
7763 * Time to start parsing the tree itself
7764 */
7765 GROW;
7766 if (RAW != '<') {
7767 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7768 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7769 ctxt->sax->error(ctxt->userData,
7770 "Start tag expected, '<' not found\n");
7771 ctxt->wellFormed = 0;
7772 ctxt->disableSAX = 1;
7773 ctxt->instate = XML_PARSER_EOF;
7774 } else {
7775 ctxt->instate = XML_PARSER_CONTENT;
7776 xmlParseElement(ctxt);
7777 ctxt->instate = XML_PARSER_EPILOG;
7778
7779
7780 /*
7781 * The Misc part at the end
7782 */
7783 xmlParseMisc(ctxt);
7784
Daniel Veillard561b7f82002-03-20 21:55:57 +00007785 if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00007786 ctxt->errNo = XML_ERR_DOCUMENT_END;
7787 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7788 ctxt->sax->error(ctxt->userData,
7789 "Extra content at the end of the document\n");
7790 ctxt->wellFormed = 0;
7791 ctxt->disableSAX = 1;
7792 }
7793 ctxt->instate = XML_PARSER_EOF;
7794 }
7795
7796 /*
7797 * SAX: end of the document processing.
7798 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007799 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007800 ctxt->sax->endDocument(ctxt->userData);
7801
Daniel Veillard5997aca2002-03-18 18:36:20 +00007802 /*
7803 * Remove locally kept entity definitions if the tree was not built
7804 */
7805 if ((ctxt->myDoc != NULL) &&
7806 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
7807 xmlFreeDoc(ctxt->myDoc);
7808 ctxt->myDoc = NULL;
7809 }
7810
Daniel Veillardc7612992002-02-17 22:47:37 +00007811 if (! ctxt->wellFormed) {
7812 ctxt->valid = 0;
7813 return(-1);
7814 }
Owen Taylor3473f882001-02-23 17:55:21 +00007815 return(0);
7816}
7817
7818/**
7819 * xmlParseExtParsedEnt:
7820 * @ctxt: an XML parser context
7821 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007822 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00007823 * An external general parsed entity is well-formed if it matches the
7824 * production labeled extParsedEnt.
7825 *
7826 * [78] extParsedEnt ::= TextDecl? content
7827 *
7828 * Returns 0, -1 in case of error. the parser context is augmented
7829 * as a result of the parsing.
7830 */
7831
7832int
7833xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7834 xmlChar start[4];
7835 xmlCharEncoding enc;
7836
7837 xmlDefaultSAXHandlerInit();
7838
7839 GROW;
7840
7841 /*
7842 * SAX: beginning of the document processing.
7843 */
7844 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7845 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7846
7847 /*
7848 * Get the 4 first bytes and decode the charset
7849 * if enc != XML_CHAR_ENCODING_NONE
7850 * plug some encoding conversion routines.
7851 */
7852 start[0] = RAW;
7853 start[1] = NXT(1);
7854 start[2] = NXT(2);
7855 start[3] = NXT(3);
7856 enc = xmlDetectCharEncoding(start, 4);
7857 if (enc != XML_CHAR_ENCODING_NONE) {
7858 xmlSwitchEncoding(ctxt, enc);
7859 }
7860
7861
7862 if (CUR == 0) {
7863 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7864 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7865 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7866 ctxt->wellFormed = 0;
7867 ctxt->disableSAX = 1;
7868 }
7869
7870 /*
7871 * Check for the XMLDecl in the Prolog.
7872 */
7873 GROW;
7874 if ((RAW == '<') && (NXT(1) == '?') &&
7875 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7876 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7877
7878 /*
7879 * Note that we will switch encoding on the fly.
7880 */
7881 xmlParseXMLDecl(ctxt);
7882 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7883 /*
7884 * The XML REC instructs us to stop parsing right here
7885 */
7886 return(-1);
7887 }
7888 SKIP_BLANKS;
7889 } else {
7890 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7891 }
7892 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7893 ctxt->sax->startDocument(ctxt->userData);
7894
7895 /*
7896 * Doing validity checking on chunk doesn't make sense
7897 */
7898 ctxt->instate = XML_PARSER_CONTENT;
7899 ctxt->validate = 0;
7900 ctxt->loadsubset = 0;
7901 ctxt->depth = 0;
7902
7903 xmlParseContent(ctxt);
7904
7905 if ((RAW == '<') && (NXT(1) == '/')) {
7906 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7907 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7908 ctxt->sax->error(ctxt->userData,
7909 "chunk is not well balanced\n");
7910 ctxt->wellFormed = 0;
7911 ctxt->disableSAX = 1;
7912 } else if (RAW != 0) {
7913 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7914 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7915 ctxt->sax->error(ctxt->userData,
7916 "extra content at the end of well balanced chunk\n");
7917 ctxt->wellFormed = 0;
7918 ctxt->disableSAX = 1;
7919 }
7920
7921 /*
7922 * SAX: end of the document processing.
7923 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007924 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007925 ctxt->sax->endDocument(ctxt->userData);
7926
7927 if (! ctxt->wellFormed) return(-1);
7928 return(0);
7929}
7930
7931/************************************************************************
7932 * *
7933 * Progressive parsing interfaces *
7934 * *
7935 ************************************************************************/
7936
7937/**
7938 * xmlParseLookupSequence:
7939 * @ctxt: an XML parser context
7940 * @first: the first char to lookup
7941 * @next: the next char to lookup or zero
7942 * @third: the next char to lookup or zero
7943 *
7944 * Try to find if a sequence (first, next, third) or just (first next) or
7945 * (first) is available in the input stream.
7946 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7947 * to avoid rescanning sequences of bytes, it DOES change the state of the
7948 * parser, do not use liberally.
7949 *
7950 * Returns the index to the current parsing point if the full sequence
7951 * is available, -1 otherwise.
7952 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007953static int
Owen Taylor3473f882001-02-23 17:55:21 +00007954xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7955 xmlChar next, xmlChar third) {
7956 int base, len;
7957 xmlParserInputPtr in;
7958 const xmlChar *buf;
7959
7960 in = ctxt->input;
7961 if (in == NULL) return(-1);
7962 base = in->cur - in->base;
7963 if (base < 0) return(-1);
7964 if (ctxt->checkIndex > base)
7965 base = ctxt->checkIndex;
7966 if (in->buf == NULL) {
7967 buf = in->base;
7968 len = in->length;
7969 } else {
7970 buf = in->buf->buffer->content;
7971 len = in->buf->buffer->use;
7972 }
7973 /* take into account the sequence length */
7974 if (third) len -= 2;
7975 else if (next) len --;
7976 for (;base < len;base++) {
7977 if (buf[base] == first) {
7978 if (third != 0) {
7979 if ((buf[base + 1] != next) ||
7980 (buf[base + 2] != third)) continue;
7981 } else if (next != 0) {
7982 if (buf[base + 1] != next) continue;
7983 }
7984 ctxt->checkIndex = 0;
7985#ifdef DEBUG_PUSH
7986 if (next == 0)
7987 xmlGenericError(xmlGenericErrorContext,
7988 "PP: lookup '%c' found at %d\n",
7989 first, base);
7990 else if (third == 0)
7991 xmlGenericError(xmlGenericErrorContext,
7992 "PP: lookup '%c%c' found at %d\n",
7993 first, next, base);
7994 else
7995 xmlGenericError(xmlGenericErrorContext,
7996 "PP: lookup '%c%c%c' found at %d\n",
7997 first, next, third, base);
7998#endif
7999 return(base - (in->cur - in->base));
8000 }
8001 }
8002 ctxt->checkIndex = base;
8003#ifdef DEBUG_PUSH
8004 if (next == 0)
8005 xmlGenericError(xmlGenericErrorContext,
8006 "PP: lookup '%c' failed\n", first);
8007 else if (third == 0)
8008 xmlGenericError(xmlGenericErrorContext,
8009 "PP: lookup '%c%c' failed\n", first, next);
8010 else
8011 xmlGenericError(xmlGenericErrorContext,
8012 "PP: lookup '%c%c%c' failed\n", first, next, third);
8013#endif
8014 return(-1);
8015}
8016
8017/**
8018 * xmlParseTryOrFinish:
8019 * @ctxt: an XML parser context
8020 * @terminate: last chunk indicator
8021 *
8022 * Try to progress on parsing
8023 *
8024 * Returns zero if no parsing was possible
8025 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008026static int
Owen Taylor3473f882001-02-23 17:55:21 +00008027xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8028 int ret = 0;
8029 int avail;
8030 xmlChar cur, next;
8031
8032#ifdef DEBUG_PUSH
8033 switch (ctxt->instate) {
8034 case XML_PARSER_EOF:
8035 xmlGenericError(xmlGenericErrorContext,
8036 "PP: try EOF\n"); break;
8037 case XML_PARSER_START:
8038 xmlGenericError(xmlGenericErrorContext,
8039 "PP: try START\n"); break;
8040 case XML_PARSER_MISC:
8041 xmlGenericError(xmlGenericErrorContext,
8042 "PP: try MISC\n");break;
8043 case XML_PARSER_COMMENT:
8044 xmlGenericError(xmlGenericErrorContext,
8045 "PP: try COMMENT\n");break;
8046 case XML_PARSER_PROLOG:
8047 xmlGenericError(xmlGenericErrorContext,
8048 "PP: try PROLOG\n");break;
8049 case XML_PARSER_START_TAG:
8050 xmlGenericError(xmlGenericErrorContext,
8051 "PP: try START_TAG\n");break;
8052 case XML_PARSER_CONTENT:
8053 xmlGenericError(xmlGenericErrorContext,
8054 "PP: try CONTENT\n");break;
8055 case XML_PARSER_CDATA_SECTION:
8056 xmlGenericError(xmlGenericErrorContext,
8057 "PP: try CDATA_SECTION\n");break;
8058 case XML_PARSER_END_TAG:
8059 xmlGenericError(xmlGenericErrorContext,
8060 "PP: try END_TAG\n");break;
8061 case XML_PARSER_ENTITY_DECL:
8062 xmlGenericError(xmlGenericErrorContext,
8063 "PP: try ENTITY_DECL\n");break;
8064 case XML_PARSER_ENTITY_VALUE:
8065 xmlGenericError(xmlGenericErrorContext,
8066 "PP: try ENTITY_VALUE\n");break;
8067 case XML_PARSER_ATTRIBUTE_VALUE:
8068 xmlGenericError(xmlGenericErrorContext,
8069 "PP: try ATTRIBUTE_VALUE\n");break;
8070 case XML_PARSER_DTD:
8071 xmlGenericError(xmlGenericErrorContext,
8072 "PP: try DTD\n");break;
8073 case XML_PARSER_EPILOG:
8074 xmlGenericError(xmlGenericErrorContext,
8075 "PP: try EPILOG\n");break;
8076 case XML_PARSER_PI:
8077 xmlGenericError(xmlGenericErrorContext,
8078 "PP: try PI\n");break;
8079 case XML_PARSER_IGNORE:
8080 xmlGenericError(xmlGenericErrorContext,
8081 "PP: try IGNORE\n");break;
8082 }
8083#endif
8084
8085 while (1) {
Aleksey Sanine48a3182002-05-09 18:20:01 +00008086 SHRINK;
8087
Owen Taylor3473f882001-02-23 17:55:21 +00008088 /*
8089 * Pop-up of finished entities.
8090 */
8091 while ((RAW == 0) && (ctxt->inputNr > 1))
8092 xmlPopInput(ctxt);
8093
8094 if (ctxt->input ==NULL) break;
8095 if (ctxt->input->buf == NULL)
8096 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00008097 else {
8098 /*
8099 * If we are operating on converted input, try to flush
8100 * remainng chars to avoid them stalling in the non-converted
8101 * buffer.
8102 */
8103 if ((ctxt->input->buf->raw != NULL) &&
8104 (ctxt->input->buf->raw->use > 0)) {
8105 int base = ctxt->input->base -
8106 ctxt->input->buf->buffer->content;
8107 int current = ctxt->input->cur - ctxt->input->base;
8108
8109 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8110 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8111 ctxt->input->cur = ctxt->input->base + current;
8112 ctxt->input->end =
8113 &ctxt->input->buf->buffer->content[
8114 ctxt->input->buf->buffer->use];
8115 }
8116 avail = ctxt->input->buf->buffer->use -
8117 (ctxt->input->cur - ctxt->input->base);
8118 }
Owen Taylor3473f882001-02-23 17:55:21 +00008119 if (avail < 1)
8120 goto done;
8121 switch (ctxt->instate) {
8122 case XML_PARSER_EOF:
8123 /*
8124 * Document parsing is done !
8125 */
8126 goto done;
8127 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008128 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8129 xmlChar start[4];
8130 xmlCharEncoding enc;
8131
8132 /*
8133 * Very first chars read from the document flow.
8134 */
8135 if (avail < 4)
8136 goto done;
8137
8138 /*
8139 * Get the 4 first bytes and decode the charset
8140 * if enc != XML_CHAR_ENCODING_NONE
8141 * plug some encoding conversion routines.
8142 */
8143 start[0] = RAW;
8144 start[1] = NXT(1);
8145 start[2] = NXT(2);
8146 start[3] = NXT(3);
8147 enc = xmlDetectCharEncoding(start, 4);
8148 if (enc != XML_CHAR_ENCODING_NONE) {
8149 xmlSwitchEncoding(ctxt, enc);
8150 }
8151 break;
8152 }
Owen Taylor3473f882001-02-23 17:55:21 +00008153
8154 cur = ctxt->input->cur[0];
8155 next = ctxt->input->cur[1];
8156 if (cur == 0) {
8157 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8158 ctxt->sax->setDocumentLocator(ctxt->userData,
8159 &xmlDefaultSAXLocator);
8160 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8161 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8162 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8163 ctxt->wellFormed = 0;
8164 ctxt->disableSAX = 1;
8165 ctxt->instate = XML_PARSER_EOF;
8166#ifdef DEBUG_PUSH
8167 xmlGenericError(xmlGenericErrorContext,
8168 "PP: entering EOF\n");
8169#endif
8170 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8171 ctxt->sax->endDocument(ctxt->userData);
8172 goto done;
8173 }
8174 if ((cur == '<') && (next == '?')) {
8175 /* PI or XML decl */
8176 if (avail < 5) return(ret);
8177 if ((!terminate) &&
8178 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8179 return(ret);
8180 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8181 ctxt->sax->setDocumentLocator(ctxt->userData,
8182 &xmlDefaultSAXLocator);
8183 if ((ctxt->input->cur[2] == 'x') &&
8184 (ctxt->input->cur[3] == 'm') &&
8185 (ctxt->input->cur[4] == 'l') &&
8186 (IS_BLANK(ctxt->input->cur[5]))) {
8187 ret += 5;
8188#ifdef DEBUG_PUSH
8189 xmlGenericError(xmlGenericErrorContext,
8190 "PP: Parsing XML Decl\n");
8191#endif
8192 xmlParseXMLDecl(ctxt);
8193 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8194 /*
8195 * The XML REC instructs us to stop parsing right
8196 * here
8197 */
8198 ctxt->instate = XML_PARSER_EOF;
8199 return(0);
8200 }
8201 ctxt->standalone = ctxt->input->standalone;
8202 if ((ctxt->encoding == NULL) &&
8203 (ctxt->input->encoding != NULL))
8204 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8205 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8206 (!ctxt->disableSAX))
8207 ctxt->sax->startDocument(ctxt->userData);
8208 ctxt->instate = XML_PARSER_MISC;
8209#ifdef DEBUG_PUSH
8210 xmlGenericError(xmlGenericErrorContext,
8211 "PP: entering MISC\n");
8212#endif
8213 } else {
8214 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8215 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8216 (!ctxt->disableSAX))
8217 ctxt->sax->startDocument(ctxt->userData);
8218 ctxt->instate = XML_PARSER_MISC;
8219#ifdef DEBUG_PUSH
8220 xmlGenericError(xmlGenericErrorContext,
8221 "PP: entering MISC\n");
8222#endif
8223 }
8224 } else {
8225 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8226 ctxt->sax->setDocumentLocator(ctxt->userData,
8227 &xmlDefaultSAXLocator);
8228 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8229 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8230 (!ctxt->disableSAX))
8231 ctxt->sax->startDocument(ctxt->userData);
8232 ctxt->instate = XML_PARSER_MISC;
8233#ifdef DEBUG_PUSH
8234 xmlGenericError(xmlGenericErrorContext,
8235 "PP: entering MISC\n");
8236#endif
8237 }
8238 break;
8239 case XML_PARSER_MISC:
8240 SKIP_BLANKS;
8241 if (ctxt->input->buf == NULL)
8242 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8243 else
8244 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8245 if (avail < 2)
8246 goto done;
8247 cur = ctxt->input->cur[0];
8248 next = ctxt->input->cur[1];
8249 if ((cur == '<') && (next == '?')) {
8250 if ((!terminate) &&
8251 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8252 goto done;
8253#ifdef DEBUG_PUSH
8254 xmlGenericError(xmlGenericErrorContext,
8255 "PP: Parsing PI\n");
8256#endif
8257 xmlParsePI(ctxt);
8258 } else if ((cur == '<') && (next == '!') &&
8259 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8260 if ((!terminate) &&
8261 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8262 goto done;
8263#ifdef DEBUG_PUSH
8264 xmlGenericError(xmlGenericErrorContext,
8265 "PP: Parsing Comment\n");
8266#endif
8267 xmlParseComment(ctxt);
8268 ctxt->instate = XML_PARSER_MISC;
8269 } else if ((cur == '<') && (next == '!') &&
8270 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8271 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8272 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8273 (ctxt->input->cur[8] == 'E')) {
8274 if ((!terminate) &&
8275 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8276 goto done;
8277#ifdef DEBUG_PUSH
8278 xmlGenericError(xmlGenericErrorContext,
8279 "PP: Parsing internal subset\n");
8280#endif
8281 ctxt->inSubset = 1;
8282 xmlParseDocTypeDecl(ctxt);
8283 if (RAW == '[') {
8284 ctxt->instate = XML_PARSER_DTD;
8285#ifdef DEBUG_PUSH
8286 xmlGenericError(xmlGenericErrorContext,
8287 "PP: entering DTD\n");
8288#endif
8289 } else {
8290 /*
8291 * Create and update the external subset.
8292 */
8293 ctxt->inSubset = 2;
8294 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8295 (ctxt->sax->externalSubset != NULL))
8296 ctxt->sax->externalSubset(ctxt->userData,
8297 ctxt->intSubName, ctxt->extSubSystem,
8298 ctxt->extSubURI);
8299 ctxt->inSubset = 0;
8300 ctxt->instate = XML_PARSER_PROLOG;
8301#ifdef DEBUG_PUSH
8302 xmlGenericError(xmlGenericErrorContext,
8303 "PP: entering PROLOG\n");
8304#endif
8305 }
8306 } else if ((cur == '<') && (next == '!') &&
8307 (avail < 9)) {
8308 goto done;
8309 } else {
8310 ctxt->instate = XML_PARSER_START_TAG;
8311#ifdef DEBUG_PUSH
8312 xmlGenericError(xmlGenericErrorContext,
8313 "PP: entering START_TAG\n");
8314#endif
8315 }
8316 break;
8317 case XML_PARSER_IGNORE:
8318 xmlGenericError(xmlGenericErrorContext,
8319 "PP: internal error, state == IGNORE");
8320 ctxt->instate = XML_PARSER_DTD;
8321#ifdef DEBUG_PUSH
8322 xmlGenericError(xmlGenericErrorContext,
8323 "PP: entering DTD\n");
8324#endif
8325 break;
8326 case XML_PARSER_PROLOG:
8327 SKIP_BLANKS;
8328 if (ctxt->input->buf == NULL)
8329 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8330 else
8331 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8332 if (avail < 2)
8333 goto done;
8334 cur = ctxt->input->cur[0];
8335 next = ctxt->input->cur[1];
8336 if ((cur == '<') && (next == '?')) {
8337 if ((!terminate) &&
8338 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8339 goto done;
8340#ifdef DEBUG_PUSH
8341 xmlGenericError(xmlGenericErrorContext,
8342 "PP: Parsing PI\n");
8343#endif
8344 xmlParsePI(ctxt);
8345 } else if ((cur == '<') && (next == '!') &&
8346 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8347 if ((!terminate) &&
8348 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8349 goto done;
8350#ifdef DEBUG_PUSH
8351 xmlGenericError(xmlGenericErrorContext,
8352 "PP: Parsing Comment\n");
8353#endif
8354 xmlParseComment(ctxt);
8355 ctxt->instate = XML_PARSER_PROLOG;
8356 } else if ((cur == '<') && (next == '!') &&
8357 (avail < 4)) {
8358 goto done;
8359 } else {
8360 ctxt->instate = XML_PARSER_START_TAG;
8361#ifdef DEBUG_PUSH
8362 xmlGenericError(xmlGenericErrorContext,
8363 "PP: entering START_TAG\n");
8364#endif
8365 }
8366 break;
8367 case XML_PARSER_EPILOG:
8368 SKIP_BLANKS;
8369 if (ctxt->input->buf == NULL)
8370 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8371 else
8372 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8373 if (avail < 2)
8374 goto done;
8375 cur = ctxt->input->cur[0];
8376 next = ctxt->input->cur[1];
8377 if ((cur == '<') && (next == '?')) {
8378 if ((!terminate) &&
8379 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8380 goto done;
8381#ifdef DEBUG_PUSH
8382 xmlGenericError(xmlGenericErrorContext,
8383 "PP: Parsing PI\n");
8384#endif
8385 xmlParsePI(ctxt);
8386 ctxt->instate = XML_PARSER_EPILOG;
8387 } else if ((cur == '<') && (next == '!') &&
8388 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8389 if ((!terminate) &&
8390 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8391 goto done;
8392#ifdef DEBUG_PUSH
8393 xmlGenericError(xmlGenericErrorContext,
8394 "PP: Parsing Comment\n");
8395#endif
8396 xmlParseComment(ctxt);
8397 ctxt->instate = XML_PARSER_EPILOG;
8398 } else if ((cur == '<') && (next == '!') &&
8399 (avail < 4)) {
8400 goto done;
8401 } else {
8402 ctxt->errNo = XML_ERR_DOCUMENT_END;
8403 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8404 ctxt->sax->error(ctxt->userData,
8405 "Extra content at the end of the document\n");
8406 ctxt->wellFormed = 0;
8407 ctxt->disableSAX = 1;
8408 ctxt->instate = XML_PARSER_EOF;
8409#ifdef DEBUG_PUSH
8410 xmlGenericError(xmlGenericErrorContext,
8411 "PP: entering EOF\n");
8412#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008413 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008414 ctxt->sax->endDocument(ctxt->userData);
8415 goto done;
8416 }
8417 break;
8418 case XML_PARSER_START_TAG: {
8419 xmlChar *name, *oldname;
8420
8421 if ((avail < 2) && (ctxt->inputNr == 1))
8422 goto done;
8423 cur = ctxt->input->cur[0];
8424 if (cur != '<') {
8425 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8426 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8427 ctxt->sax->error(ctxt->userData,
8428 "Start tag expect, '<' not found\n");
8429 ctxt->wellFormed = 0;
8430 ctxt->disableSAX = 1;
8431 ctxt->instate = XML_PARSER_EOF;
8432#ifdef DEBUG_PUSH
8433 xmlGenericError(xmlGenericErrorContext,
8434 "PP: entering EOF\n");
8435#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008436 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008437 ctxt->sax->endDocument(ctxt->userData);
8438 goto done;
8439 }
8440 if ((!terminate) &&
8441 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8442 goto done;
8443 if (ctxt->spaceNr == 0)
8444 spacePush(ctxt, -1);
8445 else
8446 spacePush(ctxt, *ctxt->space);
8447 name = xmlParseStartTag(ctxt);
8448 if (name == NULL) {
8449 spacePop(ctxt);
8450 ctxt->instate = XML_PARSER_EOF;
8451#ifdef DEBUG_PUSH
8452 xmlGenericError(xmlGenericErrorContext,
8453 "PP: entering EOF\n");
8454#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008455 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008456 ctxt->sax->endDocument(ctxt->userData);
8457 goto done;
8458 }
8459 namePush(ctxt, xmlStrdup(name));
8460
8461 /*
8462 * [ VC: Root Element Type ]
8463 * The Name in the document type declaration must match
8464 * the element type of the root element.
8465 */
8466 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8467 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8468 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8469
8470 /*
8471 * Check for an Empty Element.
8472 */
8473 if ((RAW == '/') && (NXT(1) == '>')) {
8474 SKIP(2);
8475 if ((ctxt->sax != NULL) &&
8476 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8477 ctxt->sax->endElement(ctxt->userData, name);
8478 xmlFree(name);
8479 oldname = namePop(ctxt);
8480 spacePop(ctxt);
8481 if (oldname != NULL) {
8482#ifdef DEBUG_STACK
8483 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8484#endif
8485 xmlFree(oldname);
8486 }
8487 if (ctxt->name == NULL) {
8488 ctxt->instate = XML_PARSER_EPILOG;
8489#ifdef DEBUG_PUSH
8490 xmlGenericError(xmlGenericErrorContext,
8491 "PP: entering EPILOG\n");
8492#endif
8493 } else {
8494 ctxt->instate = XML_PARSER_CONTENT;
8495#ifdef DEBUG_PUSH
8496 xmlGenericError(xmlGenericErrorContext,
8497 "PP: entering CONTENT\n");
8498#endif
8499 }
8500 break;
8501 }
8502 if (RAW == '>') {
8503 NEXT;
8504 } else {
8505 ctxt->errNo = XML_ERR_GT_REQUIRED;
8506 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8507 ctxt->sax->error(ctxt->userData,
8508 "Couldn't find end of Start Tag %s\n",
8509 name);
8510 ctxt->wellFormed = 0;
8511 ctxt->disableSAX = 1;
8512
8513 /*
8514 * end of parsing of this node.
8515 */
8516 nodePop(ctxt);
8517 oldname = namePop(ctxt);
8518 spacePop(ctxt);
8519 if (oldname != NULL) {
8520#ifdef DEBUG_STACK
8521 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8522#endif
8523 xmlFree(oldname);
8524 }
8525 }
8526 xmlFree(name);
8527 ctxt->instate = XML_PARSER_CONTENT;
8528#ifdef DEBUG_PUSH
8529 xmlGenericError(xmlGenericErrorContext,
8530 "PP: entering CONTENT\n");
8531#endif
8532 break;
8533 }
8534 case XML_PARSER_CONTENT: {
8535 const xmlChar *test;
8536 int cons;
Owen Taylor3473f882001-02-23 17:55:21 +00008537 if ((avail < 2) && (ctxt->inputNr == 1))
8538 goto done;
8539 cur = ctxt->input->cur[0];
8540 next = ctxt->input->cur[1];
8541
8542 test = CUR_PTR;
8543 cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008544 if ((cur == '<') && (next == '?')) {
8545 if ((!terminate) &&
8546 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8547 goto done;
8548#ifdef DEBUG_PUSH
8549 xmlGenericError(xmlGenericErrorContext,
8550 "PP: Parsing PI\n");
8551#endif
8552 xmlParsePI(ctxt);
8553 } else if ((cur == '<') && (next == '!') &&
8554 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8555 if ((!terminate) &&
8556 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8557 goto done;
8558#ifdef DEBUG_PUSH
8559 xmlGenericError(xmlGenericErrorContext,
8560 "PP: Parsing Comment\n");
8561#endif
8562 xmlParseComment(ctxt);
8563 ctxt->instate = XML_PARSER_CONTENT;
8564 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8565 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8566 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8567 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8568 (ctxt->input->cur[8] == '[')) {
8569 SKIP(9);
8570 ctxt->instate = XML_PARSER_CDATA_SECTION;
8571#ifdef DEBUG_PUSH
8572 xmlGenericError(xmlGenericErrorContext,
8573 "PP: entering CDATA_SECTION\n");
8574#endif
8575 break;
8576 } else if ((cur == '<') && (next == '!') &&
8577 (avail < 9)) {
8578 goto done;
8579 } else if ((cur == '<') && (next == '/')) {
8580 ctxt->instate = XML_PARSER_END_TAG;
8581#ifdef DEBUG_PUSH
8582 xmlGenericError(xmlGenericErrorContext,
8583 "PP: entering END_TAG\n");
8584#endif
8585 break;
8586 } else if (cur == '<') {
8587 ctxt->instate = XML_PARSER_START_TAG;
8588#ifdef DEBUG_PUSH
8589 xmlGenericError(xmlGenericErrorContext,
8590 "PP: entering START_TAG\n");
8591#endif
8592 break;
8593 } else if (cur == '&') {
8594 if ((!terminate) &&
8595 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8596 goto done;
8597#ifdef DEBUG_PUSH
8598 xmlGenericError(xmlGenericErrorContext,
8599 "PP: Parsing Reference\n");
8600#endif
8601 xmlParseReference(ctxt);
8602 } else {
8603 /* TODO Avoid the extra copy, handle directly !!! */
8604 /*
8605 * Goal of the following test is:
8606 * - minimize calls to the SAX 'character' callback
8607 * when they are mergeable
8608 * - handle an problem for isBlank when we only parse
8609 * a sequence of blank chars and the next one is
8610 * not available to check against '<' presence.
8611 * - tries to homogenize the differences in SAX
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008612 * callbacks between the push and pull versions
Owen Taylor3473f882001-02-23 17:55:21 +00008613 * of the parser.
8614 */
8615 if ((ctxt->inputNr == 1) &&
8616 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8617 if ((!terminate) &&
8618 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8619 goto done;
8620 }
8621 ctxt->checkIndex = 0;
8622#ifdef DEBUG_PUSH
8623 xmlGenericError(xmlGenericErrorContext,
8624 "PP: Parsing char data\n");
8625#endif
8626 xmlParseCharData(ctxt, 0);
8627 }
8628 /*
8629 * Pop-up of finished entities.
8630 */
8631 while ((RAW == 0) && (ctxt->inputNr > 1))
8632 xmlPopInput(ctxt);
Daniel Veillardfdc91562002-07-01 21:52:03 +00008633 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008634 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8635 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8636 ctxt->sax->error(ctxt->userData,
8637 "detected an error in element content\n");
8638 ctxt->wellFormed = 0;
8639 ctxt->disableSAX = 1;
8640 ctxt->instate = XML_PARSER_EOF;
8641 break;
8642 }
8643 break;
8644 }
8645 case XML_PARSER_CDATA_SECTION: {
8646 /*
8647 * The Push mode need to have the SAX callback for
8648 * cdataBlock merge back contiguous callbacks.
8649 */
8650 int base;
8651
8652 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8653 if (base < 0) {
8654 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8655 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8656 if (ctxt->sax->cdataBlock != NULL)
8657 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8658 XML_PARSER_BIG_BUFFER_SIZE);
8659 }
8660 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8661 ctxt->checkIndex = 0;
8662 }
8663 goto done;
8664 } else {
8665 if ((ctxt->sax != NULL) && (base > 0) &&
8666 (!ctxt->disableSAX)) {
8667 if (ctxt->sax->cdataBlock != NULL)
8668 ctxt->sax->cdataBlock(ctxt->userData,
8669 ctxt->input->cur, base);
8670 }
8671 SKIP(base + 3);
8672 ctxt->checkIndex = 0;
8673 ctxt->instate = XML_PARSER_CONTENT;
8674#ifdef DEBUG_PUSH
8675 xmlGenericError(xmlGenericErrorContext,
8676 "PP: entering CONTENT\n");
8677#endif
8678 }
8679 break;
8680 }
8681 case XML_PARSER_END_TAG:
8682 if (avail < 2)
8683 goto done;
8684 if ((!terminate) &&
8685 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8686 goto done;
8687 xmlParseEndTag(ctxt);
8688 if (ctxt->name == NULL) {
8689 ctxt->instate = XML_PARSER_EPILOG;
8690#ifdef DEBUG_PUSH
8691 xmlGenericError(xmlGenericErrorContext,
8692 "PP: entering EPILOG\n");
8693#endif
8694 } else {
8695 ctxt->instate = XML_PARSER_CONTENT;
8696#ifdef DEBUG_PUSH
8697 xmlGenericError(xmlGenericErrorContext,
8698 "PP: entering CONTENT\n");
8699#endif
8700 }
8701 break;
8702 case XML_PARSER_DTD: {
8703 /*
8704 * Sorry but progressive parsing of the internal subset
8705 * is not expected to be supported. We first check that
8706 * the full content of the internal subset is available and
8707 * the parsing is launched only at that point.
8708 * Internal subset ends up with "']' S? '>'" in an unescaped
8709 * section and not in a ']]>' sequence which are conditional
8710 * sections (whoever argued to keep that crap in XML deserve
8711 * a place in hell !).
8712 */
8713 int base, i;
8714 xmlChar *buf;
8715 xmlChar quote = 0;
8716
8717 base = ctxt->input->cur - ctxt->input->base;
8718 if (base < 0) return(0);
8719 if (ctxt->checkIndex > base)
8720 base = ctxt->checkIndex;
8721 buf = ctxt->input->buf->buffer->content;
8722 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8723 base++) {
8724 if (quote != 0) {
8725 if (buf[base] == quote)
8726 quote = 0;
8727 continue;
8728 }
8729 if (buf[base] == '"') {
8730 quote = '"';
8731 continue;
8732 }
8733 if (buf[base] == '\'') {
8734 quote = '\'';
8735 continue;
8736 }
8737 if (buf[base] == ']') {
8738 if ((unsigned int) base +1 >=
8739 ctxt->input->buf->buffer->use)
8740 break;
8741 if (buf[base + 1] == ']') {
8742 /* conditional crap, skip both ']' ! */
8743 base++;
8744 continue;
8745 }
8746 for (i = 0;
8747 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8748 i++) {
8749 if (buf[base + i] == '>')
8750 goto found_end_int_subset;
8751 }
8752 break;
8753 }
8754 }
8755 /*
8756 * We didn't found the end of the Internal subset
8757 */
8758 if (quote == 0)
8759 ctxt->checkIndex = base;
8760#ifdef DEBUG_PUSH
8761 if (next == 0)
8762 xmlGenericError(xmlGenericErrorContext,
8763 "PP: lookup of int subset end filed\n");
8764#endif
8765 goto done;
8766
8767found_end_int_subset:
8768 xmlParseInternalSubset(ctxt);
8769 ctxt->inSubset = 2;
8770 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8771 (ctxt->sax->externalSubset != NULL))
8772 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8773 ctxt->extSubSystem, ctxt->extSubURI);
8774 ctxt->inSubset = 0;
8775 ctxt->instate = XML_PARSER_PROLOG;
8776 ctxt->checkIndex = 0;
8777#ifdef DEBUG_PUSH
8778 xmlGenericError(xmlGenericErrorContext,
8779 "PP: entering PROLOG\n");
8780#endif
8781 break;
8782 }
8783 case XML_PARSER_COMMENT:
8784 xmlGenericError(xmlGenericErrorContext,
8785 "PP: internal error, state == COMMENT\n");
8786 ctxt->instate = XML_PARSER_CONTENT;
8787#ifdef DEBUG_PUSH
8788 xmlGenericError(xmlGenericErrorContext,
8789 "PP: entering CONTENT\n");
8790#endif
8791 break;
8792 case XML_PARSER_PI:
8793 xmlGenericError(xmlGenericErrorContext,
8794 "PP: internal error, state == PI\n");
8795 ctxt->instate = XML_PARSER_CONTENT;
8796#ifdef DEBUG_PUSH
8797 xmlGenericError(xmlGenericErrorContext,
8798 "PP: entering CONTENT\n");
8799#endif
8800 break;
8801 case XML_PARSER_ENTITY_DECL:
8802 xmlGenericError(xmlGenericErrorContext,
8803 "PP: internal error, state == ENTITY_DECL\n");
8804 ctxt->instate = XML_PARSER_DTD;
8805#ifdef DEBUG_PUSH
8806 xmlGenericError(xmlGenericErrorContext,
8807 "PP: entering DTD\n");
8808#endif
8809 break;
8810 case XML_PARSER_ENTITY_VALUE:
8811 xmlGenericError(xmlGenericErrorContext,
8812 "PP: internal error, state == ENTITY_VALUE\n");
8813 ctxt->instate = XML_PARSER_CONTENT;
8814#ifdef DEBUG_PUSH
8815 xmlGenericError(xmlGenericErrorContext,
8816 "PP: entering DTD\n");
8817#endif
8818 break;
8819 case XML_PARSER_ATTRIBUTE_VALUE:
8820 xmlGenericError(xmlGenericErrorContext,
8821 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8822 ctxt->instate = XML_PARSER_START_TAG;
8823#ifdef DEBUG_PUSH
8824 xmlGenericError(xmlGenericErrorContext,
8825 "PP: entering START_TAG\n");
8826#endif
8827 break;
8828 case XML_PARSER_SYSTEM_LITERAL:
8829 xmlGenericError(xmlGenericErrorContext,
8830 "PP: internal error, state == SYSTEM_LITERAL\n");
8831 ctxt->instate = XML_PARSER_START_TAG;
8832#ifdef DEBUG_PUSH
8833 xmlGenericError(xmlGenericErrorContext,
8834 "PP: entering START_TAG\n");
8835#endif
8836 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00008837 case XML_PARSER_PUBLIC_LITERAL:
8838 xmlGenericError(xmlGenericErrorContext,
8839 "PP: internal error, state == PUBLIC_LITERAL\n");
8840 ctxt->instate = XML_PARSER_START_TAG;
8841#ifdef DEBUG_PUSH
8842 xmlGenericError(xmlGenericErrorContext,
8843 "PP: entering START_TAG\n");
8844#endif
8845 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008846 }
8847 }
8848done:
8849#ifdef DEBUG_PUSH
8850 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8851#endif
8852 return(ret);
8853}
8854
8855/**
Owen Taylor3473f882001-02-23 17:55:21 +00008856 * xmlParseChunk:
8857 * @ctxt: an XML parser context
8858 * @chunk: an char array
8859 * @size: the size in byte of the chunk
8860 * @terminate: last chunk indicator
8861 *
8862 * Parse a Chunk of memory
8863 *
8864 * Returns zero if no error, the xmlParserErrors otherwise.
8865 */
8866int
8867xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8868 int terminate) {
8869 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8870 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8871 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8872 int cur = ctxt->input->cur - ctxt->input->base;
8873
8874 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8875 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8876 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008877 ctxt->input->end =
8878 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008879#ifdef DEBUG_PUSH
8880 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8881#endif
8882
8883 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8884 xmlParseTryOrFinish(ctxt, terminate);
8885 } else if (ctxt->instate != XML_PARSER_EOF) {
8886 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8887 xmlParserInputBufferPtr in = ctxt->input->buf;
8888 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8889 (in->raw != NULL)) {
8890 int nbchars;
8891
8892 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8893 if (nbchars < 0) {
8894 xmlGenericError(xmlGenericErrorContext,
8895 "xmlParseChunk: encoder error\n");
8896 return(XML_ERR_INVALID_ENCODING);
8897 }
8898 }
8899 }
8900 }
8901 xmlParseTryOrFinish(ctxt, terminate);
8902 if (terminate) {
8903 /*
8904 * Check for termination
8905 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +00008906 int avail = 0;
8907 if (ctxt->input->buf == NULL)
8908 avail = ctxt->input->length -
8909 (ctxt->input->cur - ctxt->input->base);
8910 else
8911 avail = ctxt->input->buf->buffer->use -
8912 (ctxt->input->cur - ctxt->input->base);
8913
Owen Taylor3473f882001-02-23 17:55:21 +00008914 if ((ctxt->instate != XML_PARSER_EOF) &&
8915 (ctxt->instate != XML_PARSER_EPILOG)) {
8916 ctxt->errNo = XML_ERR_DOCUMENT_END;
8917 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8918 ctxt->sax->error(ctxt->userData,
8919 "Extra content at the end of the document\n");
8920 ctxt->wellFormed = 0;
8921 ctxt->disableSAX = 1;
8922 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00008923 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
8924 ctxt->errNo = XML_ERR_DOCUMENT_END;
8925 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8926 ctxt->sax->error(ctxt->userData,
8927 "Extra content at the end of the document\n");
8928 ctxt->wellFormed = 0;
8929 ctxt->disableSAX = 1;
8930
8931 }
Owen Taylor3473f882001-02-23 17:55:21 +00008932 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008933 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008934 ctxt->sax->endDocument(ctxt->userData);
8935 }
8936 ctxt->instate = XML_PARSER_EOF;
8937 }
8938 return((xmlParserErrors) ctxt->errNo);
8939}
8940
8941/************************************************************************
8942 * *
8943 * I/O front end functions to the parser *
8944 * *
8945 ************************************************************************/
8946
8947/**
8948 * xmlStopParser:
8949 * @ctxt: an XML parser context
8950 *
8951 * Blocks further parser processing
8952 */
8953void
8954xmlStopParser(xmlParserCtxtPtr ctxt) {
8955 ctxt->instate = XML_PARSER_EOF;
8956 if (ctxt->input != NULL)
8957 ctxt->input->cur = BAD_CAST"";
8958}
8959
8960/**
8961 * xmlCreatePushParserCtxt:
8962 * @sax: a SAX handler
8963 * @user_data: The user data returned on SAX callbacks
8964 * @chunk: a pointer to an array of chars
8965 * @size: number of chars in the array
8966 * @filename: an optional file name or URI
8967 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00008968 * Create a parser context for using the XML parser in push mode.
8969 * If @buffer and @size are non-NULL, the data is used to detect
8970 * the encoding. The remaining characters will be parsed so they
8971 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00008972 * To allow content encoding detection, @size should be >= 4
8973 * The value of @filename is used for fetching external entities
8974 * and error/warning reports.
8975 *
8976 * Returns the new parser context or NULL
8977 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00008978
Owen Taylor3473f882001-02-23 17:55:21 +00008979xmlParserCtxtPtr
8980xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8981 const char *chunk, int size, const char *filename) {
8982 xmlParserCtxtPtr ctxt;
8983 xmlParserInputPtr inputStream;
8984 xmlParserInputBufferPtr buf;
8985 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8986
8987 /*
8988 * plug some encoding conversion routines
8989 */
8990 if ((chunk != NULL) && (size >= 4))
8991 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8992
8993 buf = xmlAllocParserInputBuffer(enc);
8994 if (buf == NULL) return(NULL);
8995
8996 ctxt = xmlNewParserCtxt();
8997 if (ctxt == NULL) {
8998 xmlFree(buf);
8999 return(NULL);
9000 }
9001 if (sax != NULL) {
9002 if (ctxt->sax != &xmlDefaultSAXHandler)
9003 xmlFree(ctxt->sax);
9004 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9005 if (ctxt->sax == NULL) {
9006 xmlFree(buf);
9007 xmlFree(ctxt);
9008 return(NULL);
9009 }
9010 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9011 if (user_data != NULL)
9012 ctxt->userData = user_data;
9013 }
9014 if (filename == NULL) {
9015 ctxt->directory = NULL;
9016 } else {
9017 ctxt->directory = xmlParserGetDirectory(filename);
9018 }
9019
9020 inputStream = xmlNewInputStream(ctxt);
9021 if (inputStream == NULL) {
9022 xmlFreeParserCtxt(ctxt);
9023 return(NULL);
9024 }
9025
9026 if (filename == NULL)
9027 inputStream->filename = NULL;
9028 else
Daniel Veillardf4862f02002-09-10 11:13:43 +00009029 inputStream->filename = (char *)
9030 xmlNormalizeWindowsPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +00009031 inputStream->buf = buf;
9032 inputStream->base = inputStream->buf->buffer->content;
9033 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009034 inputStream->end =
9035 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009036
9037 inputPush(ctxt, inputStream);
9038
9039 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9040 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009041 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9042 int cur = ctxt->input->cur - ctxt->input->base;
9043
Owen Taylor3473f882001-02-23 17:55:21 +00009044 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009045
9046 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9047 ctxt->input->cur = ctxt->input->base + cur;
9048 ctxt->input->end =
9049 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009050#ifdef DEBUG_PUSH
9051 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9052#endif
9053 }
9054
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009055 if (enc != XML_CHAR_ENCODING_NONE) {
9056 xmlSwitchEncoding(ctxt, enc);
9057 }
9058
Owen Taylor3473f882001-02-23 17:55:21 +00009059 return(ctxt);
9060}
9061
9062/**
9063 * xmlCreateIOParserCtxt:
9064 * @sax: a SAX handler
9065 * @user_data: The user data returned on SAX callbacks
9066 * @ioread: an I/O read function
9067 * @ioclose: an I/O close function
9068 * @ioctx: an I/O handler
9069 * @enc: the charset encoding if known
9070 *
9071 * Create a parser context for using the XML parser with an existing
9072 * I/O stream
9073 *
9074 * Returns the new parser context or NULL
9075 */
9076xmlParserCtxtPtr
9077xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9078 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9079 void *ioctx, xmlCharEncoding enc) {
9080 xmlParserCtxtPtr ctxt;
9081 xmlParserInputPtr inputStream;
9082 xmlParserInputBufferPtr buf;
9083
9084 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9085 if (buf == NULL) return(NULL);
9086
9087 ctxt = xmlNewParserCtxt();
9088 if (ctxt == NULL) {
9089 xmlFree(buf);
9090 return(NULL);
9091 }
9092 if (sax != NULL) {
9093 if (ctxt->sax != &xmlDefaultSAXHandler)
9094 xmlFree(ctxt->sax);
9095 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9096 if (ctxt->sax == NULL) {
9097 xmlFree(buf);
9098 xmlFree(ctxt);
9099 return(NULL);
9100 }
9101 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9102 if (user_data != NULL)
9103 ctxt->userData = user_data;
9104 }
9105
9106 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9107 if (inputStream == NULL) {
9108 xmlFreeParserCtxt(ctxt);
9109 return(NULL);
9110 }
9111 inputPush(ctxt, inputStream);
9112
9113 return(ctxt);
9114}
9115
9116/************************************************************************
9117 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009118 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00009119 * *
9120 ************************************************************************/
9121
9122/**
9123 * xmlIOParseDTD:
9124 * @sax: the SAX handler block or NULL
9125 * @input: an Input Buffer
9126 * @enc: the charset encoding if known
9127 *
9128 * Load and parse a DTD
9129 *
9130 * Returns the resulting xmlDtdPtr or NULL in case of error.
9131 * @input will be freed at parsing end.
9132 */
9133
9134xmlDtdPtr
9135xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9136 xmlCharEncoding enc) {
9137 xmlDtdPtr ret = NULL;
9138 xmlParserCtxtPtr ctxt;
9139 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009140 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00009141
9142 if (input == NULL)
9143 return(NULL);
9144
9145 ctxt = xmlNewParserCtxt();
9146 if (ctxt == NULL) {
9147 return(NULL);
9148 }
9149
9150 /*
9151 * Set-up the SAX context
9152 */
9153 if (sax != NULL) {
9154 if (ctxt->sax != NULL)
9155 xmlFree(ctxt->sax);
9156 ctxt->sax = sax;
9157 ctxt->userData = NULL;
9158 }
9159
9160 /*
9161 * generate a parser input from the I/O handler
9162 */
9163
9164 pinput = xmlNewIOInputStream(ctxt, input, enc);
9165 if (pinput == NULL) {
9166 if (sax != NULL) ctxt->sax = NULL;
9167 xmlFreeParserCtxt(ctxt);
9168 return(NULL);
9169 }
9170
9171 /*
9172 * plug some encoding conversion routines here.
9173 */
9174 xmlPushInput(ctxt, pinput);
9175
9176 pinput->filename = NULL;
9177 pinput->line = 1;
9178 pinput->col = 1;
9179 pinput->base = ctxt->input->cur;
9180 pinput->cur = ctxt->input->cur;
9181 pinput->free = NULL;
9182
9183 /*
9184 * let's parse that entity knowing it's an external subset.
9185 */
9186 ctxt->inSubset = 2;
9187 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9188 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9189 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009190
9191 if (enc == XML_CHAR_ENCODING_NONE) {
9192 /*
9193 * Get the 4 first bytes and decode the charset
9194 * if enc != XML_CHAR_ENCODING_NONE
9195 * plug some encoding conversion routines.
9196 */
9197 start[0] = RAW;
9198 start[1] = NXT(1);
9199 start[2] = NXT(2);
9200 start[3] = NXT(3);
9201 enc = xmlDetectCharEncoding(start, 4);
9202 if (enc != XML_CHAR_ENCODING_NONE) {
9203 xmlSwitchEncoding(ctxt, enc);
9204 }
9205 }
9206
Owen Taylor3473f882001-02-23 17:55:21 +00009207 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9208
9209 if (ctxt->myDoc != NULL) {
9210 if (ctxt->wellFormed) {
9211 ret = ctxt->myDoc->extSubset;
9212 ctxt->myDoc->extSubset = NULL;
9213 } else {
9214 ret = NULL;
9215 }
9216 xmlFreeDoc(ctxt->myDoc);
9217 ctxt->myDoc = NULL;
9218 }
9219 if (sax != NULL) ctxt->sax = NULL;
9220 xmlFreeParserCtxt(ctxt);
9221
9222 return(ret);
9223}
9224
9225/**
9226 * xmlSAXParseDTD:
9227 * @sax: the SAX handler block
9228 * @ExternalID: a NAME* containing the External ID of the DTD
9229 * @SystemID: a NAME* containing the URL to the DTD
9230 *
9231 * Load and parse an external subset.
9232 *
9233 * Returns the resulting xmlDtdPtr or NULL in case of error.
9234 */
9235
9236xmlDtdPtr
9237xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9238 const xmlChar *SystemID) {
9239 xmlDtdPtr ret = NULL;
9240 xmlParserCtxtPtr ctxt;
9241 xmlParserInputPtr input = NULL;
9242 xmlCharEncoding enc;
9243
9244 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9245
9246 ctxt = xmlNewParserCtxt();
9247 if (ctxt == NULL) {
9248 return(NULL);
9249 }
9250
9251 /*
9252 * Set-up the SAX context
9253 */
9254 if (sax != NULL) {
9255 if (ctxt->sax != NULL)
9256 xmlFree(ctxt->sax);
9257 ctxt->sax = sax;
9258 ctxt->userData = NULL;
9259 }
9260
9261 /*
9262 * Ask the Entity resolver to load the damn thing
9263 */
9264
9265 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9266 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9267 if (input == NULL) {
9268 if (sax != NULL) ctxt->sax = NULL;
9269 xmlFreeParserCtxt(ctxt);
9270 return(NULL);
9271 }
9272
9273 /*
9274 * plug some encoding conversion routines here.
9275 */
9276 xmlPushInput(ctxt, input);
9277 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9278 xmlSwitchEncoding(ctxt, enc);
9279
9280 if (input->filename == NULL)
9281 input->filename = (char *) xmlStrdup(SystemID);
9282 input->line = 1;
9283 input->col = 1;
9284 input->base = ctxt->input->cur;
9285 input->cur = ctxt->input->cur;
9286 input->free = NULL;
9287
9288 /*
9289 * let's parse that entity knowing it's an external subset.
9290 */
9291 ctxt->inSubset = 2;
9292 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9293 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9294 ExternalID, SystemID);
9295 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9296
9297 if (ctxt->myDoc != NULL) {
9298 if (ctxt->wellFormed) {
9299 ret = ctxt->myDoc->extSubset;
9300 ctxt->myDoc->extSubset = NULL;
9301 } else {
9302 ret = NULL;
9303 }
9304 xmlFreeDoc(ctxt->myDoc);
9305 ctxt->myDoc = NULL;
9306 }
9307 if (sax != NULL) ctxt->sax = NULL;
9308 xmlFreeParserCtxt(ctxt);
9309
9310 return(ret);
9311}
9312
9313/**
9314 * xmlParseDTD:
9315 * @ExternalID: a NAME* containing the External ID of the DTD
9316 * @SystemID: a NAME* containing the URL to the DTD
9317 *
9318 * Load and parse an external subset.
9319 *
9320 * Returns the resulting xmlDtdPtr or NULL in case of error.
9321 */
9322
9323xmlDtdPtr
9324xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9325 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9326}
9327
9328/************************************************************************
9329 * *
9330 * Front ends when parsing an Entity *
9331 * *
9332 ************************************************************************/
9333
9334/**
Owen Taylor3473f882001-02-23 17:55:21 +00009335 * xmlParseCtxtExternalEntity:
9336 * @ctx: the existing parsing context
9337 * @URL: the URL for the entity to load
9338 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009339 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009340 *
9341 * Parse an external general entity within an existing parsing context
9342 * An external general parsed entity is well-formed if it matches the
9343 * production labeled extParsedEnt.
9344 *
9345 * [78] extParsedEnt ::= TextDecl? content
9346 *
9347 * Returns 0 if the entity is well formed, -1 in case of args problem and
9348 * the parser error code otherwise
9349 */
9350
9351int
9352xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009353 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009354 xmlParserCtxtPtr ctxt;
9355 xmlDocPtr newDoc;
9356 xmlSAXHandlerPtr oldsax = NULL;
9357 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009358 xmlChar start[4];
9359 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009360
9361 if (ctx->depth > 40) {
9362 return(XML_ERR_ENTITY_LOOP);
9363 }
9364
Daniel Veillardcda96922001-08-21 10:56:31 +00009365 if (lst != NULL)
9366 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009367 if ((URL == NULL) && (ID == NULL))
9368 return(-1);
9369 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9370 return(-1);
9371
9372
9373 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9374 if (ctxt == NULL) return(-1);
9375 ctxt->userData = ctxt;
9376 oldsax = ctxt->sax;
9377 ctxt->sax = ctx->sax;
9378 newDoc = xmlNewDoc(BAD_CAST "1.0");
9379 if (newDoc == NULL) {
9380 xmlFreeParserCtxt(ctxt);
9381 return(-1);
9382 }
9383 if (ctx->myDoc != NULL) {
9384 newDoc->intSubset = ctx->myDoc->intSubset;
9385 newDoc->extSubset = ctx->myDoc->extSubset;
9386 }
9387 if (ctx->myDoc->URL != NULL) {
9388 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9389 }
9390 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9391 if (newDoc->children == NULL) {
9392 ctxt->sax = oldsax;
9393 xmlFreeParserCtxt(ctxt);
9394 newDoc->intSubset = NULL;
9395 newDoc->extSubset = NULL;
9396 xmlFreeDoc(newDoc);
9397 return(-1);
9398 }
9399 nodePush(ctxt, newDoc->children);
9400 if (ctx->myDoc == NULL) {
9401 ctxt->myDoc = newDoc;
9402 } else {
9403 ctxt->myDoc = ctx->myDoc;
9404 newDoc->children->doc = ctx->myDoc;
9405 }
9406
Daniel Veillard87a764e2001-06-20 17:41:10 +00009407 /*
9408 * Get the 4 first bytes and decode the charset
9409 * if enc != XML_CHAR_ENCODING_NONE
9410 * plug some encoding conversion routines.
9411 */
9412 GROW
9413 start[0] = RAW;
9414 start[1] = NXT(1);
9415 start[2] = NXT(2);
9416 start[3] = NXT(3);
9417 enc = xmlDetectCharEncoding(start, 4);
9418 if (enc != XML_CHAR_ENCODING_NONE) {
9419 xmlSwitchEncoding(ctxt, enc);
9420 }
9421
Owen Taylor3473f882001-02-23 17:55:21 +00009422 /*
9423 * Parse a possible text declaration first
9424 */
Owen Taylor3473f882001-02-23 17:55:21 +00009425 if ((RAW == '<') && (NXT(1) == '?') &&
9426 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9427 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9428 xmlParseTextDecl(ctxt);
9429 }
9430
9431 /*
9432 * Doing validity checking on chunk doesn't make sense
9433 */
9434 ctxt->instate = XML_PARSER_CONTENT;
9435 ctxt->validate = ctx->validate;
9436 ctxt->loadsubset = ctx->loadsubset;
9437 ctxt->depth = ctx->depth + 1;
9438 ctxt->replaceEntities = ctx->replaceEntities;
9439 if (ctxt->validate) {
9440 ctxt->vctxt.error = ctx->vctxt.error;
9441 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009442 } else {
9443 ctxt->vctxt.error = NULL;
9444 ctxt->vctxt.warning = NULL;
9445 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009446 ctxt->vctxt.nodeTab = NULL;
9447 ctxt->vctxt.nodeNr = 0;
9448 ctxt->vctxt.nodeMax = 0;
9449 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009450
9451 xmlParseContent(ctxt);
9452
9453 if ((RAW == '<') && (NXT(1) == '/')) {
9454 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9455 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9456 ctxt->sax->error(ctxt->userData,
9457 "chunk is not well balanced\n");
9458 ctxt->wellFormed = 0;
9459 ctxt->disableSAX = 1;
9460 } else if (RAW != 0) {
9461 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9462 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9463 ctxt->sax->error(ctxt->userData,
9464 "extra content at the end of well balanced chunk\n");
9465 ctxt->wellFormed = 0;
9466 ctxt->disableSAX = 1;
9467 }
9468 if (ctxt->node != newDoc->children) {
9469 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9470 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9471 ctxt->sax->error(ctxt->userData,
9472 "chunk is not well balanced\n");
9473 ctxt->wellFormed = 0;
9474 ctxt->disableSAX = 1;
9475 }
9476
9477 if (!ctxt->wellFormed) {
9478 if (ctxt->errNo == 0)
9479 ret = 1;
9480 else
9481 ret = ctxt->errNo;
9482 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009483 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009484 xmlNodePtr cur;
9485
9486 /*
9487 * Return the newly created nodeset after unlinking it from
9488 * they pseudo parent.
9489 */
9490 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009491 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009492 while (cur != NULL) {
9493 cur->parent = NULL;
9494 cur = cur->next;
9495 }
9496 newDoc->children->children = NULL;
9497 }
9498 ret = 0;
9499 }
9500 ctxt->sax = oldsax;
9501 xmlFreeParserCtxt(ctxt);
9502 newDoc->intSubset = NULL;
9503 newDoc->extSubset = NULL;
9504 xmlFreeDoc(newDoc);
9505
9506 return(ret);
9507}
9508
9509/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009510 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009511 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009512 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009513 * @sax: the SAX handler bloc (possibly NULL)
9514 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9515 * @depth: Used for loop detection, use 0
9516 * @URL: the URL for the entity to load
9517 * @ID: the System ID for the entity to load
9518 * @list: the return value for the set of parsed nodes
9519 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009520 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009521 *
9522 * Returns 0 if the entity is well formed, -1 in case of args problem and
9523 * the parser error code otherwise
9524 */
9525
Daniel Veillard257d9102001-05-08 10:41:44 +00009526static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009527xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9528 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009529 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009530 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009531 xmlParserCtxtPtr ctxt;
9532 xmlDocPtr newDoc;
9533 xmlSAXHandlerPtr oldsax = NULL;
9534 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009535 xmlChar start[4];
9536 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009537
9538 if (depth > 40) {
9539 return(XML_ERR_ENTITY_LOOP);
9540 }
9541
9542
9543
9544 if (list != NULL)
9545 *list = NULL;
9546 if ((URL == NULL) && (ID == NULL))
9547 return(-1);
9548 if (doc == NULL) /* @@ relax but check for dereferences */
9549 return(-1);
9550
9551
9552 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9553 if (ctxt == NULL) return(-1);
9554 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009555 if (oldctxt != NULL) {
9556 ctxt->_private = oldctxt->_private;
9557 ctxt->loadsubset = oldctxt->loadsubset;
9558 ctxt->validate = oldctxt->validate;
9559 ctxt->external = oldctxt->external;
9560 } else {
9561 /*
9562 * Doing validity checking on chunk without context
9563 * doesn't make sense
9564 */
9565 ctxt->_private = NULL;
9566 ctxt->validate = 0;
9567 ctxt->external = 2;
9568 ctxt->loadsubset = 0;
9569 }
Owen Taylor3473f882001-02-23 17:55:21 +00009570 if (sax != NULL) {
9571 oldsax = ctxt->sax;
9572 ctxt->sax = sax;
9573 if (user_data != NULL)
9574 ctxt->userData = user_data;
9575 }
9576 newDoc = xmlNewDoc(BAD_CAST "1.0");
9577 if (newDoc == NULL) {
9578 xmlFreeParserCtxt(ctxt);
9579 return(-1);
9580 }
9581 if (doc != NULL) {
9582 newDoc->intSubset = doc->intSubset;
9583 newDoc->extSubset = doc->extSubset;
9584 }
9585 if (doc->URL != NULL) {
9586 newDoc->URL = xmlStrdup(doc->URL);
9587 }
9588 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9589 if (newDoc->children == NULL) {
9590 if (sax != NULL)
9591 ctxt->sax = oldsax;
9592 xmlFreeParserCtxt(ctxt);
9593 newDoc->intSubset = NULL;
9594 newDoc->extSubset = NULL;
9595 xmlFreeDoc(newDoc);
9596 return(-1);
9597 }
9598 nodePush(ctxt, newDoc->children);
9599 if (doc == NULL) {
9600 ctxt->myDoc = newDoc;
9601 } else {
9602 ctxt->myDoc = doc;
9603 newDoc->children->doc = doc;
9604 }
9605
Daniel Veillard87a764e2001-06-20 17:41:10 +00009606 /*
9607 * Get the 4 first bytes and decode the charset
9608 * if enc != XML_CHAR_ENCODING_NONE
9609 * plug some encoding conversion routines.
9610 */
9611 GROW;
9612 start[0] = RAW;
9613 start[1] = NXT(1);
9614 start[2] = NXT(2);
9615 start[3] = NXT(3);
9616 enc = xmlDetectCharEncoding(start, 4);
9617 if (enc != XML_CHAR_ENCODING_NONE) {
9618 xmlSwitchEncoding(ctxt, enc);
9619 }
9620
Owen Taylor3473f882001-02-23 17:55:21 +00009621 /*
9622 * Parse a possible text declaration first
9623 */
Owen Taylor3473f882001-02-23 17:55:21 +00009624 if ((RAW == '<') && (NXT(1) == '?') &&
9625 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9626 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9627 xmlParseTextDecl(ctxt);
9628 }
9629
Owen Taylor3473f882001-02-23 17:55:21 +00009630 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009631 ctxt->depth = depth;
9632
9633 xmlParseContent(ctxt);
9634
Daniel Veillard561b7f82002-03-20 21:55:57 +00009635 if ((RAW == '<') && (NXT(1) == '/')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009636 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9637 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9638 ctxt->sax->error(ctxt->userData,
9639 "chunk is not well balanced\n");
9640 ctxt->wellFormed = 0;
9641 ctxt->disableSAX = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00009642 } else if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00009643 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9644 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9645 ctxt->sax->error(ctxt->userData,
9646 "extra content at the end of well balanced chunk\n");
9647 ctxt->wellFormed = 0;
9648 ctxt->disableSAX = 1;
9649 }
9650 if (ctxt->node != newDoc->children) {
9651 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9652 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9653 ctxt->sax->error(ctxt->userData,
9654 "chunk is not well balanced\n");
9655 ctxt->wellFormed = 0;
9656 ctxt->disableSAX = 1;
9657 }
9658
9659 if (!ctxt->wellFormed) {
9660 if (ctxt->errNo == 0)
9661 ret = 1;
9662 else
9663 ret = ctxt->errNo;
9664 } else {
9665 if (list != NULL) {
9666 xmlNodePtr cur;
9667
9668 /*
9669 * Return the newly created nodeset after unlinking it from
9670 * they pseudo parent.
9671 */
9672 cur = newDoc->children->children;
9673 *list = cur;
9674 while (cur != NULL) {
9675 cur->parent = NULL;
9676 cur = cur->next;
9677 }
9678 newDoc->children->children = NULL;
9679 }
9680 ret = 0;
9681 }
9682 if (sax != NULL)
9683 ctxt->sax = oldsax;
9684 xmlFreeParserCtxt(ctxt);
9685 newDoc->intSubset = NULL;
9686 newDoc->extSubset = NULL;
9687 xmlFreeDoc(newDoc);
9688
9689 return(ret);
9690}
9691
9692/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009693 * xmlParseExternalEntity:
9694 * @doc: the document the chunk pertains to
9695 * @sax: the SAX handler bloc (possibly NULL)
9696 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9697 * @depth: Used for loop detection, use 0
9698 * @URL: the URL for the entity to load
9699 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009700 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009701 *
9702 * Parse an external general entity
9703 * An external general parsed entity is well-formed if it matches the
9704 * production labeled extParsedEnt.
9705 *
9706 * [78] extParsedEnt ::= TextDecl? content
9707 *
9708 * Returns 0 if the entity is well formed, -1 in case of args problem and
9709 * the parser error code otherwise
9710 */
9711
9712int
9713xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009714 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009715 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009716 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009717}
9718
9719/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009720 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009721 * @doc: the document the chunk pertains to
9722 * @sax: the SAX handler bloc (possibly NULL)
9723 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9724 * @depth: Used for loop detection, use 0
9725 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009726 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009727 *
9728 * Parse a well-balanced chunk of an XML document
9729 * called by the parser
9730 * The allowed sequence for the Well Balanced Chunk is the one defined by
9731 * the content production in the XML grammar:
9732 *
9733 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9734 *
9735 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9736 * the parser error code otherwise
9737 */
9738
9739int
9740xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009741 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +00009742 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
9743 depth, string, lst, 0 );
9744}
9745
9746/**
Daniel Veillard328f48c2002-11-15 15:24:34 +00009747 * xmlParseBalancedChunkMemoryInternal:
9748 * @oldctxt: the existing parsing context
9749 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9750 * @user_data: the user data field for the parser context
9751 * @lst: the return value for the set of parsed nodes
9752 *
9753 *
9754 * Parse a well-balanced chunk of an XML document
9755 * called by the parser
9756 * The allowed sequence for the Well Balanced Chunk is the one defined by
9757 * the content production in the XML grammar:
9758 *
9759 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9760 *
9761 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9762 * the parser error code otherwise
9763 *
9764 * In case recover is set to 1, the nodelist will not be empty even if
9765 * the parsed chunk is not well balanced.
9766 */
9767static int
9768xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
9769 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
9770 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009771 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009772 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009773 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009774 int size;
9775 int ret = 0;
9776
9777 if (oldctxt->depth > 40) {
9778 return(XML_ERR_ENTITY_LOOP);
9779 }
9780
9781
9782 if (lst != NULL)
9783 *lst = NULL;
9784 if (string == NULL)
9785 return(-1);
9786
9787 size = xmlStrlen(string);
9788
9789 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9790 if (ctxt == NULL) return(-1);
9791 if (user_data != NULL)
9792 ctxt->userData = user_data;
9793 else
9794 ctxt->userData = ctxt;
9795
9796 oldsax = ctxt->sax;
9797 ctxt->sax = oldctxt->sax;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009798 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +00009799 newDoc = xmlNewDoc(BAD_CAST "1.0");
9800 if (newDoc == NULL) {
9801 ctxt->sax = oldsax;
9802 xmlFreeParserCtxt(ctxt);
9803 return(-1);
9804 }
Daniel Veillard328f48c2002-11-15 15:24:34 +00009805 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009806 } else {
9807 ctxt->myDoc = oldctxt->myDoc;
9808 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009809 }
Daniel Veillard68e9e742002-11-16 15:35:11 +00009810 ctxt->myDoc->children = xmlNewDocNode(newDoc, NULL,
9811 BAD_CAST "pseudoroot", NULL);
9812 if (ctxt->myDoc->children == NULL) {
9813 ctxt->sax = oldsax;
9814 xmlFreeParserCtxt(ctxt);
9815 if (newDoc != NULL)
9816 xmlFreeDoc(newDoc);
9817 return(-1);
9818 }
9819 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +00009820 ctxt->instate = XML_PARSER_CONTENT;
9821 ctxt->depth = oldctxt->depth + 1;
9822
9823 /*
9824 * Doing validity checking on chunk doesn't make sense
9825 */
9826 ctxt->validate = 0;
9827 ctxt->loadsubset = oldctxt->loadsubset;
9828
Daniel Veillard68e9e742002-11-16 15:35:11 +00009829 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +00009830 if ((RAW == '<') && (NXT(1) == '/')) {
9831 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9832 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9833 ctxt->sax->error(ctxt->userData,
9834 "chunk is not well balanced\n");
9835 ctxt->wellFormed = 0;
9836 ctxt->disableSAX = 1;
9837 } else if (RAW != 0) {
9838 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9839 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9840 ctxt->sax->error(ctxt->userData,
9841 "extra content at the end of well balanced chunk\n");
9842 ctxt->wellFormed = 0;
9843 ctxt->disableSAX = 1;
9844 }
Daniel Veillard68e9e742002-11-16 15:35:11 +00009845 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard328f48c2002-11-15 15:24:34 +00009846 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9847 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9848 ctxt->sax->error(ctxt->userData,
9849 "chunk is not well balanced\n");
9850 ctxt->wellFormed = 0;
9851 ctxt->disableSAX = 1;
9852 }
9853
9854 if (!ctxt->wellFormed) {
9855 if (ctxt->errNo == 0)
9856 ret = 1;
9857 else
9858 ret = ctxt->errNo;
9859 } else {
9860 ret = 0;
9861 }
9862
9863 if ((lst != NULL) && (ret == 0)) {
9864 xmlNodePtr cur;
9865
9866 /*
9867 * Return the newly created nodeset after unlinking it from
9868 * they pseudo parent.
9869 */
Daniel Veillard68e9e742002-11-16 15:35:11 +00009870 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009871 *lst = cur;
9872 while (cur != NULL) {
9873 cur->parent = NULL;
9874 cur = cur->next;
9875 }
Daniel Veillard68e9e742002-11-16 15:35:11 +00009876 ctxt->myDoc->children->children = NULL;
9877 }
9878 if (ctxt->myDoc != NULL) {
9879 xmlFreeNode(ctxt->myDoc->children);
9880 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009881 }
9882
9883 ctxt->sax = oldsax;
9884 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +00009885 if (newDoc != NULL)
9886 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +00009887
9888 return(ret);
9889}
9890
9891/**
Daniel Veillard58e44c92002-08-02 22:19:49 +00009892 * xmlParseBalancedChunkMemoryRecover:
9893 * @doc: the document the chunk pertains to
9894 * @sax: the SAX handler bloc (possibly NULL)
9895 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9896 * @depth: Used for loop detection, use 0
9897 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9898 * @lst: the return value for the set of parsed nodes
9899 * @recover: return nodes even if the data is broken (use 0)
9900 *
9901 *
9902 * Parse a well-balanced chunk of an XML document
9903 * called by the parser
9904 * The allowed sequence for the Well Balanced Chunk is the one defined by
9905 * the content production in the XML grammar:
9906 *
9907 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9908 *
9909 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9910 * the parser error code otherwise
9911 *
9912 * In case recover is set to 1, the nodelist will not be empty even if
9913 * the parsed chunk is not well balanced.
9914 */
9915int
9916xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
9917 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
9918 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +00009919 xmlParserCtxtPtr ctxt;
9920 xmlDocPtr newDoc;
9921 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +00009922 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +00009923 int size;
9924 int ret = 0;
9925
9926 if (depth > 40) {
9927 return(XML_ERR_ENTITY_LOOP);
9928 }
9929
9930
Daniel Veillardcda96922001-08-21 10:56:31 +00009931 if (lst != NULL)
9932 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009933 if (string == NULL)
9934 return(-1);
9935
9936 size = xmlStrlen(string);
9937
9938 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9939 if (ctxt == NULL) return(-1);
9940 ctxt->userData = ctxt;
9941 if (sax != NULL) {
9942 oldsax = ctxt->sax;
9943 ctxt->sax = sax;
9944 if (user_data != NULL)
9945 ctxt->userData = user_data;
9946 }
9947 newDoc = xmlNewDoc(BAD_CAST "1.0");
9948 if (newDoc == NULL) {
9949 xmlFreeParserCtxt(ctxt);
9950 return(-1);
9951 }
9952 if (doc != NULL) {
9953 newDoc->intSubset = doc->intSubset;
9954 newDoc->extSubset = doc->extSubset;
9955 }
9956 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9957 if (newDoc->children == NULL) {
9958 if (sax != NULL)
9959 ctxt->sax = oldsax;
9960 xmlFreeParserCtxt(ctxt);
9961 newDoc->intSubset = NULL;
9962 newDoc->extSubset = NULL;
9963 xmlFreeDoc(newDoc);
9964 return(-1);
9965 }
9966 nodePush(ctxt, newDoc->children);
9967 if (doc == NULL) {
9968 ctxt->myDoc = newDoc;
9969 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +00009970 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +00009971 newDoc->children->doc = doc;
9972 }
9973 ctxt->instate = XML_PARSER_CONTENT;
9974 ctxt->depth = depth;
9975
9976 /*
9977 * Doing validity checking on chunk doesn't make sense
9978 */
9979 ctxt->validate = 0;
9980 ctxt->loadsubset = 0;
9981
Daniel Veillardb39bc392002-10-26 19:29:51 +00009982 if ( doc != NULL ){
9983 content = doc->children;
9984 doc->children = NULL;
9985 xmlParseContent(ctxt);
9986 doc->children = content;
9987 }
9988 else {
9989 xmlParseContent(ctxt);
9990 }
Owen Taylor3473f882001-02-23 17:55:21 +00009991 if ((RAW == '<') && (NXT(1) == '/')) {
9992 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9993 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9994 ctxt->sax->error(ctxt->userData,
9995 "chunk is not well balanced\n");
9996 ctxt->wellFormed = 0;
9997 ctxt->disableSAX = 1;
9998 } else if (RAW != 0) {
9999 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10000 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10001 ctxt->sax->error(ctxt->userData,
10002 "extra content at the end of well balanced chunk\n");
10003 ctxt->wellFormed = 0;
10004 ctxt->disableSAX = 1;
10005 }
10006 if (ctxt->node != newDoc->children) {
10007 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10008 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10009 ctxt->sax->error(ctxt->userData,
10010 "chunk is not well balanced\n");
10011 ctxt->wellFormed = 0;
10012 ctxt->disableSAX = 1;
10013 }
10014
10015 if (!ctxt->wellFormed) {
10016 if (ctxt->errNo == 0)
10017 ret = 1;
10018 else
10019 ret = ctxt->errNo;
10020 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010021 ret = 0;
10022 }
10023
10024 if (lst != NULL && (ret == 0 || recover == 1)) {
10025 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010026
10027 /*
10028 * Return the newly created nodeset after unlinking it from
10029 * they pseudo parent.
10030 */
10031 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010032 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010033 while (cur != NULL) {
10034 cur->parent = NULL;
10035 cur = cur->next;
10036 }
10037 newDoc->children->children = NULL;
10038 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000010039
Owen Taylor3473f882001-02-23 17:55:21 +000010040 if (sax != NULL)
10041 ctxt->sax = oldsax;
10042 xmlFreeParserCtxt(ctxt);
10043 newDoc->intSubset = NULL;
10044 newDoc->extSubset = NULL;
10045 xmlFreeDoc(newDoc);
10046
10047 return(ret);
10048}
10049
10050/**
10051 * xmlSAXParseEntity:
10052 * @sax: the SAX handler block
10053 * @filename: the filename
10054 *
10055 * parse an XML external entity out of context and build a tree.
10056 * It use the given SAX function block to handle the parsing callback.
10057 * If sax is NULL, fallback to the default DOM tree building routines.
10058 *
10059 * [78] extParsedEnt ::= TextDecl? content
10060 *
10061 * This correspond to a "Well Balanced" chunk
10062 *
10063 * Returns the resulting document tree
10064 */
10065
10066xmlDocPtr
10067xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
10068 xmlDocPtr ret;
10069 xmlParserCtxtPtr ctxt;
10070 char *directory = NULL;
10071
10072 ctxt = xmlCreateFileParserCtxt(filename);
10073 if (ctxt == NULL) {
10074 return(NULL);
10075 }
10076 if (sax != NULL) {
10077 if (ctxt->sax != NULL)
10078 xmlFree(ctxt->sax);
10079 ctxt->sax = sax;
10080 ctxt->userData = NULL;
10081 }
10082
10083 if ((ctxt->directory == NULL) && (directory == NULL))
10084 directory = xmlParserGetDirectory(filename);
10085
10086 xmlParseExtParsedEnt(ctxt);
10087
10088 if (ctxt->wellFormed)
10089 ret = ctxt->myDoc;
10090 else {
10091 ret = NULL;
10092 xmlFreeDoc(ctxt->myDoc);
10093 ctxt->myDoc = NULL;
10094 }
10095 if (sax != NULL)
10096 ctxt->sax = NULL;
10097 xmlFreeParserCtxt(ctxt);
10098
10099 return(ret);
10100}
10101
10102/**
10103 * xmlParseEntity:
10104 * @filename: the filename
10105 *
10106 * parse an XML external entity out of context and build a tree.
10107 *
10108 * [78] extParsedEnt ::= TextDecl? content
10109 *
10110 * This correspond to a "Well Balanced" chunk
10111 *
10112 * Returns the resulting document tree
10113 */
10114
10115xmlDocPtr
10116xmlParseEntity(const char *filename) {
10117 return(xmlSAXParseEntity(NULL, filename));
10118}
10119
10120/**
10121 * xmlCreateEntityParserCtxt:
10122 * @URL: the entity URL
10123 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010124 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000010125 *
10126 * Create a parser context for an external entity
10127 * Automatic support for ZLIB/Compress compressed document is provided
10128 * by default if found at compile-time.
10129 *
10130 * Returns the new parser context or NULL
10131 */
10132xmlParserCtxtPtr
10133xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10134 const xmlChar *base) {
10135 xmlParserCtxtPtr ctxt;
10136 xmlParserInputPtr inputStream;
10137 char *directory = NULL;
10138 xmlChar *uri;
10139
10140 ctxt = xmlNewParserCtxt();
10141 if (ctxt == NULL) {
10142 return(NULL);
10143 }
10144
10145 uri = xmlBuildURI(URL, base);
10146
10147 if (uri == NULL) {
10148 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
10149 if (inputStream == NULL) {
10150 xmlFreeParserCtxt(ctxt);
10151 return(NULL);
10152 }
10153
10154 inputPush(ctxt, inputStream);
10155
10156 if ((ctxt->directory == NULL) && (directory == NULL))
10157 directory = xmlParserGetDirectory((char *)URL);
10158 if ((ctxt->directory == NULL) && (directory != NULL))
10159 ctxt->directory = directory;
10160 } else {
10161 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
10162 if (inputStream == NULL) {
10163 xmlFree(uri);
10164 xmlFreeParserCtxt(ctxt);
10165 return(NULL);
10166 }
10167
10168 inputPush(ctxt, inputStream);
10169
10170 if ((ctxt->directory == NULL) && (directory == NULL))
10171 directory = xmlParserGetDirectory((char *)uri);
10172 if ((ctxt->directory == NULL) && (directory != NULL))
10173 ctxt->directory = directory;
10174 xmlFree(uri);
10175 }
10176
10177 return(ctxt);
10178}
10179
10180/************************************************************************
10181 * *
10182 * Front ends when parsing from a file *
10183 * *
10184 ************************************************************************/
10185
10186/**
10187 * xmlCreateFileParserCtxt:
10188 * @filename: the filename
10189 *
10190 * Create a parser context for a file content.
10191 * Automatic support for ZLIB/Compress compressed document is provided
10192 * by default if found at compile-time.
10193 *
10194 * Returns the new parser context or NULL
10195 */
10196xmlParserCtxtPtr
10197xmlCreateFileParserCtxt(const char *filename)
10198{
10199 xmlParserCtxtPtr ctxt;
10200 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000010201 char *directory = NULL;
Daniel Veillardf4862f02002-09-10 11:13:43 +000010202 xmlChar *normalized;
Owen Taylor3473f882001-02-23 17:55:21 +000010203
Owen Taylor3473f882001-02-23 17:55:21 +000010204 ctxt = xmlNewParserCtxt();
10205 if (ctxt == NULL) {
10206 if (xmlDefaultSAXHandler.error != NULL) {
10207 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
10208 }
10209 return(NULL);
10210 }
10211
Daniel Veillardf4862f02002-09-10 11:13:43 +000010212 normalized = xmlNormalizeWindowsPath((const xmlChar *) filename);
10213 if (normalized == NULL) {
10214 xmlFreeParserCtxt(ctxt);
10215 return(NULL);
10216 }
10217 inputStream = xmlLoadExternalEntity((char *) normalized, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010218 if (inputStream == NULL) {
10219 xmlFreeParserCtxt(ctxt);
Daniel Veillardf4862f02002-09-10 11:13:43 +000010220 xmlFree(normalized);
Owen Taylor3473f882001-02-23 17:55:21 +000010221 return(NULL);
10222 }
10223
Owen Taylor3473f882001-02-23 17:55:21 +000010224 inputPush(ctxt, inputStream);
10225 if ((ctxt->directory == NULL) && (directory == NULL))
Daniel Veillardf4862f02002-09-10 11:13:43 +000010226 directory = xmlParserGetDirectory((char *) normalized);
Owen Taylor3473f882001-02-23 17:55:21 +000010227 if ((ctxt->directory == NULL) && (directory != NULL))
10228 ctxt->directory = directory;
10229
Daniel Veillardf4862f02002-09-10 11:13:43 +000010230 xmlFree(normalized);
10231
Owen Taylor3473f882001-02-23 17:55:21 +000010232 return(ctxt);
10233}
10234
10235/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010236 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000010237 * @sax: the SAX handler block
10238 * @filename: the filename
10239 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10240 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000010241 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000010242 *
10243 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10244 * compressed document is provided by default if found at compile-time.
10245 * It use the given SAX function block to handle the parsing callback.
10246 * If sax is NULL, fallback to the default DOM tree building routines.
10247 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000010248 * User data (void *) is stored within the parser context in the
10249 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000010250 *
Owen Taylor3473f882001-02-23 17:55:21 +000010251 * Returns the resulting document tree
10252 */
10253
10254xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000010255xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
10256 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000010257 xmlDocPtr ret;
10258 xmlParserCtxtPtr ctxt;
10259 char *directory = NULL;
10260
Daniel Veillard635ef722001-10-29 11:48:19 +000010261 xmlInitParser();
10262
Owen Taylor3473f882001-02-23 17:55:21 +000010263 ctxt = xmlCreateFileParserCtxt(filename);
10264 if (ctxt == NULL) {
10265 return(NULL);
10266 }
10267 if (sax != NULL) {
10268 if (ctxt->sax != NULL)
10269 xmlFree(ctxt->sax);
10270 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010271 }
Daniel Veillarda293c322001-10-02 13:54:14 +000010272 if (data!=NULL) {
10273 ctxt->_private=data;
10274 }
Owen Taylor3473f882001-02-23 17:55:21 +000010275
10276 if ((ctxt->directory == NULL) && (directory == NULL))
10277 directory = xmlParserGetDirectory(filename);
10278 if ((ctxt->directory == NULL) && (directory != NULL))
10279 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
10280
10281 xmlParseDocument(ctxt);
10282
10283 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10284 else {
10285 ret = NULL;
10286 xmlFreeDoc(ctxt->myDoc);
10287 ctxt->myDoc = NULL;
10288 }
10289 if (sax != NULL)
10290 ctxt->sax = NULL;
10291 xmlFreeParserCtxt(ctxt);
10292
10293 return(ret);
10294}
10295
10296/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010297 * xmlSAXParseFile:
10298 * @sax: the SAX handler block
10299 * @filename: the filename
10300 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10301 * documents
10302 *
10303 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10304 * compressed document is provided by default if found at compile-time.
10305 * It use the given SAX function block to handle the parsing callback.
10306 * If sax is NULL, fallback to the default DOM tree building routines.
10307 *
10308 * Returns the resulting document tree
10309 */
10310
10311xmlDocPtr
10312xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
10313 int recovery) {
10314 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
10315}
10316
10317/**
Owen Taylor3473f882001-02-23 17:55:21 +000010318 * xmlRecoverDoc:
10319 * @cur: a pointer to an array of xmlChar
10320 *
10321 * parse an XML in-memory document and build a tree.
10322 * In the case the document is not Well Formed, a tree is built anyway
10323 *
10324 * Returns the resulting document tree
10325 */
10326
10327xmlDocPtr
10328xmlRecoverDoc(xmlChar *cur) {
10329 return(xmlSAXParseDoc(NULL, cur, 1));
10330}
10331
10332/**
10333 * xmlParseFile:
10334 * @filename: the filename
10335 *
10336 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10337 * compressed document is provided by default if found at compile-time.
10338 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000010339 * Returns the resulting document tree if the file was wellformed,
10340 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000010341 */
10342
10343xmlDocPtr
10344xmlParseFile(const char *filename) {
10345 return(xmlSAXParseFile(NULL, filename, 0));
10346}
10347
10348/**
10349 * xmlRecoverFile:
10350 * @filename: the filename
10351 *
10352 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10353 * compressed document is provided by default if found at compile-time.
10354 * In the case the document is not Well Formed, a tree is built anyway
10355 *
10356 * Returns the resulting document tree
10357 */
10358
10359xmlDocPtr
10360xmlRecoverFile(const char *filename) {
10361 return(xmlSAXParseFile(NULL, filename, 1));
10362}
10363
10364
10365/**
10366 * xmlSetupParserForBuffer:
10367 * @ctxt: an XML parser context
10368 * @buffer: a xmlChar * buffer
10369 * @filename: a file name
10370 *
10371 * Setup the parser context to parse a new buffer; Clears any prior
10372 * contents from the parser context. The buffer parameter must not be
10373 * NULL, but the filename parameter can be
10374 */
10375void
10376xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
10377 const char* filename)
10378{
10379 xmlParserInputPtr input;
10380
10381 input = xmlNewInputStream(ctxt);
10382 if (input == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +000010383 xmlGenericError(xmlGenericErrorContext,
10384 "malloc");
Owen Taylor3473f882001-02-23 17:55:21 +000010385 xmlFree(ctxt);
10386 return;
10387 }
10388
10389 xmlClearParserCtxt(ctxt);
10390 if (filename != NULL)
10391 input->filename = xmlMemStrdup(filename);
10392 input->base = buffer;
10393 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010394 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000010395 inputPush(ctxt, input);
10396}
10397
10398/**
10399 * xmlSAXUserParseFile:
10400 * @sax: a SAX handler
10401 * @user_data: The user data returned on SAX callbacks
10402 * @filename: a file name
10403 *
10404 * parse an XML file and call the given SAX handler routines.
10405 * Automatic support for ZLIB/Compress compressed document is provided
10406 *
10407 * Returns 0 in case of success or a error number otherwise
10408 */
10409int
10410xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10411 const char *filename) {
10412 int ret = 0;
10413 xmlParserCtxtPtr ctxt;
10414
10415 ctxt = xmlCreateFileParserCtxt(filename);
10416 if (ctxt == NULL) return -1;
10417 if (ctxt->sax != &xmlDefaultSAXHandler)
10418 xmlFree(ctxt->sax);
10419 ctxt->sax = sax;
10420 if (user_data != NULL)
10421 ctxt->userData = user_data;
10422
10423 xmlParseDocument(ctxt);
10424
10425 if (ctxt->wellFormed)
10426 ret = 0;
10427 else {
10428 if (ctxt->errNo != 0)
10429 ret = ctxt->errNo;
10430 else
10431 ret = -1;
10432 }
10433 if (sax != NULL)
10434 ctxt->sax = NULL;
10435 xmlFreeParserCtxt(ctxt);
10436
10437 return ret;
10438}
10439
10440/************************************************************************
10441 * *
10442 * Front ends when parsing from memory *
10443 * *
10444 ************************************************************************/
10445
10446/**
10447 * xmlCreateMemoryParserCtxt:
10448 * @buffer: a pointer to a char array
10449 * @size: the size of the array
10450 *
10451 * Create a parser context for an XML in-memory document.
10452 *
10453 * Returns the new parser context or NULL
10454 */
10455xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010456xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010457 xmlParserCtxtPtr ctxt;
10458 xmlParserInputPtr input;
10459 xmlParserInputBufferPtr buf;
10460
10461 if (buffer == NULL)
10462 return(NULL);
10463 if (size <= 0)
10464 return(NULL);
10465
10466 ctxt = xmlNewParserCtxt();
10467 if (ctxt == NULL)
10468 return(NULL);
10469
10470 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
10471 if (buf == NULL) return(NULL);
10472
10473 input = xmlNewInputStream(ctxt);
10474 if (input == NULL) {
10475 xmlFreeParserCtxt(ctxt);
10476 return(NULL);
10477 }
10478
10479 input->filename = NULL;
10480 input->buf = buf;
10481 input->base = input->buf->buffer->content;
10482 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010483 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010484
10485 inputPush(ctxt, input);
10486 return(ctxt);
10487}
10488
10489/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010490 * xmlSAXParseMemoryWithData:
10491 * @sax: the SAX handler block
10492 * @buffer: an pointer to a char array
10493 * @size: the size of the array
10494 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10495 * documents
10496 * @data: the userdata
10497 *
10498 * parse an XML in-memory block and use the given SAX function block
10499 * to handle the parsing callback. If sax is NULL, fallback to the default
10500 * DOM tree building routines.
10501 *
10502 * User data (void *) is stored within the parser context in the
10503 * context's _private member, so it is available nearly everywhere in libxml
10504 *
10505 * Returns the resulting document tree
10506 */
10507
10508xmlDocPtr
10509xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
10510 int size, int recovery, void *data) {
10511 xmlDocPtr ret;
10512 xmlParserCtxtPtr ctxt;
10513
10514 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10515 if (ctxt == NULL) return(NULL);
10516 if (sax != NULL) {
10517 if (ctxt->sax != NULL)
10518 xmlFree(ctxt->sax);
10519 ctxt->sax = sax;
10520 }
10521 if (data!=NULL) {
10522 ctxt->_private=data;
10523 }
10524
10525 xmlParseDocument(ctxt);
10526
10527 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10528 else {
10529 ret = NULL;
10530 xmlFreeDoc(ctxt->myDoc);
10531 ctxt->myDoc = NULL;
10532 }
10533 if (sax != NULL)
10534 ctxt->sax = NULL;
10535 xmlFreeParserCtxt(ctxt);
10536
10537 return(ret);
10538}
10539
10540/**
Owen Taylor3473f882001-02-23 17:55:21 +000010541 * xmlSAXParseMemory:
10542 * @sax: the SAX handler block
10543 * @buffer: an pointer to a char array
10544 * @size: the size of the array
10545 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10546 * documents
10547 *
10548 * parse an XML in-memory block and use the given SAX function block
10549 * to handle the parsing callback. If sax is NULL, fallback to the default
10550 * DOM tree building routines.
10551 *
10552 * Returns the resulting document tree
10553 */
10554xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010555xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10556 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010557 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010558}
10559
10560/**
10561 * xmlParseMemory:
10562 * @buffer: an pointer to a char array
10563 * @size: the size of the array
10564 *
10565 * parse an XML in-memory block and build a tree.
10566 *
10567 * Returns the resulting document tree
10568 */
10569
Daniel Veillard50822cb2001-07-26 20:05:51 +000010570xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010571 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10572}
10573
10574/**
10575 * xmlRecoverMemory:
10576 * @buffer: an pointer to a char array
10577 * @size: the size of the array
10578 *
10579 * parse an XML in-memory block and build a tree.
10580 * In the case the document is not Well Formed, a tree is built anyway
10581 *
10582 * Returns the resulting document tree
10583 */
10584
Daniel Veillard50822cb2001-07-26 20:05:51 +000010585xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010586 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10587}
10588
10589/**
10590 * xmlSAXUserParseMemory:
10591 * @sax: a SAX handler
10592 * @user_data: The user data returned on SAX callbacks
10593 * @buffer: an in-memory XML document input
10594 * @size: the length of the XML document in bytes
10595 *
10596 * A better SAX parsing routine.
10597 * parse an XML in-memory buffer and call the given SAX handler routines.
10598 *
10599 * Returns 0 in case of success or a error number otherwise
10600 */
10601int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010602 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010603 int ret = 0;
10604 xmlParserCtxtPtr ctxt;
10605 xmlSAXHandlerPtr oldsax = NULL;
10606
Daniel Veillard9e923512002-08-14 08:48:52 +000010607 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000010608 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10609 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000010610 oldsax = ctxt->sax;
10611 ctxt->sax = sax;
Daniel Veillard30211a02001-04-26 09:33:18 +000010612 if (user_data != NULL)
10613 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010614
10615 xmlParseDocument(ctxt);
10616
10617 if (ctxt->wellFormed)
10618 ret = 0;
10619 else {
10620 if (ctxt->errNo != 0)
10621 ret = ctxt->errNo;
10622 else
10623 ret = -1;
10624 }
Daniel Veillard9e923512002-08-14 08:48:52 +000010625 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000010626 xmlFreeParserCtxt(ctxt);
10627
10628 return ret;
10629}
10630
10631/**
10632 * xmlCreateDocParserCtxt:
10633 * @cur: a pointer to an array of xmlChar
10634 *
10635 * Creates a parser context for an XML in-memory document.
10636 *
10637 * Returns the new parser context or NULL
10638 */
10639xmlParserCtxtPtr
10640xmlCreateDocParserCtxt(xmlChar *cur) {
10641 int len;
10642
10643 if (cur == NULL)
10644 return(NULL);
10645 len = xmlStrlen(cur);
10646 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10647}
10648
10649/**
10650 * xmlSAXParseDoc:
10651 * @sax: the SAX handler block
10652 * @cur: a pointer to an array of xmlChar
10653 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10654 * documents
10655 *
10656 * parse an XML in-memory document and build a tree.
10657 * It use the given SAX function block to handle the parsing callback.
10658 * If sax is NULL, fallback to the default DOM tree building routines.
10659 *
10660 * Returns the resulting document tree
10661 */
10662
10663xmlDocPtr
10664xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10665 xmlDocPtr ret;
10666 xmlParserCtxtPtr ctxt;
10667
10668 if (cur == NULL) return(NULL);
10669
10670
10671 ctxt = xmlCreateDocParserCtxt(cur);
10672 if (ctxt == NULL) return(NULL);
10673 if (sax != NULL) {
10674 ctxt->sax = sax;
10675 ctxt->userData = NULL;
10676 }
10677
10678 xmlParseDocument(ctxt);
10679 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10680 else {
10681 ret = NULL;
10682 xmlFreeDoc(ctxt->myDoc);
10683 ctxt->myDoc = NULL;
10684 }
10685 if (sax != NULL)
10686 ctxt->sax = NULL;
10687 xmlFreeParserCtxt(ctxt);
10688
10689 return(ret);
10690}
10691
10692/**
10693 * xmlParseDoc:
10694 * @cur: a pointer to an array of xmlChar
10695 *
10696 * parse an XML in-memory document and build a tree.
10697 *
10698 * Returns the resulting document tree
10699 */
10700
10701xmlDocPtr
10702xmlParseDoc(xmlChar *cur) {
10703 return(xmlSAXParseDoc(NULL, cur, 0));
10704}
10705
Daniel Veillard8107a222002-01-13 14:10:10 +000010706/************************************************************************
10707 * *
10708 * Specific function to keep track of entities references *
10709 * and used by the XSLT debugger *
10710 * *
10711 ************************************************************************/
10712
10713static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
10714
10715/**
10716 * xmlAddEntityReference:
10717 * @ent : A valid entity
10718 * @firstNode : A valid first node for children of entity
10719 * @lastNode : A valid last node of children entity
10720 *
10721 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
10722 */
10723static void
10724xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
10725 xmlNodePtr lastNode)
10726{
10727 if (xmlEntityRefFunc != NULL) {
10728 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
10729 }
10730}
10731
10732
10733/**
10734 * xmlSetEntityReferenceFunc:
10735 * @func : A valid function
10736 *
10737 * Set the function to call call back when a xml reference has been made
10738 */
10739void
10740xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
10741{
10742 xmlEntityRefFunc = func;
10743}
Owen Taylor3473f882001-02-23 17:55:21 +000010744
10745/************************************************************************
10746 * *
10747 * Miscellaneous *
10748 * *
10749 ************************************************************************/
10750
10751#ifdef LIBXML_XPATH_ENABLED
10752#include <libxml/xpath.h>
10753#endif
10754
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010755extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000010756static int xmlParserInitialized = 0;
10757
10758/**
10759 * xmlInitParser:
10760 *
10761 * Initialization function for the XML parser.
10762 * This is not reentrant. Call once before processing in case of
10763 * use in multithreaded programs.
10764 */
10765
10766void
10767xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000010768 if (xmlParserInitialized != 0)
10769 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010770
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010771 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
10772 (xmlGenericError == NULL))
10773 initGenericErrorDefaultFunc(NULL);
Daniel Veillardd0463562001-10-13 09:15:48 +000010774 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000010775 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000010776 xmlInitCharEncodingHandlers();
10777 xmlInitializePredefinedEntities();
10778 xmlDefaultSAXHandlerInit();
10779 xmlRegisterDefaultInputCallbacks();
10780 xmlRegisterDefaultOutputCallbacks();
10781#ifdef LIBXML_HTML_ENABLED
10782 htmlInitAutoClose();
10783 htmlDefaultSAXHandlerInit();
10784#endif
10785#ifdef LIBXML_XPATH_ENABLED
10786 xmlXPathInit();
10787#endif
10788 xmlParserInitialized = 1;
10789}
10790
10791/**
10792 * xmlCleanupParser:
10793 *
10794 * Cleanup function for the XML parser. It tries to reclaim all
10795 * parsing related global memory allocated for the parser processing.
10796 * It doesn't deallocate any document related memory. Calling this
10797 * function should not prevent reusing the parser.
10798 */
10799
10800void
10801xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000010802 xmlCleanupCharEncodingHandlers();
10803 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000010804#ifdef LIBXML_CATALOG_ENABLED
10805 xmlCatalogCleanup();
10806#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000010807 xmlCleanupThreads();
10808 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010809}