blob: 98d148353928f42d2b38262e6ca41f6f397c7784 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
/*
 * Directory separator character for the build platform: a backslash
 * when WIN32 is defined and __CYGWIN__ is not, a forward slash otherwise.
 */
#if !defined(WIN32) || defined(__CYGWIN__)
#define XML_DIR_SEP '/'
#else
#define XML_DIR_SEP '\\'
#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
44#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000045#include <libxml/threads.h>
46#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000056#ifdef LIBXML_CATALOG_ENABLED
57#include <libxml/catalog.h>
58#endif
Owen Taylor3473f882001-02-23 17:55:21 +000059
60#ifdef HAVE_CTYPE_H
61#include <ctype.h>
62#endif
63#ifdef HAVE_STDLIB_H
64#include <stdlib.h>
65#endif
66#ifdef HAVE_SYS_STAT_H
67#include <sys/stat.h>
68#endif
69#ifdef HAVE_FCNTL_H
70#include <fcntl.h>
71#endif
72#ifdef HAVE_UNISTD_H
73#include <unistd.h>
74#endif
75#ifdef HAVE_ZLIB_H
76#include <zlib.h>
77#endif
78
79
/*
 * Initial buffer sizes, counted in xmlChar units.
 * XML_PARSER_BIG_BUFFER_SIZE is the starting size of the translation
 * buffer allocated by xmlStringDecodeEntities().
 */
#define XML_PARSER_BUFFER_SIZE 100
#define XML_PARSER_BIG_BUFFER_SIZE 300

/* Marker string for SAX compatibility mode (NOTE(review): usage not visible here). */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
84
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by the W3C specifications.
 */
static const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
93
94/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +000095xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
96 const xmlChar **str);
97
Daniel Veillard257d9102001-05-08 10:41:44 +000098static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +000099xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
100 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000101 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000102 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000103
Daniel Veillard8107a222002-01-13 14:10:10 +0000104static void
105xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
106 xmlNodePtr lastNode);
107
Owen Taylor3473f882001-02-23 17:55:21 +0000108/************************************************************************
109 * *
110 * Parser stacks related functions and macros *
111 * *
112 ************************************************************************/
113
114xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
115 const xmlChar ** str);
116
/*
 * Generic function for accessing stacks in the Parser Context
 *
 * PUSH_AND_POP(scope, type, name) generates two functions operating on
 * the ctxt->name##Tab array (ctxt->name##Nr used entries, ctxt->name##Max
 * allocated entries) and on ctxt->name, which mirrors the top entry:
 *
 *   name##Push(ctxt, value)  stores value on top of the stack, doubling
 *                            the table when it is full. Returns the index
 *                            used, or 0 when the table cannot be enlarged;
 *                            in that case the existing table, its recorded
 *                            capacity and ctxt->name are left untouched.
 *   name##Pop(ctxt)          removes and returns the top entry (0 when the
 *                            stack is empty) and resets ctxt->name to the
 *                            new top entry, or NULL.
 */

#define PUSH_AND_POP(scope, type, name)					\
scope int name##Push(xmlParserCtxtPtr ctxt, type value) {		\
    if (ctxt->name##Nr >= ctxt->name##Max) {				\
	type *grown;							\
	grown = (type *) xmlRealloc(ctxt->name##Tab,			\
	         ctxt->name##Max * 2 * sizeof(ctxt->name##Tab[0]));	\
	if (grown == NULL) {						\
	    xmlGenericError(xmlGenericErrorContext,			\
		    "realloc failed !\n");				\
	    return(0);							\
	}								\
	ctxt->name##Tab = grown;					\
	ctxt->name##Max *= 2;						\
    }									\
    ctxt->name##Tab[ctxt->name##Nr] = value;				\
    ctxt->name = value;							\
    return(ctxt->name##Nr++);						\
}									\
scope type name##Pop(xmlParserCtxtPtr ctxt) {				\
    type ret;								\
    if (ctxt->name##Nr <= 0) return(0);					\
    ctxt->name##Nr--;							\
    if (ctxt->name##Nr > 0)						\
	ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1];		\
    else								\
        ctxt->name = NULL;						\
    ret = ctxt->name##Tab[ctxt->name##Nr];				\
    ctxt->name##Tab[ctxt->name##Nr] = 0;				\
    return(ret);							\
}
149
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000150/**
151 * inputPop:
152 * @ctxt: an XML parser context
153 *
154 * Pops the top parser input from the input stack
155 *
156 * Returns the input just removed
157 */
158/**
159 * inputPush:
160 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000161 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000162 *
163 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000164 *
165 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000166 */
167/**
168 * namePop:
169 * @ctxt: an XML parser context
170 *
171 * Pops the top element name from the name stack
172 *
173 * Returns the name just removed
174 */
175/**
176 * namePush:
177 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000178 * @value: the element name
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000179 *
180 * Pushes a new element name on top of the name stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000181 *
182 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000183 */
184/**
185 * nodePop:
186 * @ctxt: an XML parser context
187 *
188 * Pops the top element node from the node stack
189 *
190 * Returns the node just removed
191 */
192/**
193 * nodePush:
194 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000195 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000196 *
197 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000198 *
199 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000200 */
Owen Taylor3473f882001-02-23 17:55:21 +0000201/*
202 * Those macros actually generate the functions
203 */
204PUSH_AND_POP(extern, xmlParserInputPtr, input)
205PUSH_AND_POP(extern, xmlNodePtr, node)
206PUSH_AND_POP(extern, xmlChar*, name)
207
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000208static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000209 if (ctxt->spaceNr >= ctxt->spaceMax) {
210 ctxt->spaceMax *= 2;
211 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
212 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
213 if (ctxt->spaceTab == NULL) {
214 xmlGenericError(xmlGenericErrorContext,
215 "realloc failed !\n");
216 return(0);
217 }
218 }
219 ctxt->spaceTab[ctxt->spaceNr] = val;
220 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
221 return(ctxt->spaceNr++);
222}
223
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000224static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000225 int ret;
226 if (ctxt->spaceNr <= 0) return(0);
227 ctxt->spaceNr--;
228 if (ctxt->spaceNr > 0)
229 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
230 else
231 ctxt->space = NULL;
232 ret = ctxt->spaceTab[ctxt->spaceNr];
233 ctxt->spaceTab[ctxt->spaceNr] = -1;
234 return(ret);
235}
236
/*
 * Macros for accessing the content. Those should be used only by the
 * parser, and not exported. All of them expect a variable named ctxt
 * (the parser context) to be in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context
 * to use them:
 *
 *   CUR_PTR  the current pointer to the xmlChar to be parsed.
 *            To be used with extreme caution since operations consuming
 *            characters may move the input buffer to a different location !
 *   CUR      the pending ctxt->token if there is one, otherwise the
 *            current xmlChar of the input. It should only be compared
 *            against ASCII values, otherwise it would break when running
 *            with UTF-8 encoding.
 *   RAW      the current xmlChar of the input buffer, or -1 while a
 *            token is pending in ctxt->token.
 *   NXT(n)   the n'th next xmlChar of the input buffer. Like CUR it
 *            should only be used to compare against ASCII based
 *            substrings.
 *   SKIP(n)  skip n xmlChar, must only be used to skip ASCII defined
 *            strings within the parser.
 *
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding:
 *
 *   NEXT         skip to the next character, this does the proper
 *                decoding in UTF-8 mode. It also pops finished entities
 *                on the fly.
 *   NEXTL(l)     skip l xmlChar in the input buffer
 *   CUR_CHAR(l)  the current unicode character (int), sets l to the
 *                number of xmlChars used for its encoding.
 *   CUR_SCHAR    same but operates on a string instead of the context
 *   COPY_BUF     copy the current unicode char to the target buffer and
 *                increment the index
 *   GROW, SHRINK handling of input buffers
 *
 * GROW and SHRINK expand to a bare "if" statement and NEXT1 to a bare
 * compound statement, so they remain usable without a trailing semicolon.
 */

#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * Advance by val xmlChars, hand a '%' found at the new position to the
 * PE reference handler, and pop the input when it is exhausted and
 * cannot be grown.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)

/* Shrink the input once more than INPUT_CHUNK xmlChars were consumed. */
#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input);					\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  }

/* Refill the input when fewer than INPUT_CHUNK xmlChars remain. */
#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {	\
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/* Advance by exactly one xmlChar, refilling the input on a 0 byte. */
#define NEXT1 {								\
    ctxt->nbChars++;							\
    ctxt->input->cur++;							\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  }

/*
 * Advance by l xmlChars, keeping the line/column counters up to date
 * and dropping any pending token.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
        ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += l;				\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +0000323
324/**
325 * xmlSkipBlankChars:
326 * @ctxt: the XML parser context
327 *
328 * skip all blanks character found at that point in the input streams.
329 * It pops up finished entities in the process if allowable at that point.
330 *
331 * Returns the number of space chars skipped
332 */
333
334int
335xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000336 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000337
Daniel Veillard02141ea2001-04-30 11:46:40 +0000338 if (ctxt->token != 0) {
339 if (!IS_BLANK(ctxt->token))
340 return(0);
341 ctxt->token = 0;
342 res++;
343 }
Owen Taylor3473f882001-02-23 17:55:21 +0000344 /*
345 * It's Okay to use CUR/NEXT here since all the blanks are on
346 * the ASCII range.
347 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000348 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
349 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000350 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000351 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000352 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000353 cur = ctxt->input->cur;
354 while (IS_BLANK(*cur)) {
355 if (*cur == '\n') {
356 ctxt->input->line++; ctxt->input->col = 1;
357 }
358 cur++;
359 res++;
360 if (*cur == 0) {
361 ctxt->input->cur = cur;
362 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
363 cur = ctxt->input->cur;
364 }
365 }
366 ctxt->input->cur = cur;
367 } else {
368 int cur;
369 do {
370 cur = CUR;
371 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
372 NEXT;
373 cur = CUR;
374 res++;
375 }
376 while ((cur == 0) && (ctxt->inputNr > 1) &&
377 (ctxt->instate != XML_PARSER_COMMENT)) {
378 xmlPopInput(ctxt);
379 cur = CUR;
380 }
381 /*
382 * Need to handle support of entities branching here
383 */
384 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
385 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
386 }
Owen Taylor3473f882001-02-23 17:55:21 +0000387 return(res);
388}
389
390/************************************************************************
391 * *
392 * Commodity functions to handle entities *
393 * *
394 ************************************************************************/
395
396/**
397 * xmlPopInput:
398 * @ctxt: an XML parser context
399 *
400 * xmlPopInput: the current input pointed by ctxt->input came to an end
401 * pop it and return the next char.
402 *
403 * Returns the current xmlChar in the parser context
404 */
405xmlChar
406xmlPopInput(xmlParserCtxtPtr ctxt) {
407 if (ctxt->inputNr == 1) return(0); /* End of main Input */
408 if (xmlParserDebugEntities)
409 xmlGenericError(xmlGenericErrorContext,
410 "Popping input %d\n", ctxt->inputNr);
411 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +0000412 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000413 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
414 return(xmlPopInput(ctxt));
415 return(CUR);
416}
417
418/**
419 * xmlPushInput:
420 * @ctxt: an XML parser context
421 * @input: an XML parser input fragment (entity, XML fragment ...).
422 *
423 * xmlPushInput: switch to a new input stream which is stacked on top
424 * of the previous one(s).
425 */
426void
427xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
428 if (input == NULL) return;
429
430 if (xmlParserDebugEntities) {
431 if ((ctxt->input != NULL) && (ctxt->input->filename))
432 xmlGenericError(xmlGenericErrorContext,
433 "%s(%d): ", ctxt->input->filename,
434 ctxt->input->line);
435 xmlGenericError(xmlGenericErrorContext,
436 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
437 }
438 inputPush(ctxt, input);
439 GROW;
440}
441
442/**
443 * xmlParseCharRef:
444 * @ctxt: an XML parser context
445 *
446 * parse Reference declarations
447 *
448 * [66] CharRef ::= '&#' [0-9]+ ';' |
449 * '&#x' [0-9a-fA-F]+ ';'
450 *
451 * [ WFC: Legal Character ]
452 * Characters referred to using character references must match the
453 * production for Char.
454 *
455 * Returns the value parsed (as an int), 0 in case of error
456 */
457int
458xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000459 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000460 int count = 0;
461
462 if (ctxt->token != 0) {
463 val = ctxt->token;
464 ctxt->token = 0;
465 return(val);
466 }
467 /*
468 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
469 */
Daniel Veillard561b7f82002-03-20 21:55:57 +0000470 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +0000471 (NXT(2) == 'x')) {
472 SKIP(3);
473 GROW;
474 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000475 if (count++ > 20) {
476 count = 0;
477 GROW;
478 }
479 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000480 val = val * 16 + (CUR - '0');
481 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
482 val = val * 16 + (CUR - 'a') + 10;
483 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
484 val = val * 16 + (CUR - 'A') + 10;
485 else {
486 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
487 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
488 ctxt->sax->error(ctxt->userData,
489 "xmlParseCharRef: invalid hexadecimal value\n");
490 ctxt->wellFormed = 0;
491 ctxt->disableSAX = 1;
492 val = 0;
493 break;
494 }
495 NEXT;
496 count++;
497 }
498 if (RAW == ';') {
499 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
500 ctxt->nbChars ++;
501 ctxt->input->cur++;
502 }
Daniel Veillard561b7f82002-03-20 21:55:57 +0000503 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +0000504 SKIP(2);
505 GROW;
506 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000507 if (count++ > 20) {
508 count = 0;
509 GROW;
510 }
511 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000512 val = val * 10 + (CUR - '0');
513 else {
514 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
515 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
516 ctxt->sax->error(ctxt->userData,
517 "xmlParseCharRef: invalid decimal value\n");
518 ctxt->wellFormed = 0;
519 ctxt->disableSAX = 1;
520 val = 0;
521 break;
522 }
523 NEXT;
524 count++;
525 }
526 if (RAW == ';') {
527 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
528 ctxt->nbChars ++;
529 ctxt->input->cur++;
530 }
531 } else {
532 ctxt->errNo = XML_ERR_INVALID_CHARREF;
533 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
534 ctxt->sax->error(ctxt->userData,
535 "xmlParseCharRef: invalid value\n");
536 ctxt->wellFormed = 0;
537 ctxt->disableSAX = 1;
538 }
539
540 /*
541 * [ WFC: Legal Character ]
542 * Characters referred to using character references must match the
543 * production for Char.
544 */
545 if (IS_CHAR(val)) {
546 return(val);
547 } else {
548 ctxt->errNo = XML_ERR_INVALID_CHAR;
549 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000550 ctxt->sax->error(ctxt->userData,
551 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000552 val);
553 ctxt->wellFormed = 0;
554 ctxt->disableSAX = 1;
555 }
556 return(0);
557}
558
559/**
560 * xmlParseStringCharRef:
561 * @ctxt: an XML parser context
562 * @str: a pointer to an index in the string
563 *
564 * parse Reference declarations, variant parsing from a string rather
565 * than an an input flow.
566 *
567 * [66] CharRef ::= '&#' [0-9]+ ';' |
568 * '&#x' [0-9a-fA-F]+ ';'
569 *
570 * [ WFC: Legal Character ]
571 * Characters referred to using character references must match the
572 * production for Char.
573 *
574 * Returns the value parsed (as an int), 0 in case of error, str will be
575 * updated to the current value of the index
576 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000577static int
Owen Taylor3473f882001-02-23 17:55:21 +0000578xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
579 const xmlChar *ptr;
580 xmlChar cur;
581 int val = 0;
582
583 if ((str == NULL) || (*str == NULL)) return(0);
584 ptr = *str;
585 cur = *ptr;
586 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
587 ptr += 3;
588 cur = *ptr;
589 while (cur != ';') { /* Non input consuming loop */
590 if ((cur >= '0') && (cur <= '9'))
591 val = val * 16 + (cur - '0');
592 else if ((cur >= 'a') && (cur <= 'f'))
593 val = val * 16 + (cur - 'a') + 10;
594 else if ((cur >= 'A') && (cur <= 'F'))
595 val = val * 16 + (cur - 'A') + 10;
596 else {
597 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
598 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
599 ctxt->sax->error(ctxt->userData,
600 "xmlParseStringCharRef: invalid hexadecimal value\n");
601 ctxt->wellFormed = 0;
602 ctxt->disableSAX = 1;
603 val = 0;
604 break;
605 }
606 ptr++;
607 cur = *ptr;
608 }
609 if (cur == ';')
610 ptr++;
611 } else if ((cur == '&') && (ptr[1] == '#')){
612 ptr += 2;
613 cur = *ptr;
614 while (cur != ';') { /* Non input consuming loops */
615 if ((cur >= '0') && (cur <= '9'))
616 val = val * 10 + (cur - '0');
617 else {
618 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
619 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
620 ctxt->sax->error(ctxt->userData,
621 "xmlParseStringCharRef: invalid decimal value\n");
622 ctxt->wellFormed = 0;
623 ctxt->disableSAX = 1;
624 val = 0;
625 break;
626 }
627 ptr++;
628 cur = *ptr;
629 }
630 if (cur == ';')
631 ptr++;
632 } else {
633 ctxt->errNo = XML_ERR_INVALID_CHARREF;
634 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
635 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000636 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000637 ctxt->wellFormed = 0;
638 ctxt->disableSAX = 1;
639 return(0);
640 }
641 *str = ptr;
642
643 /*
644 * [ WFC: Legal Character ]
645 * Characters referred to using character references must match the
646 * production for Char.
647 */
648 if (IS_CHAR(val)) {
649 return(val);
650 } else {
651 ctxt->errNo = XML_ERR_INVALID_CHAR;
652 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
653 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000654 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000655 ctxt->wellFormed = 0;
656 ctxt->disableSAX = 1;
657 }
658 return(0);
659}
660
661/**
662 * xmlParserHandlePEReference:
663 * @ctxt: the parser context
664 *
665 * [69] PEReference ::= '%' Name ';'
666 *
667 * [ WFC: No Recursion ]
668 * A parsed entity must not contain a recursive
669 * reference to itself, either directly or indirectly.
670 *
671 * [ WFC: Entity Declared ]
672 * In a document without any DTD, a document with only an internal DTD
673 * subset which contains no parameter entity references, or a document
674 * with "standalone='yes'", ... ... The declaration of a parameter
675 * entity must precede any reference to it...
676 *
677 * [ VC: Entity Declared ]
678 * In a document with an external subset or external parameter entities
679 * with "standalone='no'", ... ... The declaration of a parameter entity
680 * must precede any reference to it...
681 *
682 * [ WFC: In DTD ]
683 * Parameter-entity references may only appear in the DTD.
684 * NOTE: misleading but this is handled.
685 *
686 * A PEReference may have been detected in the current input stream
687 * the handling is done accordingly to
688 * http://www.w3.org/TR/REC-xml#entproc
689 * i.e.
690 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000691 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000692 */
693void
694xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
695 xmlChar *name;
696 xmlEntityPtr entity = NULL;
697 xmlParserInputPtr input;
698
699 if (ctxt->token != 0) {
700 return;
701 }
702 if (RAW != '%') return;
703 switch(ctxt->instate) {
704 case XML_PARSER_CDATA_SECTION:
705 return;
706 case XML_PARSER_COMMENT:
707 return;
708 case XML_PARSER_START_TAG:
709 return;
710 case XML_PARSER_END_TAG:
711 return;
712 case XML_PARSER_EOF:
713 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
715 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
716 ctxt->wellFormed = 0;
717 ctxt->disableSAX = 1;
718 return;
719 case XML_PARSER_PROLOG:
720 case XML_PARSER_START:
721 case XML_PARSER_MISC:
722 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
723 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
724 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
725 ctxt->wellFormed = 0;
726 ctxt->disableSAX = 1;
727 return;
728 case XML_PARSER_ENTITY_DECL:
729 case XML_PARSER_CONTENT:
730 case XML_PARSER_ATTRIBUTE_VALUE:
731 case XML_PARSER_PI:
732 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +0000733 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +0000734 /* we just ignore it there */
735 return;
736 case XML_PARSER_EPILOG:
737 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
738 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
739 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
740 ctxt->wellFormed = 0;
741 ctxt->disableSAX = 1;
742 return;
743 case XML_PARSER_ENTITY_VALUE:
744 /*
745 * NOTE: in the case of entity values, we don't do the
746 * substitution here since we need the literal
747 * entity value to be able to save the internal
748 * subset of the document.
749 * This will be handled by xmlStringDecodeEntities
750 */
751 return;
752 case XML_PARSER_DTD:
753 /*
754 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
755 * In the internal DTD subset, parameter-entity references
756 * can occur only where markup declarations can occur, not
757 * within markup declarations.
758 * In that case this is handled in xmlParseMarkupDecl
759 */
760 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
761 return;
762 break;
763 case XML_PARSER_IGNORE:
764 return;
765 }
766
767 NEXT;
768 name = xmlParseName(ctxt);
769 if (xmlParserDebugEntities)
770 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000771 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000772 if (name == NULL) {
773 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
774 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000775 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000776 ctxt->wellFormed = 0;
777 ctxt->disableSAX = 1;
778 } else {
779 if (RAW == ';') {
780 NEXT;
781 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
782 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
783 if (entity == NULL) {
784
785 /*
786 * [ WFC: Entity Declared ]
787 * In a document without any DTD, a document with only an
788 * internal DTD subset which contains no parameter entity
789 * references, or a document with "standalone='yes'", ...
790 * ... The declaration of a parameter entity must precede
791 * any reference to it...
792 */
793 if ((ctxt->standalone == 1) ||
794 ((ctxt->hasExternalSubset == 0) &&
795 (ctxt->hasPErefs == 0))) {
796 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
797 ctxt->sax->error(ctxt->userData,
798 "PEReference: %%%s; not found\n", name);
799 ctxt->wellFormed = 0;
800 ctxt->disableSAX = 1;
801 } else {
802 /*
803 * [ VC: Entity Declared ]
804 * In a document with an external subset or external
805 * parameter entities with "standalone='no'", ...
806 * ... The declaration of a parameter entity must precede
807 * any reference to it...
808 */
809 if ((!ctxt->disableSAX) &&
810 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
811 ctxt->vctxt.error(ctxt->vctxt.userData,
812 "PEReference: %%%s; not found\n", name);
813 } else if ((!ctxt->disableSAX) &&
814 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
815 ctxt->sax->warning(ctxt->userData,
816 "PEReference: %%%s; not found\n", name);
817 ctxt->valid = 0;
818 }
819 } else {
820 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
821 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000822 xmlChar start[4];
823 xmlCharEncoding enc;
824
Owen Taylor3473f882001-02-23 17:55:21 +0000825 /*
826 * handle the extra spaces added before and after
827 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000828 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000829 */
830 input = xmlNewEntityInputStream(ctxt, entity);
831 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000832
833 /*
834 * Get the 4 first bytes and decode the charset
835 * if enc != XML_CHAR_ENCODING_NONE
836 * plug some encoding conversion routines.
837 */
838 GROW
Daniel Veillard561b7f82002-03-20 21:55:57 +0000839 start[0] = RAW;
840 start[1] = NXT(1);
841 start[2] = NXT(2);
842 start[3] = NXT(3);
843 enc = xmlDetectCharEncoding(start, 4);
844 if (enc != XML_CHAR_ENCODING_NONE) {
845 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000846 }
847
Owen Taylor3473f882001-02-23 17:55:21 +0000848 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
849 (RAW == '<') && (NXT(1) == '?') &&
850 (NXT(2) == 'x') && (NXT(3) == 'm') &&
851 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
852 xmlParseTextDecl(ctxt);
853 }
854 if (ctxt->token == 0)
855 ctxt->token = ' ';
856 } else {
857 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
858 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000859 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000860 name);
861 ctxt->wellFormed = 0;
862 ctxt->disableSAX = 1;
863 }
864 }
865 } else {
866 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
867 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
868 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000869 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000870 ctxt->wellFormed = 0;
871 ctxt->disableSAX = 1;
872 }
873 xmlFree(name);
874 }
875}
876
/*
 * Macro used to grow the current buffer.
 *
 * growBuffer(buffer) doubles the allocation of the xmlChar buffer
 * "buffer", whose current capacity is held in the companion variable
 * "buffer_size". It expands to a compound statement and must be used
 * in a function returning a pointer: if the reallocation fails, the
 * old buffer is released (it would otherwise be leaked, since the
 * enclosing function is left at once) and the function returns NULL.
 * buffer and buffer_size are only updated once the reallocation
 * succeeded.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_tmp = (xmlChar *)					\
        xmlRealloc(buffer, buffer##_size * 2 * sizeof(xmlChar));	\
    if (buffer##_tmp == NULL) {						\
	perror("realloc failed");					\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = buffer##_tmp;						\
    buffer##_size *= 2;							\
}
889
890/**
891 * xmlStringDecodeEntities:
892 * @ctxt: the parser context
893 * @str: the input string
894 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
895 * @end: an end marker xmlChar, 0 if none
896 * @end2: an end marker xmlChar, 0 if none
897 * @end3: an end marker xmlChar, 0 if none
898 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000899 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +0000900 *
901 * [67] Reference ::= EntityRef | CharRef
902 *
903 * [69] PEReference ::= '%' Name ';'
904 *
905 * Returns A newly allocated string with the substitution done. The caller
906 * must deallocate it !
907 */
908xmlChar *
909xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
910 xmlChar end, xmlChar end2, xmlChar end3) {
911 xmlChar *buffer = NULL;
912 int buffer_size = 0;
913
914 xmlChar *current = NULL;
915 xmlEntityPtr ent;
916 int c,l;
917 int nbchars = 0;
918
919 if (str == NULL)
920 return(NULL);
921
922 if (ctxt->depth > 40) {
923 ctxt->errNo = XML_ERR_ENTITY_LOOP;
924 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
925 ctxt->sax->error(ctxt->userData,
926 "Detected entity reference loop\n");
927 ctxt->wellFormed = 0;
928 ctxt->disableSAX = 1;
929 return(NULL);
930 }
931
932 /*
933 * allocate a translation buffer.
934 */
935 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
936 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
937 if (buffer == NULL) {
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000938 perror("xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +0000939 return(NULL);
940 }
941
942 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000943 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +0000944 * we are operating on already parsed values.
945 */
946 c = CUR_SCHAR(str, l);
947 while ((c != 0) && (c != end) && /* non input consuming loop */
948 (c != end2) && (c != end3)) {
949
950 if (c == 0) break;
951 if ((c == '&') && (str[1] == '#')) {
952 int val = xmlParseStringCharRef(ctxt, &str);
953 if (val != 0) {
954 COPY_BUF(0,buffer,nbchars,val);
955 }
956 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
957 if (xmlParserDebugEntities)
958 xmlGenericError(xmlGenericErrorContext,
959 "String decoding Entity Reference: %.30s\n",
960 str);
961 ent = xmlParseStringEntityRef(ctxt, &str);
962 if ((ent != NULL) &&
963 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
964 if (ent->content != NULL) {
965 COPY_BUF(0,buffer,nbchars,ent->content[0]);
966 } else {
967 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
968 ctxt->sax->error(ctxt->userData,
969 "internal error entity has no content\n");
970 }
971 } else if ((ent != NULL) && (ent->content != NULL)) {
972 xmlChar *rep;
973
974 ctxt->depth++;
975 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
976 0, 0, 0);
977 ctxt->depth--;
978 if (rep != NULL) {
979 current = rep;
980 while (*current != 0) { /* non input consuming loop */
981 buffer[nbchars++] = *current++;
982 if (nbchars >
983 buffer_size - XML_PARSER_BUFFER_SIZE) {
984 growBuffer(buffer);
985 }
986 }
987 xmlFree(rep);
988 }
989 } else if (ent != NULL) {
990 int i = xmlStrlen(ent->name);
991 const xmlChar *cur = ent->name;
992
993 buffer[nbchars++] = '&';
994 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
995 growBuffer(buffer);
996 }
997 for (;i > 0;i--)
998 buffer[nbchars++] = *cur++;
999 buffer[nbchars++] = ';';
1000 }
1001 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1002 if (xmlParserDebugEntities)
1003 xmlGenericError(xmlGenericErrorContext,
1004 "String decoding PE Reference: %.30s\n", str);
1005 ent = xmlParseStringPEReference(ctxt, &str);
1006 if (ent != NULL) {
1007 xmlChar *rep;
1008
1009 ctxt->depth++;
1010 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1011 0, 0, 0);
1012 ctxt->depth--;
1013 if (rep != NULL) {
1014 current = rep;
1015 while (*current != 0) { /* non input consuming loop */
1016 buffer[nbchars++] = *current++;
1017 if (nbchars >
1018 buffer_size - XML_PARSER_BUFFER_SIZE) {
1019 growBuffer(buffer);
1020 }
1021 }
1022 xmlFree(rep);
1023 }
1024 }
1025 } else {
1026 COPY_BUF(l,buffer,nbchars,c);
1027 str += l;
1028 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1029 growBuffer(buffer);
1030 }
1031 }
1032 c = CUR_SCHAR(str, l);
1033 }
1034 buffer[nbchars++] = 0;
1035 return(buffer);
1036}
1037
1038
/************************************************************************
 *                                                                      *
 *              Commodity functions to handle xmlChars                  *
 *                                                                      *
 ************************************************************************/
1044
1045/**
1046 * xmlStrndup:
1047 * @cur: the input xmlChar *
1048 * @len: the len of @cur
1049 *
1050 * a strndup for array of xmlChar's
1051 *
1052 * Returns a new xmlChar * or NULL
1053 */
1054xmlChar *
1055xmlStrndup(const xmlChar *cur, int len) {
1056 xmlChar *ret;
1057
1058 if ((cur == NULL) || (len < 0)) return(NULL);
1059 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1060 if (ret == NULL) {
1061 xmlGenericError(xmlGenericErrorContext,
1062 "malloc of %ld byte failed\n",
1063 (len + 1) * (long)sizeof(xmlChar));
1064 return(NULL);
1065 }
1066 memcpy(ret, cur, len * sizeof(xmlChar));
1067 ret[len] = 0;
1068 return(ret);
1069}
1070
1071/**
1072 * xmlStrdup:
1073 * @cur: the input xmlChar *
1074 *
1075 * a strdup for array of xmlChar's. Since they are supposed to be
1076 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1077 * a termination mark of '0'.
1078 *
1079 * Returns a new xmlChar * or NULL
1080 */
1081xmlChar *
1082xmlStrdup(const xmlChar *cur) {
1083 const xmlChar *p = cur;
1084
1085 if (cur == NULL) return(NULL);
1086 while (*p != 0) p++; /* non input consuming */
1087 return(xmlStrndup(cur, p - cur));
1088}
1089
1090/**
1091 * xmlCharStrndup:
1092 * @cur: the input char *
1093 * @len: the len of @cur
1094 *
1095 * a strndup for char's to xmlChar's
1096 *
1097 * Returns a new xmlChar * or NULL
1098 */
1099
1100xmlChar *
1101xmlCharStrndup(const char *cur, int len) {
1102 int i;
1103 xmlChar *ret;
1104
1105 if ((cur == NULL) || (len < 0)) return(NULL);
1106 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1107 if (ret == NULL) {
1108 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1109 (len + 1) * (long)sizeof(xmlChar));
1110 return(NULL);
1111 }
1112 for (i = 0;i < len;i++)
1113 ret[i] = (xmlChar) cur[i];
1114 ret[len] = 0;
1115 return(ret);
1116}
1117
1118/**
1119 * xmlCharStrdup:
1120 * @cur: the input char *
1121 * @len: the len of @cur
1122 *
1123 * a strdup for char's to xmlChar's
1124 *
1125 * Returns a new xmlChar * or NULL
1126 */
1127
1128xmlChar *
1129xmlCharStrdup(const char *cur) {
1130 const char *p = cur;
1131
1132 if (cur == NULL) return(NULL);
1133 while (*p != '\0') p++; /* non input consuming */
1134 return(xmlCharStrndup(cur, p - cur));
1135}
1136
1137/**
1138 * xmlStrcmp:
1139 * @str1: the first xmlChar *
1140 * @str2: the second xmlChar *
1141 *
1142 * a strcmp for xmlChar's
1143 *
1144 * Returns the integer result of the comparison
1145 */
1146
1147int
1148xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1149 register int tmp;
1150
1151 if (str1 == str2) return(0);
1152 if (str1 == NULL) return(-1);
1153 if (str2 == NULL) return(1);
1154 do {
1155 tmp = *str1++ - *str2;
1156 if (tmp != 0) return(tmp);
1157 } while (*str2++ != 0);
1158 return 0;
1159}
1160
1161/**
1162 * xmlStrEqual:
1163 * @str1: the first xmlChar *
1164 * @str2: the second xmlChar *
1165 *
1166 * Check if both string are equal of have same content
1167 * Should be a bit more readable and faster than xmlStrEqual()
1168 *
1169 * Returns 1 if they are equal, 0 if they are different
1170 */
1171
1172int
1173xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1174 if (str1 == str2) return(1);
1175 if (str1 == NULL) return(0);
1176 if (str2 == NULL) return(0);
1177 do {
1178 if (*str1++ != *str2) return(0);
1179 } while (*str2++);
1180 return(1);
1181}
1182
1183/**
1184 * xmlStrncmp:
1185 * @str1: the first xmlChar *
1186 * @str2: the second xmlChar *
1187 * @len: the max comparison length
1188 *
1189 * a strncmp for xmlChar's
1190 *
1191 * Returns the integer result of the comparison
1192 */
1193
1194int
1195xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1196 register int tmp;
1197
1198 if (len <= 0) return(0);
1199 if (str1 == str2) return(0);
1200 if (str1 == NULL) return(-1);
1201 if (str2 == NULL) return(1);
1202 do {
1203 tmp = *str1++ - *str2;
1204 if (tmp != 0 || --len == 0) return(tmp);
1205 } while (*str2++ != 0);
1206 return 0;
1207}
1208
Daniel Veillardb44025c2001-10-11 22:55:55 +00001209static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001210 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1211 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1212 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1213 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1214 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1215 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1216 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1217 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1218 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1219 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1220 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1221 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1222 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1223 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1224 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1225 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1226 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1227 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1228 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1229 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1230 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1231 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1232 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1233 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1234 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1235 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1236 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1237 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1238 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1239 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1240 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1241 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1242};
1243
1244/**
1245 * xmlStrcasecmp:
1246 * @str1: the first xmlChar *
1247 * @str2: the second xmlChar *
1248 *
1249 * a strcasecmp for xmlChar's
1250 *
1251 * Returns the integer result of the comparison
1252 */
1253
1254int
1255xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1256 register int tmp;
1257
1258 if (str1 == str2) return(0);
1259 if (str1 == NULL) return(-1);
1260 if (str2 == NULL) return(1);
1261 do {
1262 tmp = casemap[*str1++] - casemap[*str2];
1263 if (tmp != 0) return(tmp);
1264 } while (*str2++ != 0);
1265 return 0;
1266}
1267
1268/**
1269 * xmlStrncasecmp:
1270 * @str1: the first xmlChar *
1271 * @str2: the second xmlChar *
1272 * @len: the max comparison length
1273 *
1274 * a strncasecmp for xmlChar's
1275 *
1276 * Returns the integer result of the comparison
1277 */
1278
1279int
1280xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1281 register int tmp;
1282
1283 if (len <= 0) return(0);
1284 if (str1 == str2) return(0);
1285 if (str1 == NULL) return(-1);
1286 if (str2 == NULL) return(1);
1287 do {
1288 tmp = casemap[*str1++] - casemap[*str2];
1289 if (tmp != 0 || --len == 0) return(tmp);
1290 } while (*str2++ != 0);
1291 return 0;
1292}
1293
1294/**
1295 * xmlStrchr:
1296 * @str: the xmlChar * array
1297 * @val: the xmlChar to search
1298 *
1299 * a strchr for xmlChar's
1300 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001301 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001302 */
1303
1304const xmlChar *
1305xmlStrchr(const xmlChar *str, xmlChar val) {
1306 if (str == NULL) return(NULL);
1307 while (*str != 0) { /* non input consuming */
1308 if (*str == val) return((xmlChar *) str);
1309 str++;
1310 }
1311 return(NULL);
1312}
1313
1314/**
1315 * xmlStrstr:
1316 * @str: the xmlChar * array (haystack)
1317 * @val: the xmlChar to search (needle)
1318 *
1319 * a strstr for xmlChar's
1320 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001321 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001322 */
1323
1324const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001325xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001326 int n;
1327
1328 if (str == NULL) return(NULL);
1329 if (val == NULL) return(NULL);
1330 n = xmlStrlen(val);
1331
1332 if (n == 0) return(str);
1333 while (*str != 0) { /* non input consuming */
1334 if (*str == *val) {
1335 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1336 }
1337 str++;
1338 }
1339 return(NULL);
1340}
1341
1342/**
1343 * xmlStrcasestr:
1344 * @str: the xmlChar * array (haystack)
1345 * @val: the xmlChar to search (needle)
1346 *
1347 * a case-ignoring strstr for xmlChar's
1348 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001349 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001350 */
1351
1352const xmlChar *
1353xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1354 int n;
1355
1356 if (str == NULL) return(NULL);
1357 if (val == NULL) return(NULL);
1358 n = xmlStrlen(val);
1359
1360 if (n == 0) return(str);
1361 while (*str != 0) { /* non input consuming */
1362 if (casemap[*str] == casemap[*val])
1363 if (!xmlStrncasecmp(str, val, n)) return(str);
1364 str++;
1365 }
1366 return(NULL);
1367}
1368
1369/**
1370 * xmlStrsub:
1371 * @str: the xmlChar * array (haystack)
1372 * @start: the index of the first char (zero based)
1373 * @len: the length of the substring
1374 *
1375 * Extract a substring of a given string
1376 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001377 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001378 */
1379
1380xmlChar *
1381xmlStrsub(const xmlChar *str, int start, int len) {
1382 int i;
1383
1384 if (str == NULL) return(NULL);
1385 if (start < 0) return(NULL);
1386 if (len < 0) return(NULL);
1387
1388 for (i = 0;i < start;i++) {
1389 if (*str == 0) return(NULL);
1390 str++;
1391 }
1392 if (*str == 0) return(NULL);
1393 return(xmlStrndup(str, len));
1394}
1395
1396/**
1397 * xmlStrlen:
1398 * @str: the xmlChar * array
1399 *
1400 * length of a xmlChar's string
1401 *
1402 * Returns the number of xmlChar contained in the ARRAY.
1403 */
1404
1405int
1406xmlStrlen(const xmlChar *str) {
1407 int len = 0;
1408
1409 if (str == NULL) return(0);
1410 while (*str != 0) { /* non input consuming */
1411 str++;
1412 len++;
1413 }
1414 return(len);
1415}
1416
1417/**
1418 * xmlStrncat:
1419 * @cur: the original xmlChar * array
1420 * @add: the xmlChar * array added
1421 * @len: the length of @add
1422 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001423 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001424 * first bytes of @add.
1425 *
1426 * Returns a new xmlChar *, the original @cur is reallocated if needed
1427 * and should not be freed
1428 */
1429
1430xmlChar *
1431xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1432 int size;
1433 xmlChar *ret;
1434
1435 if ((add == NULL) || (len == 0))
1436 return(cur);
1437 if (cur == NULL)
1438 return(xmlStrndup(add, len));
1439
1440 size = xmlStrlen(cur);
1441 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1442 if (ret == NULL) {
1443 xmlGenericError(xmlGenericErrorContext,
1444 "xmlStrncat: realloc of %ld byte failed\n",
1445 (size + len + 1) * (long)sizeof(xmlChar));
1446 return(cur);
1447 }
1448 memcpy(&ret[size], add, len * sizeof(xmlChar));
1449 ret[size + len] = 0;
1450 return(ret);
1451}
1452
1453/**
1454 * xmlStrcat:
1455 * @cur: the original xmlChar * array
1456 * @add: the xmlChar * array added
1457 *
1458 * a strcat for array of xmlChar's. Since they are supposed to be
1459 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1460 * a termination mark of '0'.
1461 *
1462 * Returns a new xmlChar * containing the concatenated string.
1463 */
1464xmlChar *
1465xmlStrcat(xmlChar *cur, const xmlChar *add) {
1466 const xmlChar *p = add;
1467
1468 if (add == NULL) return(cur);
1469 if (cur == NULL)
1470 return(xmlStrdup(add));
1471
1472 while (*p != 0) p++; /* non input consuming */
1473 return(xmlStrncat(cur, add, p - add));
1474}
1475
/************************************************************************
 *                                                                      *
 *              Commodity functions, cleanup needed ?                   *
 *                                                                      *
 ************************************************************************/
1481
1482/**
1483 * areBlanks:
1484 * @ctxt: an XML parser context
1485 * @str: a xmlChar *
1486 * @len: the size of @str
1487 *
1488 * Is this a sequence of blank chars that one can ignore ?
1489 *
1490 * Returns 1 if ignorable 0 otherwise.
1491 */
1492
1493static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1494 int i, ret;
1495 xmlNodePtr lastChild;
1496
Daniel Veillard05c13a22001-09-09 08:38:09 +00001497 /*
1498 * Don't spend time trying to differentiate them, the same callback is
1499 * used !
1500 */
1501 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001502 return(0);
1503
Owen Taylor3473f882001-02-23 17:55:21 +00001504 /*
1505 * Check for xml:space value.
1506 */
1507 if (*(ctxt->space) == 1)
1508 return(0);
1509
1510 /*
1511 * Check that the string is made of blanks
1512 */
1513 for (i = 0;i < len;i++)
1514 if (!(IS_BLANK(str[i]))) return(0);
1515
1516 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001517 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001518 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001519 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001520 if (ctxt->myDoc != NULL) {
1521 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1522 if (ret == 0) return(1);
1523 if (ret == 1) return(0);
1524 }
1525
1526 /*
1527 * Otherwise, heuristic :-\
1528 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001529 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001530 if ((ctxt->node->children == NULL) &&
1531 (RAW == '<') && (NXT(1) == '/')) return(0);
1532
1533 lastChild = xmlGetLastChild(ctxt->node);
1534 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001535 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1536 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001537 } else if (xmlNodeIsText(lastChild))
1538 return(0);
1539 else if ((ctxt->node->children != NULL) &&
1540 (xmlNodeIsText(ctxt->node->children)))
1541 return(0);
1542 return(1);
1543}
1544
/************************************************************************
 *                                                                      *
 *              Extra stuff for namespace support                       *
 *      Relates to http://www.w3.org/TR/WD-xml-names                    *
 *                                                                      *
 ************************************************************************/
1551
1552/**
1553 * xmlSplitQName:
1554 * @ctxt: an XML parser context
1555 * @name: an XML parser context
1556 * @prefix: a xmlChar **
1557 *
1558 * parse an UTF8 encoded XML qualified name string
1559 *
1560 * [NS 5] QName ::= (Prefix ':')? LocalPart
1561 *
1562 * [NS 6] Prefix ::= NCName
1563 *
1564 * [NS 7] LocalPart ::= NCName
1565 *
1566 * Returns the local part, and prefix is updated
1567 * to get the Prefix if any.
1568 */
1569
1570xmlChar *
1571xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1572 xmlChar buf[XML_MAX_NAMELEN + 5];
1573 xmlChar *buffer = NULL;
1574 int len = 0;
1575 int max = XML_MAX_NAMELEN;
1576 xmlChar *ret = NULL;
1577 const xmlChar *cur = name;
1578 int c;
1579
1580 *prefix = NULL;
1581
1582#ifndef XML_XML_NAMESPACE
1583 /* xml: prefix is not really a namespace */
1584 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1585 (cur[2] == 'l') && (cur[3] == ':'))
1586 return(xmlStrdup(name));
1587#endif
1588
1589 /* nasty but valid */
1590 if (cur[0] == ':')
1591 return(xmlStrdup(name));
1592
1593 c = *cur++;
1594 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1595 buf[len++] = c;
1596 c = *cur++;
1597 }
1598 if (len >= max) {
1599 /*
1600 * Okay someone managed to make a huge name, so he's ready to pay
1601 * for the processing speed.
1602 */
1603 max = len * 2;
1604
1605 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1606 if (buffer == NULL) {
1607 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1608 ctxt->sax->error(ctxt->userData,
1609 "xmlSplitQName: out of memory\n");
1610 return(NULL);
1611 }
1612 memcpy(buffer, buf, len);
1613 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1614 if (len + 10 > max) {
1615 max *= 2;
1616 buffer = (xmlChar *) xmlRealloc(buffer,
1617 max * sizeof(xmlChar));
1618 if (buffer == NULL) {
1619 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1620 ctxt->sax->error(ctxt->userData,
1621 "xmlSplitQName: out of memory\n");
1622 return(NULL);
1623 }
1624 }
1625 buffer[len++] = c;
1626 c = *cur++;
1627 }
1628 buffer[len] = 0;
1629 }
1630
1631 if (buffer == NULL)
1632 ret = xmlStrndup(buf, len);
1633 else {
1634 ret = buffer;
1635 buffer = NULL;
1636 max = XML_MAX_NAMELEN;
1637 }
1638
1639
1640 if (c == ':') {
1641 c = *cur++;
1642 if (c == 0) return(ret);
1643 *prefix = ret;
1644 len = 0;
1645
1646 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1647 buf[len++] = c;
1648 c = *cur++;
1649 }
1650 if (len >= max) {
1651 /*
1652 * Okay someone managed to make a huge name, so he's ready to pay
1653 * for the processing speed.
1654 */
1655 max = len * 2;
1656
1657 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1658 if (buffer == NULL) {
1659 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1660 ctxt->sax->error(ctxt->userData,
1661 "xmlSplitQName: out of memory\n");
1662 return(NULL);
1663 }
1664 memcpy(buffer, buf, len);
1665 while (c != 0) { /* tested bigname2.xml */
1666 if (len + 10 > max) {
1667 max *= 2;
1668 buffer = (xmlChar *) xmlRealloc(buffer,
1669 max * sizeof(xmlChar));
1670 if (buffer == NULL) {
1671 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1672 ctxt->sax->error(ctxt->userData,
1673 "xmlSplitQName: out of memory\n");
1674 return(NULL);
1675 }
1676 }
1677 buffer[len++] = c;
1678 c = *cur++;
1679 }
1680 buffer[len] = 0;
1681 }
1682
1683 if (buffer == NULL)
1684 ret = xmlStrndup(buf, len);
1685 else {
1686 ret = buffer;
1687 }
1688 }
1689
1690 return(ret);
1691}
1692
/************************************************************************
 *                                                                      *
 *              The parser itself                                       *
 *      Relates to http://www.w3.org/TR/REC-xml                         *
 *                                                                      *
 ************************************************************************/
1699
Daniel Veillard76d66f42001-05-16 21:05:17 +00001700static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001701/**
1702 * xmlParseName:
1703 * @ctxt: an XML parser context
1704 *
1705 * parse an XML name.
1706 *
1707 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1708 * CombiningChar | Extender
1709 *
1710 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1711 *
1712 * [6] Names ::= Name (S Name)*
1713 *
1714 * Returns the Name parsed or NULL
1715 */
1716
1717xmlChar *
1718xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001719 const xmlChar *in;
1720 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001721 int count = 0;
1722
1723 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001724
1725 /*
1726 * Accelerator for simple ASCII names
1727 */
1728 in = ctxt->input->cur;
1729 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1730 ((*in >= 0x41) && (*in <= 0x5A)) ||
1731 (*in == '_') || (*in == ':')) {
1732 in++;
1733 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1734 ((*in >= 0x41) && (*in <= 0x5A)) ||
1735 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001736 (*in == '_') || (*in == '-') ||
1737 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001738 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001739 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001740 count = in - ctxt->input->cur;
1741 ret = xmlStrndup(ctxt->input->cur, count);
1742 ctxt->input->cur = in;
1743 return(ret);
1744 }
1745 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001746 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001747}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001748
Daniel Veillard76d66f42001-05-16 21:05:17 +00001749static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001750xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1751 xmlChar buf[XML_MAX_NAMELEN + 5];
1752 int len = 0, l;
1753 int c;
1754 int count = 0;
1755
1756 /*
1757 * Handler for more complex cases
1758 */
1759 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001760 c = CUR_CHAR(l);
1761 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1762 (!IS_LETTER(c) && (c != '_') &&
1763 (c != ':'))) {
1764 return(NULL);
1765 }
1766
1767 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1768 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1769 (c == '.') || (c == '-') ||
1770 (c == '_') || (c == ':') ||
1771 (IS_COMBINING(c)) ||
1772 (IS_EXTENDER(c)))) {
1773 if (count++ > 100) {
1774 count = 0;
1775 GROW;
1776 }
1777 COPY_BUF(l,buf,len,c);
1778 NEXTL(l);
1779 c = CUR_CHAR(l);
1780 if (len >= XML_MAX_NAMELEN) {
1781 /*
1782 * Okay someone managed to make a huge name, so he's ready to pay
1783 * for the processing speed.
1784 */
1785 xmlChar *buffer;
1786 int max = len * 2;
1787
1788 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1789 if (buffer == NULL) {
1790 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1791 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001792 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001793 return(NULL);
1794 }
1795 memcpy(buffer, buf, len);
1796 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1797 (c == '.') || (c == '-') ||
1798 (c == '_') || (c == ':') ||
1799 (IS_COMBINING(c)) ||
1800 (IS_EXTENDER(c))) {
1801 if (count++ > 100) {
1802 count = 0;
1803 GROW;
1804 }
1805 if (len + 10 > max) {
1806 max *= 2;
1807 buffer = (xmlChar *) xmlRealloc(buffer,
1808 max * sizeof(xmlChar));
1809 if (buffer == NULL) {
1810 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1811 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001812 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001813 return(NULL);
1814 }
1815 }
1816 COPY_BUF(l,buffer,len,c);
1817 NEXTL(l);
1818 c = CUR_CHAR(l);
1819 }
1820 buffer[len] = 0;
1821 return(buffer);
1822 }
1823 }
1824 return(xmlStrndup(buf, len));
1825}
1826
1827/**
1828 * xmlParseStringName:
1829 * @ctxt: an XML parser context
1830 * @str: a pointer to the string pointer (IN/OUT)
1831 *
1832 * parse an XML name.
1833 *
1834 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1835 * CombiningChar | Extender
1836 *
1837 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1838 *
1839 * [6] Names ::= Name (S Name)*
1840 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001841 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00001842 * is updated to the current location in the string.
1843 */
1844
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001845static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001846xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1847 xmlChar buf[XML_MAX_NAMELEN + 5];
1848 const xmlChar *cur = *str;
1849 int len = 0, l;
1850 int c;
1851
1852 c = CUR_SCHAR(cur, l);
1853 if (!IS_LETTER(c) && (c != '_') &&
1854 (c != ':')) {
1855 return(NULL);
1856 }
1857
1858 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1859 (c == '.') || (c == '-') ||
1860 (c == '_') || (c == ':') ||
1861 (IS_COMBINING(c)) ||
1862 (IS_EXTENDER(c))) {
1863 COPY_BUF(l,buf,len,c);
1864 cur += l;
1865 c = CUR_SCHAR(cur, l);
1866 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1867 /*
1868 * Okay someone managed to make a huge name, so he's ready to pay
1869 * for the processing speed.
1870 */
1871 xmlChar *buffer;
1872 int max = len * 2;
1873
1874 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1875 if (buffer == NULL) {
1876 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1877 ctxt->sax->error(ctxt->userData,
1878 "xmlParseStringName: out of memory\n");
1879 return(NULL);
1880 }
1881 memcpy(buffer, buf, len);
1882 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1883 (c == '.') || (c == '-') ||
1884 (c == '_') || (c == ':') ||
1885 (IS_COMBINING(c)) ||
1886 (IS_EXTENDER(c))) {
1887 if (len + 10 > max) {
1888 max *= 2;
1889 buffer = (xmlChar *) xmlRealloc(buffer,
1890 max * sizeof(xmlChar));
1891 if (buffer == NULL) {
1892 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1893 ctxt->sax->error(ctxt->userData,
1894 "xmlParseStringName: out of memory\n");
1895 return(NULL);
1896 }
1897 }
1898 COPY_BUF(l,buffer,len,c);
1899 cur += l;
1900 c = CUR_SCHAR(cur, l);
1901 }
1902 buffer[len] = 0;
1903 *str = cur;
1904 return(buffer);
1905 }
1906 }
1907 *str = cur;
1908 return(xmlStrndup(buf, len));
1909}
1910
1911/**
1912 * xmlParseNmtoken:
1913 * @ctxt: an XML parser context
1914 *
1915 * parse an XML Nmtoken.
1916 *
1917 * [7] Nmtoken ::= (NameChar)+
1918 *
1919 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1920 *
1921 * Returns the Nmtoken parsed or NULL
1922 */
1923
1924xmlChar *
1925xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1926 xmlChar buf[XML_MAX_NAMELEN + 5];
1927 int len = 0, l;
1928 int c;
1929 int count = 0;
1930
1931 GROW;
1932 c = CUR_CHAR(l);
1933
1934 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1935 (c == '.') || (c == '-') ||
1936 (c == '_') || (c == ':') ||
1937 (IS_COMBINING(c)) ||
1938 (IS_EXTENDER(c))) {
1939 if (count++ > 100) {
1940 count = 0;
1941 GROW;
1942 }
1943 COPY_BUF(l,buf,len,c);
1944 NEXTL(l);
1945 c = CUR_CHAR(l);
1946 if (len >= XML_MAX_NAMELEN) {
1947 /*
1948 * Okay someone managed to make a huge token, so he's ready to pay
1949 * for the processing speed.
1950 */
1951 xmlChar *buffer;
1952 int max = len * 2;
1953
1954 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1955 if (buffer == NULL) {
1956 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1957 ctxt->sax->error(ctxt->userData,
1958 "xmlParseNmtoken: out of memory\n");
1959 return(NULL);
1960 }
1961 memcpy(buffer, buf, len);
1962 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1963 (c == '.') || (c == '-') ||
1964 (c == '_') || (c == ':') ||
1965 (IS_COMBINING(c)) ||
1966 (IS_EXTENDER(c))) {
1967 if (count++ > 100) {
1968 count = 0;
1969 GROW;
1970 }
1971 if (len + 10 > max) {
1972 max *= 2;
1973 buffer = (xmlChar *) xmlRealloc(buffer,
1974 max * sizeof(xmlChar));
1975 if (buffer == NULL) {
1976 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1977 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001978 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001979 return(NULL);
1980 }
1981 }
1982 COPY_BUF(l,buffer,len,c);
1983 NEXTL(l);
1984 c = CUR_CHAR(l);
1985 }
1986 buffer[len] = 0;
1987 return(buffer);
1988 }
1989 }
1990 if (len == 0)
1991 return(NULL);
1992 return(xmlStrndup(buf, len));
1993}
1994
1995/**
1996 * xmlParseEntityValue:
1997 * @ctxt: an XML parser context
1998 * @orig: if non-NULL store a copy of the original entity value
1999 *
2000 * parse a value for ENTITY declarations
2001 *
2002 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2003 * "'" ([^%&'] | PEReference | Reference)* "'"
2004 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002005 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002006 */
2007
2008xmlChar *
2009xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2010 xmlChar *buf = NULL;
2011 int len = 0;
2012 int size = XML_PARSER_BUFFER_SIZE;
2013 int c, l;
2014 xmlChar stop;
2015 xmlChar *ret = NULL;
2016 const xmlChar *cur = NULL;
2017 xmlParserInputPtr input;
2018
2019 if (RAW == '"') stop = '"';
2020 else if (RAW == '\'') stop = '\'';
2021 else {
2022 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2023 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2024 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2025 ctxt->wellFormed = 0;
2026 ctxt->disableSAX = 1;
2027 return(NULL);
2028 }
2029 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2030 if (buf == NULL) {
2031 xmlGenericError(xmlGenericErrorContext,
2032 "malloc of %d byte failed\n", size);
2033 return(NULL);
2034 }
2035
2036 /*
2037 * The content of the entity definition is copied in a buffer.
2038 */
2039
2040 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2041 input = ctxt->input;
2042 GROW;
2043 NEXT;
2044 c = CUR_CHAR(l);
2045 /*
2046 * NOTE: 4.4.5 Included in Literal
2047 * When a parameter entity reference appears in a literal entity
2048 * value, ... a single or double quote character in the replacement
2049 * text is always treated as a normal data character and will not
2050 * terminate the literal.
2051 * In practice it means we stop the loop only when back at parsing
2052 * the initial entity and the quote is found
2053 */
2054 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2055 (ctxt->input != input))) {
2056 if (len + 5 >= size) {
2057 size *= 2;
2058 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2059 if (buf == NULL) {
2060 xmlGenericError(xmlGenericErrorContext,
2061 "realloc of %d byte failed\n", size);
2062 return(NULL);
2063 }
2064 }
2065 COPY_BUF(l,buf,len,c);
2066 NEXTL(l);
2067 /*
2068 * Pop-up of finished entities.
2069 */
2070 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2071 xmlPopInput(ctxt);
2072
2073 GROW;
2074 c = CUR_CHAR(l);
2075 if (c == 0) {
2076 GROW;
2077 c = CUR_CHAR(l);
2078 }
2079 }
2080 buf[len] = 0;
2081
2082 /*
2083 * Raise problem w.r.t. '&' and '%' being used in non-entities
2084 * reference constructs. Note Charref will be handled in
2085 * xmlStringDecodeEntities()
2086 */
2087 cur = buf;
2088 while (*cur != 0) { /* non input consuming */
2089 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2090 xmlChar *name;
2091 xmlChar tmp = *cur;
2092
2093 cur++;
2094 name = xmlParseStringName(ctxt, &cur);
2095 if ((name == NULL) || (*cur != ';')) {
2096 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2097 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2098 ctxt->sax->error(ctxt->userData,
2099 "EntityValue: '%c' forbidden except for entities references\n",
2100 tmp);
2101 ctxt->wellFormed = 0;
2102 ctxt->disableSAX = 1;
2103 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002104 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2105 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002106 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2107 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2108 ctxt->sax->error(ctxt->userData,
2109 "EntityValue: PEReferences forbidden in internal subset\n",
2110 tmp);
2111 ctxt->wellFormed = 0;
2112 ctxt->disableSAX = 1;
2113 }
2114 if (name != NULL)
2115 xmlFree(name);
2116 }
2117 cur++;
2118 }
2119
2120 /*
2121 * Then PEReference entities are substituted.
2122 */
2123 if (c != stop) {
2124 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2125 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2126 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2127 ctxt->wellFormed = 0;
2128 ctxt->disableSAX = 1;
2129 xmlFree(buf);
2130 } else {
2131 NEXT;
2132 /*
2133 * NOTE: 4.4.7 Bypassed
2134 * When a general entity reference appears in the EntityValue in
2135 * an entity declaration, it is bypassed and left as is.
2136 * so XML_SUBSTITUTE_REF is not set here.
2137 */
2138 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2139 0, 0, 0);
2140 if (orig != NULL)
2141 *orig = buf;
2142 else
2143 xmlFree(buf);
2144 }
2145
2146 return(ret);
2147}
2148
2149/**
2150 * xmlParseAttValue:
2151 * @ctxt: an XML parser context
2152 *
2153 * parse a value for an attribute
2154 * Note: the parser won't do substitution of entities here, this
2155 * will be handled later in xmlStringGetNodeList
2156 *
2157 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2158 * "'" ([^<&'] | Reference)* "'"
2159 *
2160 * 3.3.3 Attribute-Value Normalization:
2161 * Before the value of an attribute is passed to the application or
2162 * checked for validity, the XML processor must normalize it as follows:
2163 * - a character reference is processed by appending the referenced
2164 * character to the attribute value
2165 * - an entity reference is processed by recursively processing the
2166 * replacement text of the entity
2167 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2168 * appending #x20 to the normalized value, except that only a single
2169 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2170 * parsed entity or the literal entity value of an internal parsed entity
2171 * - other characters are processed by appending them to the normalized value
2172 * If the declared value is not CDATA, then the XML processor must further
2173 * process the normalized attribute value by discarding any leading and
2174 * trailing space (#x20) characters, and by replacing sequences of space
2175 * (#x20) characters by a single space (#x20) character.
2176 * All attributes for which no declaration has been read should be treated
2177 * by a non-validating parser as if declared CDATA.
2178 *
2179 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2180 */
2181
2182xmlChar *
2183xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2184 xmlChar limit = 0;
2185 xmlChar *buf = NULL;
2186 int len = 0;
2187 int buf_size = 0;
2188 int c, l;
2189 xmlChar *current = NULL;
2190 xmlEntityPtr ent;
2191
2192
2193 SHRINK;
2194 if (NXT(0) == '"') {
2195 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2196 limit = '"';
2197 NEXT;
2198 } else if (NXT(0) == '\'') {
2199 limit = '\'';
2200 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2201 NEXT;
2202 } else {
2203 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2204 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2205 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2206 ctxt->wellFormed = 0;
2207 ctxt->disableSAX = 1;
2208 return(NULL);
2209 }
2210
2211 /*
2212 * allocate a translation buffer.
2213 */
2214 buf_size = XML_PARSER_BUFFER_SIZE;
2215 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2216 if (buf == NULL) {
2217 perror("xmlParseAttValue: malloc failed");
2218 return(NULL);
2219 }
2220
2221 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002222 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002223 */
2224 c = CUR_CHAR(l);
2225 while (((NXT(0) != limit) && /* checked */
2226 (c != '<')) || (ctxt->token != 0)) {
2227 if (c == 0) break;
2228 if (ctxt->token == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002229 if (ctxt->replaceEntities) {
2230 if (len > buf_size - 10) {
2231 growBuffer(buf);
2232 }
2233 buf[len++] = '&';
2234 } else {
2235 /*
2236 * The reparsing will be done in xmlStringGetNodeList()
2237 * called by the attribute() function in SAX.c
2238 */
2239 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002240
Daniel Veillard319a7422001-09-11 09:27:09 +00002241 if (len > buf_size - 10) {
2242 growBuffer(buf);
2243 }
2244 current = &buffer[0];
2245 while (*current != 0) { /* non input consuming */
2246 buf[len++] = *current++;
2247 }
2248 ctxt->token = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002249 }
Owen Taylor3473f882001-02-23 17:55:21 +00002250 } else if (c == '&') {
2251 if (NXT(1) == '#') {
2252 int val = xmlParseCharRef(ctxt);
2253 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002254 if (ctxt->replaceEntities) {
2255 if (len > buf_size - 10) {
2256 growBuffer(buf);
2257 }
2258 buf[len++] = '&';
2259 } else {
2260 /*
2261 * The reparsing will be done in xmlStringGetNodeList()
2262 * called by the attribute() function in SAX.c
2263 */
2264 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002265
Daniel Veillard319a7422001-09-11 09:27:09 +00002266 if (len > buf_size - 10) {
2267 growBuffer(buf);
2268 }
2269 current = &buffer[0];
2270 while (*current != 0) { /* non input consuming */
2271 buf[len++] = *current++;
2272 }
Owen Taylor3473f882001-02-23 17:55:21 +00002273 }
2274 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002275 if (len > buf_size - 10) {
2276 growBuffer(buf);
2277 }
Owen Taylor3473f882001-02-23 17:55:21 +00002278 len += xmlCopyChar(0, &buf[len], val);
2279 }
2280 } else {
2281 ent = xmlParseEntityRef(ctxt);
2282 if ((ent != NULL) &&
2283 (ctxt->replaceEntities != 0)) {
2284 xmlChar *rep;
2285
2286 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2287 rep = xmlStringDecodeEntities(ctxt, ent->content,
2288 XML_SUBSTITUTE_REF, 0, 0, 0);
2289 if (rep != NULL) {
2290 current = rep;
2291 while (*current != 0) { /* non input consuming */
2292 buf[len++] = *current++;
2293 if (len > buf_size - 10) {
2294 growBuffer(buf);
2295 }
2296 }
2297 xmlFree(rep);
2298 }
2299 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002300 if (len > buf_size - 10) {
2301 growBuffer(buf);
2302 }
Owen Taylor3473f882001-02-23 17:55:21 +00002303 if (ent->content != NULL)
2304 buf[len++] = ent->content[0];
2305 }
2306 } else if (ent != NULL) {
2307 int i = xmlStrlen(ent->name);
2308 const xmlChar *cur = ent->name;
2309
2310 /*
2311 * This may look absurd but is needed to detect
2312 * entities problems
2313 */
2314 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2315 (ent->content != NULL)) {
2316 xmlChar *rep;
2317 rep = xmlStringDecodeEntities(ctxt, ent->content,
2318 XML_SUBSTITUTE_REF, 0, 0, 0);
2319 if (rep != NULL)
2320 xmlFree(rep);
2321 }
2322
2323 /*
2324 * Just output the reference
2325 */
2326 buf[len++] = '&';
2327 if (len > buf_size - i - 10) {
2328 growBuffer(buf);
2329 }
2330 for (;i > 0;i--)
2331 buf[len++] = *cur++;
2332 buf[len++] = ';';
2333 }
2334 }
2335 } else {
2336 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2337 COPY_BUF(l,buf,len,0x20);
2338 if (len > buf_size - 10) {
2339 growBuffer(buf);
2340 }
2341 } else {
2342 COPY_BUF(l,buf,len,c);
2343 if (len > buf_size - 10) {
2344 growBuffer(buf);
2345 }
2346 }
2347 NEXTL(l);
2348 }
2349 GROW;
2350 c = CUR_CHAR(l);
2351 }
2352 buf[len++] = 0;
2353 if (RAW == '<') {
2354 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2355 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2356 ctxt->sax->error(ctxt->userData,
2357 "Unescaped '<' not allowed in attributes values\n");
2358 ctxt->wellFormed = 0;
2359 ctxt->disableSAX = 1;
2360 } else if (RAW != limit) {
2361 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2362 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2363 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2364 ctxt->wellFormed = 0;
2365 ctxt->disableSAX = 1;
2366 } else
2367 NEXT;
2368 return(buf);
2369}
2370
2371/**
2372 * xmlParseSystemLiteral:
2373 * @ctxt: an XML parser context
2374 *
2375 * parse an XML Literal
2376 *
2377 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2378 *
2379 * Returns the SystemLiteral parsed or NULL
2380 */
2381
2382xmlChar *
2383xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2384 xmlChar *buf = NULL;
2385 int len = 0;
2386 int size = XML_PARSER_BUFFER_SIZE;
2387 int cur, l;
2388 xmlChar stop;
2389 int state = ctxt->instate;
2390 int count = 0;
2391
2392 SHRINK;
2393 if (RAW == '"') {
2394 NEXT;
2395 stop = '"';
2396 } else if (RAW == '\'') {
2397 NEXT;
2398 stop = '\'';
2399 } else {
2400 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2401 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2402 ctxt->sax->error(ctxt->userData,
2403 "SystemLiteral \" or ' expected\n");
2404 ctxt->wellFormed = 0;
2405 ctxt->disableSAX = 1;
2406 return(NULL);
2407 }
2408
2409 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2410 if (buf == NULL) {
2411 xmlGenericError(xmlGenericErrorContext,
2412 "malloc of %d byte failed\n", size);
2413 return(NULL);
2414 }
2415 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2416 cur = CUR_CHAR(l);
2417 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2418 if (len + 5 >= size) {
2419 size *= 2;
2420 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2421 if (buf == NULL) {
2422 xmlGenericError(xmlGenericErrorContext,
2423 "realloc of %d byte failed\n", size);
2424 ctxt->instate = (xmlParserInputState) state;
2425 return(NULL);
2426 }
2427 }
2428 count++;
2429 if (count > 50) {
2430 GROW;
2431 count = 0;
2432 }
2433 COPY_BUF(l,buf,len,cur);
2434 NEXTL(l);
2435 cur = CUR_CHAR(l);
2436 if (cur == 0) {
2437 GROW;
2438 SHRINK;
2439 cur = CUR_CHAR(l);
2440 }
2441 }
2442 buf[len] = 0;
2443 ctxt->instate = (xmlParserInputState) state;
2444 if (!IS_CHAR(cur)) {
2445 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2446 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2447 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2448 ctxt->wellFormed = 0;
2449 ctxt->disableSAX = 1;
2450 } else {
2451 NEXT;
2452 }
2453 return(buf);
2454}
2455
2456/**
2457 * xmlParsePubidLiteral:
2458 * @ctxt: an XML parser context
2459 *
2460 * parse an XML public literal
2461 *
2462 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2463 *
2464 * Returns the PubidLiteral parsed or NULL.
2465 */
2466
2467xmlChar *
2468xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2469 xmlChar *buf = NULL;
2470 int len = 0;
2471 int size = XML_PARSER_BUFFER_SIZE;
2472 xmlChar cur;
2473 xmlChar stop;
2474 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002475 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00002476
2477 SHRINK;
2478 if (RAW == '"') {
2479 NEXT;
2480 stop = '"';
2481 } else if (RAW == '\'') {
2482 NEXT;
2483 stop = '\'';
2484 } else {
2485 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2486 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2487 ctxt->sax->error(ctxt->userData,
2488 "SystemLiteral \" or ' expected\n");
2489 ctxt->wellFormed = 0;
2490 ctxt->disableSAX = 1;
2491 return(NULL);
2492 }
2493 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2494 if (buf == NULL) {
2495 xmlGenericError(xmlGenericErrorContext,
2496 "malloc of %d byte failed\n", size);
2497 return(NULL);
2498 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002499 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00002500 cur = CUR;
2501 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2502 if (len + 1 >= size) {
2503 size *= 2;
2504 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2505 if (buf == NULL) {
2506 xmlGenericError(xmlGenericErrorContext,
2507 "realloc of %d byte failed\n", size);
2508 return(NULL);
2509 }
2510 }
2511 buf[len++] = cur;
2512 count++;
2513 if (count > 50) {
2514 GROW;
2515 count = 0;
2516 }
2517 NEXT;
2518 cur = CUR;
2519 if (cur == 0) {
2520 GROW;
2521 SHRINK;
2522 cur = CUR;
2523 }
2524 }
2525 buf[len] = 0;
2526 if (cur != stop) {
2527 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2528 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2529 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2530 ctxt->wellFormed = 0;
2531 ctxt->disableSAX = 1;
2532 } else {
2533 NEXT;
2534 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002535 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00002536 return(buf);
2537}
2538
Daniel Veillard48b2f892001-02-25 16:11:03 +00002539void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002540/**
2541 * xmlParseCharData:
2542 * @ctxt: an XML parser context
2543 * @cdata: int indicating whether we are within a CDATA section
2544 *
2545 * parse a CharData section.
2546 * if we are within a CDATA section ']]>' marks an end of section.
2547 *
2548 * The right angle bracket (>) may be represented using the string "&gt;",
2549 * and must, for compatibility, be escaped using "&gt;" or a character
2550 * reference when it appears in the string "]]>" in content, when that
2551 * string is not marking the end of a CDATA section.
2552 *
2553 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2554 */
2555
2556void
2557xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00002558 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002559 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002560 int line = ctxt->input->line;
2561 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002562
2563 SHRINK;
2564 GROW;
2565 /*
2566 * Accelerated common case where input don't need to be
2567 * modified before passing it to the handler.
2568 */
2569 if ((ctxt->token == 0) && (!cdata)) {
2570 in = ctxt->input->cur;
2571 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002572get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00002573 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
2574 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002575 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002576 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002577 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002578 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002579 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002580 ctxt->input->line++;
2581 in++;
2582 }
2583 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002584 }
2585 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002586 if ((in[1] == ']') && (in[2] == '>')) {
2587 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2588 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2589 ctxt->sax->error(ctxt->userData,
2590 "Sequence ']]>' not allowed in content\n");
2591 ctxt->input->cur = in;
2592 ctxt->wellFormed = 0;
2593 ctxt->disableSAX = 1;
2594 return;
2595 }
2596 in++;
2597 goto get_more;
2598 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002599 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002600 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002601 if (IS_BLANK(*ctxt->input->cur)) {
2602 const xmlChar *tmp = ctxt->input->cur;
2603 ctxt->input->cur = in;
2604 if (areBlanks(ctxt, tmp, nbchar)) {
2605 if (ctxt->sax->ignorableWhitespace != NULL)
2606 ctxt->sax->ignorableWhitespace(ctxt->userData,
2607 tmp, nbchar);
2608 } else {
2609 if (ctxt->sax->characters != NULL)
2610 ctxt->sax->characters(ctxt->userData,
2611 tmp, nbchar);
2612 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002613 line = ctxt->input->line;
2614 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002615 } else {
2616 if (ctxt->sax->characters != NULL)
2617 ctxt->sax->characters(ctxt->userData,
2618 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002619 line = ctxt->input->line;
2620 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002621 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002622 }
2623 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002624 if (*in == 0xD) {
2625 in++;
2626 if (*in == 0xA) {
2627 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002628 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002629 ctxt->input->line++;
2630 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002631 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002632 in--;
2633 }
2634 if (*in == '<') {
2635 return;
2636 }
2637 if (*in == '&') {
2638 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002639 }
2640 SHRINK;
2641 GROW;
2642 in = ctxt->input->cur;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002643 } while ((*in >= 0x20) && (*in <= 0x7F));
Daniel Veillard48b2f892001-02-25 16:11:03 +00002644 nbchar = 0;
2645 }
Daniel Veillard50582112001-03-26 22:52:16 +00002646 ctxt->input->line = line;
2647 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002648 xmlParseCharDataComplex(ctxt, cdata);
2649}
2650
2651void
2652xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002653 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2654 int nbchar = 0;
2655 int cur, l;
2656 int count = 0;
2657
2658 SHRINK;
2659 GROW;
2660 cur = CUR_CHAR(l);
2661 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2662 ((cur != '&') || (ctxt->token == '&')) &&
2663 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2664 if ((cur == ']') && (NXT(1) == ']') &&
2665 (NXT(2) == '>')) {
2666 if (cdata) break;
2667 else {
2668 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2669 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2670 ctxt->sax->error(ctxt->userData,
2671 "Sequence ']]>' not allowed in content\n");
2672 /* Should this be relaxed ??? I see a "must here */
2673 ctxt->wellFormed = 0;
2674 ctxt->disableSAX = 1;
2675 }
2676 }
2677 COPY_BUF(l,buf,nbchar,cur);
2678 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2679 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002680 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002681 */
2682 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2683 if (areBlanks(ctxt, buf, nbchar)) {
2684 if (ctxt->sax->ignorableWhitespace != NULL)
2685 ctxt->sax->ignorableWhitespace(ctxt->userData,
2686 buf, nbchar);
2687 } else {
2688 if (ctxt->sax->characters != NULL)
2689 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2690 }
2691 }
2692 nbchar = 0;
2693 }
2694 count++;
2695 if (count > 50) {
2696 GROW;
2697 count = 0;
2698 }
2699 NEXTL(l);
2700 cur = CUR_CHAR(l);
2701 }
2702 if (nbchar != 0) {
2703 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002704 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002705 */
2706 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2707 if (areBlanks(ctxt, buf, nbchar)) {
2708 if (ctxt->sax->ignorableWhitespace != NULL)
2709 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2710 } else {
2711 if (ctxt->sax->characters != NULL)
2712 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2713 }
2714 }
2715 }
2716}
2717
2718/**
2719 * xmlParseExternalID:
2720 * @ctxt: an XML parser context
2721 * @publicID: a xmlChar** receiving PubidLiteral
2722 * @strict: indicate whether we should restrict parsing to only
2723 * production [75], see NOTE below
2724 *
2725 * Parse an External ID or a Public ID
2726 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002727 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002728 * 'PUBLIC' S PubidLiteral S SystemLiteral
2729 *
2730 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2731 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2732 *
2733 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2734 *
2735 * Returns the function returns SystemLiteral and in the second
2736 * case publicID receives PubidLiteral, is strict is off
2737 * it is possible to return NULL and have publicID set.
2738 */
2739
2740xmlChar *
2741xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2742 xmlChar *URI = NULL;
2743
2744 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002745
2746 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002747 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2748 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2749 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2750 SKIP(6);
2751 if (!IS_BLANK(CUR)) {
2752 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2753 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2754 ctxt->sax->error(ctxt->userData,
2755 "Space required after 'SYSTEM'\n");
2756 ctxt->wellFormed = 0;
2757 ctxt->disableSAX = 1;
2758 }
2759 SKIP_BLANKS;
2760 URI = xmlParseSystemLiteral(ctxt);
2761 if (URI == NULL) {
2762 ctxt->errNo = XML_ERR_URI_REQUIRED;
2763 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2764 ctxt->sax->error(ctxt->userData,
2765 "xmlParseExternalID: SYSTEM, no URI\n");
2766 ctxt->wellFormed = 0;
2767 ctxt->disableSAX = 1;
2768 }
2769 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2770 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2771 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2772 SKIP(6);
2773 if (!IS_BLANK(CUR)) {
2774 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2775 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2776 ctxt->sax->error(ctxt->userData,
2777 "Space required after 'PUBLIC'\n");
2778 ctxt->wellFormed = 0;
2779 ctxt->disableSAX = 1;
2780 }
2781 SKIP_BLANKS;
2782 *publicID = xmlParsePubidLiteral(ctxt);
2783 if (*publicID == NULL) {
2784 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2785 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2786 ctxt->sax->error(ctxt->userData,
2787 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2788 ctxt->wellFormed = 0;
2789 ctxt->disableSAX = 1;
2790 }
2791 if (strict) {
2792 /*
2793 * We don't handle [83] so "S SystemLiteral" is required.
2794 */
2795 if (!IS_BLANK(CUR)) {
2796 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2797 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2798 ctxt->sax->error(ctxt->userData,
2799 "Space required after the Public Identifier\n");
2800 ctxt->wellFormed = 0;
2801 ctxt->disableSAX = 1;
2802 }
2803 } else {
2804 /*
2805 * We handle [83] so we return immediately, if
2806 * "S SystemLiteral" is not detected. From a purely parsing
2807 * point of view that's a nice mess.
2808 */
2809 const xmlChar *ptr;
2810 GROW;
2811
2812 ptr = CUR_PTR;
2813 if (!IS_BLANK(*ptr)) return(NULL);
2814
2815 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2816 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2817 }
2818 SKIP_BLANKS;
2819 URI = xmlParseSystemLiteral(ctxt);
2820 if (URI == NULL) {
2821 ctxt->errNo = XML_ERR_URI_REQUIRED;
2822 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2823 ctxt->sax->error(ctxt->userData,
2824 "xmlParseExternalID: PUBLIC, no URI\n");
2825 ctxt->wellFormed = 0;
2826 ctxt->disableSAX = 1;
2827 }
2828 }
2829 return(URI);
2830}
2831
2832/**
2833 * xmlParseComment:
2834 * @ctxt: an XML parser context
2835 *
2836 * Skip an XML (SGML) comment <!-- .... -->
2837 * The spec says that "For compatibility, the string "--" (double-hyphen)
2838 * must not occur within comments. "
2839 *
2840 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2841 */
2842void
2843xmlParseComment(xmlParserCtxtPtr ctxt) {
2844 xmlChar *buf = NULL;
2845 int len;
2846 int size = XML_PARSER_BUFFER_SIZE;
2847 int q, ql;
2848 int r, rl;
2849 int cur, l;
2850 xmlParserInputState state;
2851 xmlParserInputPtr input = ctxt->input;
2852 int count = 0;
2853
2854 /*
2855 * Check that there is a comment right here.
2856 */
2857 if ((RAW != '<') || (NXT(1) != '!') ||
2858 (NXT(2) != '-') || (NXT(3) != '-')) return;
2859
2860 state = ctxt->instate;
2861 ctxt->instate = XML_PARSER_COMMENT;
2862 SHRINK;
2863 SKIP(4);
2864 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2865 if (buf == NULL) {
2866 xmlGenericError(xmlGenericErrorContext,
2867 "malloc of %d byte failed\n", size);
2868 ctxt->instate = state;
2869 return;
2870 }
2871 q = CUR_CHAR(ql);
2872 NEXTL(ql);
2873 r = CUR_CHAR(rl);
2874 NEXTL(rl);
2875 cur = CUR_CHAR(l);
2876 len = 0;
2877 while (IS_CHAR(cur) && /* checked */
2878 ((cur != '>') ||
2879 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002880 if ((r == '-') && (q == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002881 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2882 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2883 ctxt->sax->error(ctxt->userData,
2884 "Comment must not contain '--' (double-hyphen)`\n");
2885 ctxt->wellFormed = 0;
2886 ctxt->disableSAX = 1;
2887 }
2888 if (len + 5 >= size) {
2889 size *= 2;
2890 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2891 if (buf == NULL) {
2892 xmlGenericError(xmlGenericErrorContext,
2893 "realloc of %d byte failed\n", size);
2894 ctxt->instate = state;
2895 return;
2896 }
2897 }
2898 COPY_BUF(ql,buf,len,q);
2899 q = r;
2900 ql = rl;
2901 r = cur;
2902 rl = l;
2903
2904 count++;
2905 if (count > 50) {
2906 GROW;
2907 count = 0;
2908 }
2909 NEXTL(l);
2910 cur = CUR_CHAR(l);
2911 if (cur == 0) {
2912 SHRINK;
2913 GROW;
2914 cur = CUR_CHAR(l);
2915 }
2916 }
2917 buf[len] = 0;
2918 if (!IS_CHAR(cur)) {
2919 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2920 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2921 ctxt->sax->error(ctxt->userData,
2922 "Comment not terminated \n<!--%.50s\n", buf);
2923 ctxt->wellFormed = 0;
2924 ctxt->disableSAX = 1;
2925 xmlFree(buf);
2926 } else {
2927 if (input != ctxt->input) {
2928 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2929 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2930 ctxt->sax->error(ctxt->userData,
2931"Comment doesn't start and stop in the same entity\n");
2932 ctxt->wellFormed = 0;
2933 ctxt->disableSAX = 1;
2934 }
2935 NEXT;
2936 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2937 (!ctxt->disableSAX))
2938 ctxt->sax->comment(ctxt->userData, buf);
2939 xmlFree(buf);
2940 }
2941 ctxt->instate = state;
2942}
2943
2944/**
2945 * xmlParsePITarget:
2946 * @ctxt: an XML parser context
2947 *
2948 * parse the name of a PI
2949 *
2950 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2951 *
2952 * Returns the PITarget name or NULL
2953 */
2954
2955xmlChar *
2956xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2957 xmlChar *name;
2958
2959 name = xmlParseName(ctxt);
2960 if ((name != NULL) &&
2961 ((name[0] == 'x') || (name[0] == 'X')) &&
2962 ((name[1] == 'm') || (name[1] == 'M')) &&
2963 ((name[2] == 'l') || (name[2] == 'L'))) {
2964 int i;
2965 if ((name[0] == 'x') && (name[1] == 'm') &&
2966 (name[2] == 'l') && (name[3] == 0)) {
2967 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2968 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2969 ctxt->sax->error(ctxt->userData,
2970 "XML declaration allowed only at the start of the document\n");
2971 ctxt->wellFormed = 0;
2972 ctxt->disableSAX = 1;
2973 return(name);
2974 } else if (name[3] == 0) {
2975 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2976 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2977 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2978 ctxt->wellFormed = 0;
2979 ctxt->disableSAX = 1;
2980 return(name);
2981 }
2982 for (i = 0;;i++) {
2983 if (xmlW3CPIs[i] == NULL) break;
2984 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2985 return(name);
2986 }
2987 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2988 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2989 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002990 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002991 }
2992 }
2993 return(name);
2994}
2995
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002996#ifdef LIBXML_CATALOG_ENABLED
2997/**
2998 * xmlParseCatalogPI:
2999 * @ctxt: an XML parser context
3000 * @catalog: the PI value string
3001 *
3002 * parse an XML Catalog Processing Instruction.
3003 *
3004 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
3005 *
3006 * Occurs only if allowed by the user and if happening in the Misc
3007 * part of the document before any doctype informations
3008 * This will add the given catalog to the parsing context in order
3009 * to be used if there is a resolution need further down in the document
3010 */
3011
3012static void
3013xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
3014 xmlChar *URL = NULL;
3015 const xmlChar *tmp, *base;
3016 xmlChar marker;
3017
3018 tmp = catalog;
3019 while (IS_BLANK(*tmp)) tmp++;
3020 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
3021 goto error;
3022 tmp += 7;
3023 while (IS_BLANK(*tmp)) tmp++;
3024 if (*tmp != '=') {
3025 return;
3026 }
3027 tmp++;
3028 while (IS_BLANK(*tmp)) tmp++;
3029 marker = *tmp;
3030 if ((marker != '\'') && (marker != '"'))
3031 goto error;
3032 tmp++;
3033 base = tmp;
3034 while ((*tmp != 0) && (*tmp != marker)) tmp++;
3035 if (*tmp == 0)
3036 goto error;
3037 URL = xmlStrndup(base, tmp - base);
3038 tmp++;
3039 while (IS_BLANK(*tmp)) tmp++;
3040 if (*tmp != 0)
3041 goto error;
3042
3043 if (URL != NULL) {
3044 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
3045 xmlFree(URL);
3046 }
3047 return;
3048
3049error:
3050 ctxt->errNo = XML_WAR_CATALOG_PI;
3051 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
3052 ctxt->sax->warning(ctxt->userData,
3053 "Catalog PI syntax error: %s\n", catalog);
3054 if (URL != NULL)
3055 xmlFree(URL);
3056}
3057#endif
3058
Owen Taylor3473f882001-02-23 17:55:21 +00003059/**
3060 * xmlParsePI:
3061 * @ctxt: an XML parser context
3062 *
3063 * parse an XML Processing Instruction.
3064 *
3065 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3066 *
3067 * The processing is transfered to SAX once parsed.
3068 */
3069
3070void
3071xmlParsePI(xmlParserCtxtPtr ctxt) {
3072 xmlChar *buf = NULL;
3073 int len = 0;
3074 int size = XML_PARSER_BUFFER_SIZE;
3075 int cur, l;
3076 xmlChar *target;
3077 xmlParserInputState state;
3078 int count = 0;
3079
3080 if ((RAW == '<') && (NXT(1) == '?')) {
3081 xmlParserInputPtr input = ctxt->input;
3082 state = ctxt->instate;
3083 ctxt->instate = XML_PARSER_PI;
3084 /*
3085 * this is a Processing Instruction.
3086 */
3087 SKIP(2);
3088 SHRINK;
3089
3090 /*
3091 * Parse the target name and check for special support like
3092 * namespace.
3093 */
3094 target = xmlParsePITarget(ctxt);
3095 if (target != NULL) {
3096 if ((RAW == '?') && (NXT(1) == '>')) {
3097 if (input != ctxt->input) {
3098 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3099 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3100 ctxt->sax->error(ctxt->userData,
3101 "PI declaration doesn't start and stop in the same entity\n");
3102 ctxt->wellFormed = 0;
3103 ctxt->disableSAX = 1;
3104 }
3105 SKIP(2);
3106
3107 /*
3108 * SAX: PI detected.
3109 */
3110 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3111 (ctxt->sax->processingInstruction != NULL))
3112 ctxt->sax->processingInstruction(ctxt->userData,
3113 target, NULL);
3114 ctxt->instate = state;
3115 xmlFree(target);
3116 return;
3117 }
3118 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3119 if (buf == NULL) {
3120 xmlGenericError(xmlGenericErrorContext,
3121 "malloc of %d byte failed\n", size);
3122 ctxt->instate = state;
3123 return;
3124 }
3125 cur = CUR;
3126 if (!IS_BLANK(cur)) {
3127 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3128 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3129 ctxt->sax->error(ctxt->userData,
3130 "xmlParsePI: PI %s space expected\n", target);
3131 ctxt->wellFormed = 0;
3132 ctxt->disableSAX = 1;
3133 }
3134 SKIP_BLANKS;
3135 cur = CUR_CHAR(l);
3136 while (IS_CHAR(cur) && /* checked */
3137 ((cur != '?') || (NXT(1) != '>'))) {
3138 if (len + 5 >= size) {
3139 size *= 2;
3140 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3141 if (buf == NULL) {
3142 xmlGenericError(xmlGenericErrorContext,
3143 "realloc of %d byte failed\n", size);
3144 ctxt->instate = state;
3145 return;
3146 }
3147 }
3148 count++;
3149 if (count > 50) {
3150 GROW;
3151 count = 0;
3152 }
3153 COPY_BUF(l,buf,len,cur);
3154 NEXTL(l);
3155 cur = CUR_CHAR(l);
3156 if (cur == 0) {
3157 SHRINK;
3158 GROW;
3159 cur = CUR_CHAR(l);
3160 }
3161 }
3162 buf[len] = 0;
3163 if (cur != '?') {
3164 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3165 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3166 ctxt->sax->error(ctxt->userData,
3167 "xmlParsePI: PI %s never end ...\n", target);
3168 ctxt->wellFormed = 0;
3169 ctxt->disableSAX = 1;
3170 } else {
3171 if (input != ctxt->input) {
3172 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3173 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3174 ctxt->sax->error(ctxt->userData,
3175 "PI declaration doesn't start and stop in the same entity\n");
3176 ctxt->wellFormed = 0;
3177 ctxt->disableSAX = 1;
3178 }
3179 SKIP(2);
3180
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003181#ifdef LIBXML_CATALOG_ENABLED
3182 if (((state == XML_PARSER_MISC) ||
3183 (state == XML_PARSER_START)) &&
3184 (xmlStrEqual(target, XML_CATALOG_PI))) {
3185 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3186 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3187 (allow == XML_CATA_ALLOW_ALL))
3188 xmlParseCatalogPI(ctxt, buf);
3189 }
3190#endif
3191
3192
Owen Taylor3473f882001-02-23 17:55:21 +00003193 /*
3194 * SAX: PI detected.
3195 */
3196 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3197 (ctxt->sax->processingInstruction != NULL))
3198 ctxt->sax->processingInstruction(ctxt->userData,
3199 target, buf);
3200 }
3201 xmlFree(buf);
3202 xmlFree(target);
3203 } else {
3204 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3205 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3206 ctxt->sax->error(ctxt->userData,
3207 "xmlParsePI : no target name\n");
3208 ctxt->wellFormed = 0;
3209 ctxt->disableSAX = 1;
3210 }
3211 ctxt->instate = state;
3212 }
3213}
3214
3215/**
3216 * xmlParseNotationDecl:
3217 * @ctxt: an XML parser context
3218 *
3219 * parse a notation declaration
3220 *
3221 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3222 *
3223 * Hence there is actually 3 choices:
3224 * 'PUBLIC' S PubidLiteral
3225 * 'PUBLIC' S PubidLiteral S SystemLiteral
3226 * and 'SYSTEM' S SystemLiteral
3227 *
3228 * See the NOTE on xmlParseExternalID().
3229 */
3230
3231void
3232xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3233 xmlChar *name;
3234 xmlChar *Pubid;
3235 xmlChar *Systemid;
3236
3237 if ((RAW == '<') && (NXT(1) == '!') &&
3238 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3239 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3240 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3241 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3242 xmlParserInputPtr input = ctxt->input;
3243 SHRINK;
3244 SKIP(10);
3245 if (!IS_BLANK(CUR)) {
3246 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3247 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3248 ctxt->sax->error(ctxt->userData,
3249 "Space required after '<!NOTATION'\n");
3250 ctxt->wellFormed = 0;
3251 ctxt->disableSAX = 1;
3252 return;
3253 }
3254 SKIP_BLANKS;
3255
Daniel Veillard76d66f42001-05-16 21:05:17 +00003256 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003257 if (name == NULL) {
3258 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3259 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3260 ctxt->sax->error(ctxt->userData,
3261 "NOTATION: Name expected here\n");
3262 ctxt->wellFormed = 0;
3263 ctxt->disableSAX = 1;
3264 return;
3265 }
3266 if (!IS_BLANK(CUR)) {
3267 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3268 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3269 ctxt->sax->error(ctxt->userData,
3270 "Space required after the NOTATION name'\n");
3271 ctxt->wellFormed = 0;
3272 ctxt->disableSAX = 1;
3273 return;
3274 }
3275 SKIP_BLANKS;
3276
3277 /*
3278 * Parse the IDs.
3279 */
3280 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3281 SKIP_BLANKS;
3282
3283 if (RAW == '>') {
3284 if (input != ctxt->input) {
3285 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3286 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3287 ctxt->sax->error(ctxt->userData,
3288"Notation declaration doesn't start and stop in the same entity\n");
3289 ctxt->wellFormed = 0;
3290 ctxt->disableSAX = 1;
3291 }
3292 NEXT;
3293 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3294 (ctxt->sax->notationDecl != NULL))
3295 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3296 } else {
3297 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3298 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3299 ctxt->sax->error(ctxt->userData,
3300 "'>' required to close NOTATION declaration\n");
3301 ctxt->wellFormed = 0;
3302 ctxt->disableSAX = 1;
3303 }
3304 xmlFree(name);
3305 if (Systemid != NULL) xmlFree(Systemid);
3306 if (Pubid != NULL) xmlFree(Pubid);
3307 }
3308}
3309
3310/**
3311 * xmlParseEntityDecl:
3312 * @ctxt: an XML parser context
3313 *
3314 * parse <!ENTITY declarations
3315 *
3316 * [70] EntityDecl ::= GEDecl | PEDecl
3317 *
3318 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3319 *
3320 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3321 *
3322 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3323 *
3324 * [74] PEDef ::= EntityValue | ExternalID
3325 *
3326 * [76] NDataDecl ::= S 'NDATA' S Name
3327 *
3328 * [ VC: Notation Declared ]
3329 * The Name must match the declared name of a notation.
3330 */
3331
3332void
3333xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3334 xmlChar *name = NULL;
3335 xmlChar *value = NULL;
3336 xmlChar *URI = NULL, *literal = NULL;
3337 xmlChar *ndata = NULL;
3338 int isParameter = 0;
3339 xmlChar *orig = NULL;
3340
3341 GROW;
3342 if ((RAW == '<') && (NXT(1) == '!') &&
3343 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3344 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3345 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3346 xmlParserInputPtr input = ctxt->input;
3347 ctxt->instate = XML_PARSER_ENTITY_DECL;
3348 SHRINK;
3349 SKIP(8);
3350 if (!IS_BLANK(CUR)) {
3351 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3352 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3353 ctxt->sax->error(ctxt->userData,
3354 "Space required after '<!ENTITY'\n");
3355 ctxt->wellFormed = 0;
3356 ctxt->disableSAX = 1;
3357 }
3358 SKIP_BLANKS;
3359
3360 if (RAW == '%') {
3361 NEXT;
3362 if (!IS_BLANK(CUR)) {
3363 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3364 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3365 ctxt->sax->error(ctxt->userData,
3366 "Space required after '%'\n");
3367 ctxt->wellFormed = 0;
3368 ctxt->disableSAX = 1;
3369 }
3370 SKIP_BLANKS;
3371 isParameter = 1;
3372 }
3373
Daniel Veillard76d66f42001-05-16 21:05:17 +00003374 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003375 if (name == NULL) {
3376 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3377 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3378 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3379 ctxt->wellFormed = 0;
3380 ctxt->disableSAX = 1;
3381 return;
3382 }
3383 if (!IS_BLANK(CUR)) {
3384 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3385 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3386 ctxt->sax->error(ctxt->userData,
3387 "Space required after the entity name\n");
3388 ctxt->wellFormed = 0;
3389 ctxt->disableSAX = 1;
3390 }
3391 SKIP_BLANKS;
3392
3393 /*
3394 * handle the various case of definitions...
3395 */
3396 if (isParameter) {
3397 if ((RAW == '"') || (RAW == '\'')) {
3398 value = xmlParseEntityValue(ctxt, &orig);
3399 if (value) {
3400 if ((ctxt->sax != NULL) &&
3401 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3402 ctxt->sax->entityDecl(ctxt->userData, name,
3403 XML_INTERNAL_PARAMETER_ENTITY,
3404 NULL, NULL, value);
3405 }
3406 } else {
3407 URI = xmlParseExternalID(ctxt, &literal, 1);
3408 if ((URI == NULL) && (literal == NULL)) {
3409 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3410 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3411 ctxt->sax->error(ctxt->userData,
3412 "Entity value required\n");
3413 ctxt->wellFormed = 0;
3414 ctxt->disableSAX = 1;
3415 }
3416 if (URI) {
3417 xmlURIPtr uri;
3418
3419 uri = xmlParseURI((const char *) URI);
3420 if (uri == NULL) {
3421 ctxt->errNo = XML_ERR_INVALID_URI;
3422 if ((ctxt->sax != NULL) &&
3423 (!ctxt->disableSAX) &&
3424 (ctxt->sax->error != NULL))
3425 ctxt->sax->error(ctxt->userData,
3426 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003427 /*
3428 * This really ought to be a well formedness error
3429 * but the XML Core WG decided otherwise c.f. issue
3430 * E26 of the XML erratas.
3431 */
Owen Taylor3473f882001-02-23 17:55:21 +00003432 } else {
3433 if (uri->fragment != NULL) {
3434 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3435 if ((ctxt->sax != NULL) &&
3436 (!ctxt->disableSAX) &&
3437 (ctxt->sax->error != NULL))
3438 ctxt->sax->error(ctxt->userData,
3439 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003440 /*
3441 * Okay this is foolish to block those but not
3442 * invalid URIs.
3443 */
Owen Taylor3473f882001-02-23 17:55:21 +00003444 ctxt->wellFormed = 0;
3445 } else {
3446 if ((ctxt->sax != NULL) &&
3447 (!ctxt->disableSAX) &&
3448 (ctxt->sax->entityDecl != NULL))
3449 ctxt->sax->entityDecl(ctxt->userData, name,
3450 XML_EXTERNAL_PARAMETER_ENTITY,
3451 literal, URI, NULL);
3452 }
3453 xmlFreeURI(uri);
3454 }
3455 }
3456 }
3457 } else {
3458 if ((RAW == '"') || (RAW == '\'')) {
3459 value = xmlParseEntityValue(ctxt, &orig);
3460 if ((ctxt->sax != NULL) &&
3461 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3462 ctxt->sax->entityDecl(ctxt->userData, name,
3463 XML_INTERNAL_GENERAL_ENTITY,
3464 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003465 /*
3466 * For expat compatibility in SAX mode.
3467 */
3468 if ((ctxt->myDoc == NULL) ||
3469 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3470 if (ctxt->myDoc == NULL) {
3471 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3472 }
3473 if (ctxt->myDoc->intSubset == NULL)
3474 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3475 BAD_CAST "fake", NULL, NULL);
3476
3477 entityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3478 NULL, NULL, value);
3479 }
Owen Taylor3473f882001-02-23 17:55:21 +00003480 } else {
3481 URI = xmlParseExternalID(ctxt, &literal, 1);
3482 if ((URI == NULL) && (literal == NULL)) {
3483 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3484 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3485 ctxt->sax->error(ctxt->userData,
3486 "Entity value required\n");
3487 ctxt->wellFormed = 0;
3488 ctxt->disableSAX = 1;
3489 }
3490 if (URI) {
3491 xmlURIPtr uri;
3492
3493 uri = xmlParseURI((const char *)URI);
3494 if (uri == NULL) {
3495 ctxt->errNo = XML_ERR_INVALID_URI;
3496 if ((ctxt->sax != NULL) &&
3497 (!ctxt->disableSAX) &&
3498 (ctxt->sax->error != NULL))
3499 ctxt->sax->error(ctxt->userData,
3500 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003501 /*
3502 * This really ought to be a well formedness error
3503 * but the XML Core WG decided otherwise c.f. issue
3504 * E26 of the XML erratas.
3505 */
Owen Taylor3473f882001-02-23 17:55:21 +00003506 } else {
3507 if (uri->fragment != NULL) {
3508 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3509 if ((ctxt->sax != NULL) &&
3510 (!ctxt->disableSAX) &&
3511 (ctxt->sax->error != NULL))
3512 ctxt->sax->error(ctxt->userData,
3513 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003514 /*
3515 * Okay this is foolish to block those but not
3516 * invalid URIs.
3517 */
Owen Taylor3473f882001-02-23 17:55:21 +00003518 ctxt->wellFormed = 0;
3519 }
3520 xmlFreeURI(uri);
3521 }
3522 }
3523 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3524 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3525 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3526 ctxt->sax->error(ctxt->userData,
3527 "Space required before 'NDATA'\n");
3528 ctxt->wellFormed = 0;
3529 ctxt->disableSAX = 1;
3530 }
3531 SKIP_BLANKS;
3532 if ((RAW == 'N') && (NXT(1) == 'D') &&
3533 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3534 (NXT(4) == 'A')) {
3535 SKIP(5);
3536 if (!IS_BLANK(CUR)) {
3537 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3538 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3539 ctxt->sax->error(ctxt->userData,
3540 "Space required after 'NDATA'\n");
3541 ctxt->wellFormed = 0;
3542 ctxt->disableSAX = 1;
3543 }
3544 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003545 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003546 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3547 (ctxt->sax->unparsedEntityDecl != NULL))
3548 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3549 literal, URI, ndata);
3550 } else {
3551 if ((ctxt->sax != NULL) &&
3552 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3553 ctxt->sax->entityDecl(ctxt->userData, name,
3554 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3555 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003556 /*
3557 * For expat compatibility in SAX mode.
3558 * assuming the entity repalcement was asked for
3559 */
3560 if ((ctxt->replaceEntities != 0) &&
3561 ((ctxt->myDoc == NULL) ||
3562 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
3563 if (ctxt->myDoc == NULL) {
3564 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3565 }
3566
3567 if (ctxt->myDoc->intSubset == NULL)
3568 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3569 BAD_CAST "fake", NULL, NULL);
3570 entityDecl(ctxt, name,
3571 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3572 literal, URI, NULL);
3573 }
Owen Taylor3473f882001-02-23 17:55:21 +00003574 }
3575 }
3576 }
3577 SKIP_BLANKS;
3578 if (RAW != '>') {
3579 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3580 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3581 ctxt->sax->error(ctxt->userData,
3582 "xmlParseEntityDecl: entity %s not terminated\n", name);
3583 ctxt->wellFormed = 0;
3584 ctxt->disableSAX = 1;
3585 } else {
3586 if (input != ctxt->input) {
3587 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3588 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3589 ctxt->sax->error(ctxt->userData,
3590"Entity declaration doesn't start and stop in the same entity\n");
3591 ctxt->wellFormed = 0;
3592 ctxt->disableSAX = 1;
3593 }
3594 NEXT;
3595 }
3596 if (orig != NULL) {
3597 /*
3598 * Ugly mechanism to save the raw entity value.
3599 */
3600 xmlEntityPtr cur = NULL;
3601
3602 if (isParameter) {
3603 if ((ctxt->sax != NULL) &&
3604 (ctxt->sax->getParameterEntity != NULL))
3605 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3606 } else {
3607 if ((ctxt->sax != NULL) &&
3608 (ctxt->sax->getEntity != NULL))
3609 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003610 if ((cur == NULL) && (ctxt->userData==ctxt)) {
3611 cur = getEntity(ctxt, name);
3612 }
Owen Taylor3473f882001-02-23 17:55:21 +00003613 }
3614 if (cur != NULL) {
3615 if (cur->orig != NULL)
3616 xmlFree(orig);
3617 else
3618 cur->orig = orig;
3619 } else
3620 xmlFree(orig);
3621 }
3622 if (name != NULL) xmlFree(name);
3623 if (value != NULL) xmlFree(value);
3624 if (URI != NULL) xmlFree(URI);
3625 if (literal != NULL) xmlFree(literal);
3626 if (ndata != NULL) xmlFree(ndata);
3627 }
3628}
3629
3630/**
3631 * xmlParseDefaultDecl:
3632 * @ctxt: an XML parser context
3633 * @value: Receive a possible fixed default value for the attribute
3634 *
3635 * Parse an attribute default declaration
3636 *
3637 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3638 *
3639 * [ VC: Required Attribute ]
3640 * if the default declaration is the keyword #REQUIRED, then the
3641 * attribute must be specified for all elements of the type in the
3642 * attribute-list declaration.
3643 *
3644 * [ VC: Attribute Default Legal ]
3645 * The declared default value must meet the lexical constraints of
3646 * the declared attribute type c.f. xmlValidateAttributeDecl()
3647 *
3648 * [ VC: Fixed Attribute Default ]
3649 * if an attribute has a default value declared with the #FIXED
3650 * keyword, instances of that attribute must match the default value.
3651 *
3652 * [ WFC: No < in Attribute Values ]
3653 * handled in xmlParseAttValue()
3654 *
3655 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3656 * or XML_ATTRIBUTE_FIXED.
3657 */
3658
3659int
3660xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3661 int val;
3662 xmlChar *ret;
3663
3664 *value = NULL;
3665 if ((RAW == '#') && (NXT(1) == 'R') &&
3666 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3667 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3668 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3669 (NXT(8) == 'D')) {
3670 SKIP(9);
3671 return(XML_ATTRIBUTE_REQUIRED);
3672 }
3673 if ((RAW == '#') && (NXT(1) == 'I') &&
3674 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3675 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3676 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3677 SKIP(8);
3678 return(XML_ATTRIBUTE_IMPLIED);
3679 }
3680 val = XML_ATTRIBUTE_NONE;
3681 if ((RAW == '#') && (NXT(1) == 'F') &&
3682 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3683 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3684 SKIP(6);
3685 val = XML_ATTRIBUTE_FIXED;
3686 if (!IS_BLANK(CUR)) {
3687 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3688 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3689 ctxt->sax->error(ctxt->userData,
3690 "Space required after '#FIXED'\n");
3691 ctxt->wellFormed = 0;
3692 ctxt->disableSAX = 1;
3693 }
3694 SKIP_BLANKS;
3695 }
3696 ret = xmlParseAttValue(ctxt);
3697 ctxt->instate = XML_PARSER_DTD;
3698 if (ret == NULL) {
3699 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3700 ctxt->sax->error(ctxt->userData,
3701 "Attribute default value declaration error\n");
3702 ctxt->wellFormed = 0;
3703 ctxt->disableSAX = 1;
3704 } else
3705 *value = ret;
3706 return(val);
3707}
3708
3709/**
3710 * xmlParseNotationType:
3711 * @ctxt: an XML parser context
3712 *
3713 * parse an Notation attribute type.
3714 *
3715 * Note: the leading 'NOTATION' S part has already being parsed...
3716 *
3717 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3718 *
3719 * [ VC: Notation Attributes ]
3720 * Values of this type must match one of the notation names included
3721 * in the declaration; all notation names in the declaration must be declared.
3722 *
3723 * Returns: the notation attribute tree built while parsing
3724 */
3725
3726xmlEnumerationPtr
3727xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3728 xmlChar *name;
3729 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3730
3731 if (RAW != '(') {
3732 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3733 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3734 ctxt->sax->error(ctxt->userData,
3735 "'(' required to start 'NOTATION'\n");
3736 ctxt->wellFormed = 0;
3737 ctxt->disableSAX = 1;
3738 return(NULL);
3739 }
3740 SHRINK;
3741 do {
3742 NEXT;
3743 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003744 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003745 if (name == NULL) {
3746 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3747 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3748 ctxt->sax->error(ctxt->userData,
3749 "Name expected in NOTATION declaration\n");
3750 ctxt->wellFormed = 0;
3751 ctxt->disableSAX = 1;
3752 return(ret);
3753 }
3754 cur = xmlCreateEnumeration(name);
3755 xmlFree(name);
3756 if (cur == NULL) return(ret);
3757 if (last == NULL) ret = last = cur;
3758 else {
3759 last->next = cur;
3760 last = cur;
3761 }
3762 SKIP_BLANKS;
3763 } while (RAW == '|');
3764 if (RAW != ')') {
3765 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3766 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3767 ctxt->sax->error(ctxt->userData,
3768 "')' required to finish NOTATION declaration\n");
3769 ctxt->wellFormed = 0;
3770 ctxt->disableSAX = 1;
3771 if ((last != NULL) && (last != ret))
3772 xmlFreeEnumeration(last);
3773 return(ret);
3774 }
3775 NEXT;
3776 return(ret);
3777}
3778
3779/**
3780 * xmlParseEnumerationType:
3781 * @ctxt: an XML parser context
3782 *
3783 * parse an Enumeration attribute type.
3784 *
3785 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3786 *
3787 * [ VC: Enumeration ]
3788 * Values of this type must match one of the Nmtoken tokens in
3789 * the declaration
3790 *
3791 * Returns: the enumeration attribute tree built while parsing
3792 */
3793
3794xmlEnumerationPtr
3795xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3796 xmlChar *name;
3797 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3798
3799 if (RAW != '(') {
3800 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3801 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3802 ctxt->sax->error(ctxt->userData,
3803 "'(' required to start ATTLIST enumeration\n");
3804 ctxt->wellFormed = 0;
3805 ctxt->disableSAX = 1;
3806 return(NULL);
3807 }
3808 SHRINK;
3809 do {
3810 NEXT;
3811 SKIP_BLANKS;
3812 name = xmlParseNmtoken(ctxt);
3813 if (name == NULL) {
3814 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3815 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3816 ctxt->sax->error(ctxt->userData,
3817 "NmToken expected in ATTLIST enumeration\n");
3818 ctxt->wellFormed = 0;
3819 ctxt->disableSAX = 1;
3820 return(ret);
3821 }
3822 cur = xmlCreateEnumeration(name);
3823 xmlFree(name);
3824 if (cur == NULL) return(ret);
3825 if (last == NULL) ret = last = cur;
3826 else {
3827 last->next = cur;
3828 last = cur;
3829 }
3830 SKIP_BLANKS;
3831 } while (RAW == '|');
3832 if (RAW != ')') {
3833 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3834 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3835 ctxt->sax->error(ctxt->userData,
3836 "')' required to finish ATTLIST enumeration\n");
3837 ctxt->wellFormed = 0;
3838 ctxt->disableSAX = 1;
3839 return(ret);
3840 }
3841 NEXT;
3842 return(ret);
3843}
3844
3845/**
3846 * xmlParseEnumeratedType:
3847 * @ctxt: an XML parser context
3848 * @tree: the enumeration tree built while parsing
3849 *
3850 * parse an Enumerated attribute type.
3851 *
3852 * [57] EnumeratedType ::= NotationType | Enumeration
3853 *
3854 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3855 *
3856 *
3857 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3858 */
3859
3860int
3861xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3862 if ((RAW == 'N') && (NXT(1) == 'O') &&
3863 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3864 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3865 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3866 SKIP(8);
3867 if (!IS_BLANK(CUR)) {
3868 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3869 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3870 ctxt->sax->error(ctxt->userData,
3871 "Space required after 'NOTATION'\n");
3872 ctxt->wellFormed = 0;
3873 ctxt->disableSAX = 1;
3874 return(0);
3875 }
3876 SKIP_BLANKS;
3877 *tree = xmlParseNotationType(ctxt);
3878 if (*tree == NULL) return(0);
3879 return(XML_ATTRIBUTE_NOTATION);
3880 }
3881 *tree = xmlParseEnumerationType(ctxt);
3882 if (*tree == NULL) return(0);
3883 return(XML_ATTRIBUTE_ENUMERATION);
3884}
3885
3886/**
3887 * xmlParseAttributeType:
3888 * @ctxt: an XML parser context
3889 * @tree: the enumeration tree built while parsing
3890 *
3891 * parse the Attribute list def for an element
3892 *
3893 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3894 *
3895 * [55] StringType ::= 'CDATA'
3896 *
3897 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3898 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3899 *
3900 * Validity constraints for attribute values syntax are checked in
3901 * xmlValidateAttributeValue()
3902 *
3903 * [ VC: ID ]
3904 * Values of type ID must match the Name production. A name must not
3905 * appear more than once in an XML document as a value of this type;
3906 * i.e., ID values must uniquely identify the elements which bear them.
3907 *
3908 * [ VC: One ID per Element Type ]
3909 * No element type may have more than one ID attribute specified.
3910 *
3911 * [ VC: ID Attribute Default ]
3912 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3913 *
3914 * [ VC: IDREF ]
3915 * Values of type IDREF must match the Name production, and values
3916 * of type IDREFS must match Names; each IDREF Name must match the value
3917 * of an ID attribute on some element in the XML document; i.e. IDREF
3918 * values must match the value of some ID attribute.
3919 *
3920 * [ VC: Entity Name ]
3921 * Values of type ENTITY must match the Name production, values
3922 * of type ENTITIES must match Names; each Entity Name must match the
3923 * name of an unparsed entity declared in the DTD.
3924 *
3925 * [ VC: Name Token ]
3926 * Values of type NMTOKEN must match the Nmtoken production; values
3927 * of type NMTOKENS must match Nmtokens.
3928 *
3929 * Returns the attribute type
3930 */
3931int
3932xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3933 SHRINK;
3934 if ((RAW == 'C') && (NXT(1) == 'D') &&
3935 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3936 (NXT(4) == 'A')) {
3937 SKIP(5);
3938 return(XML_ATTRIBUTE_CDATA);
3939 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3940 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3941 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3942 SKIP(6);
3943 return(XML_ATTRIBUTE_IDREFS);
3944 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3945 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3946 (NXT(4) == 'F')) {
3947 SKIP(5);
3948 return(XML_ATTRIBUTE_IDREF);
3949 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3950 SKIP(2);
3951 return(XML_ATTRIBUTE_ID);
3952 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3953 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3954 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3955 SKIP(6);
3956 return(XML_ATTRIBUTE_ENTITY);
3957 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3958 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3959 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3960 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3961 SKIP(8);
3962 return(XML_ATTRIBUTE_ENTITIES);
3963 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3964 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3965 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3966 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3967 SKIP(8);
3968 return(XML_ATTRIBUTE_NMTOKENS);
3969 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3970 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3971 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3972 (NXT(6) == 'N')) {
3973 SKIP(7);
3974 return(XML_ATTRIBUTE_NMTOKEN);
3975 }
3976 return(xmlParseEnumeratedType(ctxt, tree));
3977}
3978
3979/**
3980 * xmlParseAttributeListDecl:
3981 * @ctxt: an XML parser context
3982 *
3983 * : parse the Attribute list def for an element
3984 *
3985 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3986 *
3987 * [53] AttDef ::= S Name S AttType S DefaultDecl
3988 *
3989 */
3990void
3991xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3992 xmlChar *elemName;
3993 xmlChar *attrName;
3994 xmlEnumerationPtr tree;
3995
3996 if ((RAW == '<') && (NXT(1) == '!') &&
3997 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3998 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3999 (NXT(6) == 'I') && (NXT(7) == 'S') &&
4000 (NXT(8) == 'T')) {
4001 xmlParserInputPtr input = ctxt->input;
4002
4003 SKIP(9);
4004 if (!IS_BLANK(CUR)) {
4005 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4007 ctxt->sax->error(ctxt->userData,
4008 "Space required after '<!ATTLIST'\n");
4009 ctxt->wellFormed = 0;
4010 ctxt->disableSAX = 1;
4011 }
4012 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004013 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004014 if (elemName == NULL) {
4015 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4016 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4017 ctxt->sax->error(ctxt->userData,
4018 "ATTLIST: no name for Element\n");
4019 ctxt->wellFormed = 0;
4020 ctxt->disableSAX = 1;
4021 return;
4022 }
4023 SKIP_BLANKS;
4024 GROW;
4025 while (RAW != '>') {
4026 const xmlChar *check = CUR_PTR;
4027 int type;
4028 int def;
4029 xmlChar *defaultValue = NULL;
4030
4031 GROW;
4032 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004033 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004034 if (attrName == NULL) {
4035 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4036 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4037 ctxt->sax->error(ctxt->userData,
4038 "ATTLIST: no name for Attribute\n");
4039 ctxt->wellFormed = 0;
4040 ctxt->disableSAX = 1;
4041 break;
4042 }
4043 GROW;
4044 if (!IS_BLANK(CUR)) {
4045 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4046 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4047 ctxt->sax->error(ctxt->userData,
4048 "Space required after the attribute name\n");
4049 ctxt->wellFormed = 0;
4050 ctxt->disableSAX = 1;
4051 if (attrName != NULL)
4052 xmlFree(attrName);
4053 if (defaultValue != NULL)
4054 xmlFree(defaultValue);
4055 break;
4056 }
4057 SKIP_BLANKS;
4058
4059 type = xmlParseAttributeType(ctxt, &tree);
4060 if (type <= 0) {
4061 if (attrName != NULL)
4062 xmlFree(attrName);
4063 if (defaultValue != NULL)
4064 xmlFree(defaultValue);
4065 break;
4066 }
4067
4068 GROW;
4069 if (!IS_BLANK(CUR)) {
4070 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4071 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4072 ctxt->sax->error(ctxt->userData,
4073 "Space required after the attribute type\n");
4074 ctxt->wellFormed = 0;
4075 ctxt->disableSAX = 1;
4076 if (attrName != NULL)
4077 xmlFree(attrName);
4078 if (defaultValue != NULL)
4079 xmlFree(defaultValue);
4080 if (tree != NULL)
4081 xmlFreeEnumeration(tree);
4082 break;
4083 }
4084 SKIP_BLANKS;
4085
4086 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4087 if (def <= 0) {
4088 if (attrName != NULL)
4089 xmlFree(attrName);
4090 if (defaultValue != NULL)
4091 xmlFree(defaultValue);
4092 if (tree != NULL)
4093 xmlFreeEnumeration(tree);
4094 break;
4095 }
4096
4097 GROW;
4098 if (RAW != '>') {
4099 if (!IS_BLANK(CUR)) {
4100 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4101 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4102 ctxt->sax->error(ctxt->userData,
4103 "Space required after the attribute default value\n");
4104 ctxt->wellFormed = 0;
4105 ctxt->disableSAX = 1;
4106 if (attrName != NULL)
4107 xmlFree(attrName);
4108 if (defaultValue != NULL)
4109 xmlFree(defaultValue);
4110 if (tree != NULL)
4111 xmlFreeEnumeration(tree);
4112 break;
4113 }
4114 SKIP_BLANKS;
4115 }
4116 if (check == CUR_PTR) {
4117 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4118 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4119 ctxt->sax->error(ctxt->userData,
4120 "xmlParseAttributeListDecl: detected internal error\n");
4121 if (attrName != NULL)
4122 xmlFree(attrName);
4123 if (defaultValue != NULL)
4124 xmlFree(defaultValue);
4125 if (tree != NULL)
4126 xmlFreeEnumeration(tree);
4127 break;
4128 }
4129 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4130 (ctxt->sax->attributeDecl != NULL))
4131 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4132 type, def, defaultValue, tree);
4133 if (attrName != NULL)
4134 xmlFree(attrName);
4135 if (defaultValue != NULL)
4136 xmlFree(defaultValue);
4137 GROW;
4138 }
4139 if (RAW == '>') {
4140 if (input != ctxt->input) {
4141 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4142 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4143 ctxt->sax->error(ctxt->userData,
4144"Attribute list declaration doesn't start and stop in the same entity\n");
4145 ctxt->wellFormed = 0;
4146 ctxt->disableSAX = 1;
4147 }
4148 NEXT;
4149 }
4150
4151 xmlFree(elemName);
4152 }
4153}
4154
4155/**
4156 * xmlParseElementMixedContentDecl:
4157 * @ctxt: an XML parser context
4158 *
4159 * parse the declaration for a Mixed Element content
4160 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4161 *
4162 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4163 * '(' S? '#PCDATA' S? ')'
4164 *
4165 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4166 *
4167 * [ VC: No Duplicate Types ]
4168 * The same name must not appear more than once in a single
4169 * mixed-content declaration.
4170 *
4171 * returns: the list of the xmlElementContentPtr describing the element choices
4172 */
4173xmlElementContentPtr
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004174xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004175 xmlElementContentPtr ret = NULL, cur = NULL, n;
4176 xmlChar *elem = NULL;
4177
4178 GROW;
4179 if ((RAW == '#') && (NXT(1) == 'P') &&
4180 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4181 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4182 (NXT(6) == 'A')) {
4183 SKIP(7);
4184 SKIP_BLANKS;
4185 SHRINK;
4186 if (RAW == ')') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004187 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4188 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4189 if (ctxt->vctxt.error != NULL)
4190 ctxt->vctxt.error(ctxt->vctxt.userData,
4191"Element content declaration doesn't start and stop in the same entity\n");
4192 ctxt->valid = 0;
4193 }
Owen Taylor3473f882001-02-23 17:55:21 +00004194 NEXT;
4195 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4196 if (RAW == '*') {
4197 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4198 NEXT;
4199 }
4200 return(ret);
4201 }
4202 if ((RAW == '(') || (RAW == '|')) {
4203 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4204 if (ret == NULL) return(NULL);
4205 }
4206 while (RAW == '|') {
4207 NEXT;
4208 if (elem == NULL) {
4209 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4210 if (ret == NULL) return(NULL);
4211 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004212 if (cur != NULL)
4213 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004214 cur = ret;
4215 } else {
4216 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4217 if (n == NULL) return(NULL);
4218 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004219 if (n->c1 != NULL)
4220 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004221 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004222 if (n != NULL)
4223 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004224 cur = n;
4225 xmlFree(elem);
4226 }
4227 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004228 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004229 if (elem == NULL) {
4230 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4231 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4232 ctxt->sax->error(ctxt->userData,
4233 "xmlParseElementMixedContentDecl : Name expected\n");
4234 ctxt->wellFormed = 0;
4235 ctxt->disableSAX = 1;
4236 xmlFreeElementContent(cur);
4237 return(NULL);
4238 }
4239 SKIP_BLANKS;
4240 GROW;
4241 }
4242 if ((RAW == ')') && (NXT(1) == '*')) {
4243 if (elem != NULL) {
4244 cur->c2 = xmlNewElementContent(elem,
4245 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004246 if (cur->c2 != NULL)
4247 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004248 xmlFree(elem);
4249 }
4250 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004251 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4252 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4253 if (ctxt->vctxt.error != NULL)
4254 ctxt->vctxt.error(ctxt->vctxt.userData,
4255"Element content declaration doesn't start and stop in the same entity\n");
4256 ctxt->valid = 0;
4257 }
Owen Taylor3473f882001-02-23 17:55:21 +00004258 SKIP(2);
4259 } else {
4260 if (elem != NULL) xmlFree(elem);
4261 xmlFreeElementContent(ret);
4262 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4263 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4264 ctxt->sax->error(ctxt->userData,
4265 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4266 ctxt->wellFormed = 0;
4267 ctxt->disableSAX = 1;
4268 return(NULL);
4269 }
4270
4271 } else {
4272 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4273 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4274 ctxt->sax->error(ctxt->userData,
4275 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4276 ctxt->wellFormed = 0;
4277 ctxt->disableSAX = 1;
4278 }
4279 return(ret);
4280}
4281
4282/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004283 * xmlParseElementChildrenContentD:
4284 * @ctxt: an XML parser context
4285 *
4286 * VMS version of xmlParseElementChildrenContentDecl()
4287 *
4288 * Returns the tree of xmlElementContentPtr describing the element
4289 * hierarchy.
4290 */
4291/**
Owen Taylor3473f882001-02-23 17:55:21 +00004292 * xmlParseElementChildrenContentDecl:
4293 * @ctxt: an XML parser context
4294 *
4295 * parse the declaration for a Mixed Element content
4296 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4297 *
4298 *
4299 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4300 *
4301 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4302 *
4303 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4304 *
4305 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4306 *
4307 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4308 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004309 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004310 * opening or closing parentheses in a choice, seq, or Mixed
4311 * construct is contained in the replacement text for a parameter
4312 * entity, both must be contained in the same replacement text. For
4313 * interoperability, if a parameter-entity reference appears in a
4314 * choice, seq, or Mixed construct, its replacement text should not
4315 * be empty, and neither the first nor last non-blank character of
4316 * the replacement text should be a connector (| or ,).
4317 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004318 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004319 * hierarchy.
4320 */
4321xmlElementContentPtr
4322#ifdef VMS
4323xmlParseElementChildrenContentD
4324#else
4325xmlParseElementChildrenContentDecl
4326#endif
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004327(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004328 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4329 xmlChar *elem;
4330 xmlChar type = 0;
4331
4332 SKIP_BLANKS;
4333 GROW;
4334 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004335 xmlParserInputPtr input = ctxt->input;
4336
Owen Taylor3473f882001-02-23 17:55:21 +00004337 /* Recurse on first child */
4338 NEXT;
4339 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004340 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004341 SKIP_BLANKS;
4342 GROW;
4343 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004344 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004345 if (elem == NULL) {
4346 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4347 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4348 ctxt->sax->error(ctxt->userData,
4349 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4350 ctxt->wellFormed = 0;
4351 ctxt->disableSAX = 1;
4352 return(NULL);
4353 }
4354 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4355 GROW;
4356 if (RAW == '?') {
4357 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4358 NEXT;
4359 } else if (RAW == '*') {
4360 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4361 NEXT;
4362 } else if (RAW == '+') {
4363 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4364 NEXT;
4365 } else {
4366 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4367 }
4368 xmlFree(elem);
4369 GROW;
4370 }
4371 SKIP_BLANKS;
4372 SHRINK;
4373 while (RAW != ')') {
4374 /*
4375 * Each loop we parse one separator and one element.
4376 */
4377 if (RAW == ',') {
4378 if (type == 0) type = CUR;
4379
4380 /*
4381 * Detect "Name | Name , Name" error
4382 */
4383 else if (type != CUR) {
4384 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4385 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4386 ctxt->sax->error(ctxt->userData,
4387 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4388 type);
4389 ctxt->wellFormed = 0;
4390 ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004391 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004392 xmlFreeElementContent(last);
4393 if (ret != NULL)
4394 xmlFreeElementContent(ret);
4395 return(NULL);
4396 }
4397 NEXT;
4398
4399 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4400 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004401 if ((last != NULL) && (last != ret))
4402 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004403 xmlFreeElementContent(ret);
4404 return(NULL);
4405 }
4406 if (last == NULL) {
4407 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004408 if (ret != NULL)
4409 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004410 ret = cur = op;
4411 } else {
4412 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004413 if (op != NULL)
4414 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004415 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004416 if (last != NULL)
4417 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004418 cur =op;
4419 last = NULL;
4420 }
4421 } else if (RAW == '|') {
4422 if (type == 0) type = CUR;
4423
4424 /*
4425 * Detect "Name , Name | Name" error
4426 */
4427 else if (type != CUR) {
4428 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4429 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4430 ctxt->sax->error(ctxt->userData,
4431 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4432 type);
4433 ctxt->wellFormed = 0;
4434 ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004435 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004436 xmlFreeElementContent(last);
4437 if (ret != NULL)
4438 xmlFreeElementContent(ret);
4439 return(NULL);
4440 }
4441 NEXT;
4442
4443 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4444 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004445 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004446 xmlFreeElementContent(last);
4447 if (ret != NULL)
4448 xmlFreeElementContent(ret);
4449 return(NULL);
4450 }
4451 if (last == NULL) {
4452 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004453 if (ret != NULL)
4454 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004455 ret = cur = op;
4456 } else {
4457 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004458 if (op != NULL)
4459 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004460 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004461 if (last != NULL)
4462 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004463 cur =op;
4464 last = NULL;
4465 }
4466 } else {
4467 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4468 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4469 ctxt->sax->error(ctxt->userData,
4470 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4471 ctxt->wellFormed = 0;
4472 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004473 if (ret != NULL)
4474 xmlFreeElementContent(ret);
4475 return(NULL);
4476 }
4477 GROW;
4478 SKIP_BLANKS;
4479 GROW;
4480 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004481 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004482 /* Recurse on second child */
4483 NEXT;
4484 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004485 last = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004486 SKIP_BLANKS;
4487 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004488 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004489 if (elem == NULL) {
4490 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4491 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4492 ctxt->sax->error(ctxt->userData,
4493 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4494 ctxt->wellFormed = 0;
4495 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004496 if (ret != NULL)
4497 xmlFreeElementContent(ret);
4498 return(NULL);
4499 }
4500 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4501 xmlFree(elem);
4502 if (RAW == '?') {
4503 last->ocur = XML_ELEMENT_CONTENT_OPT;
4504 NEXT;
4505 } else if (RAW == '*') {
4506 last->ocur = XML_ELEMENT_CONTENT_MULT;
4507 NEXT;
4508 } else if (RAW == '+') {
4509 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4510 NEXT;
4511 } else {
4512 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4513 }
4514 }
4515 SKIP_BLANKS;
4516 GROW;
4517 }
4518 if ((cur != NULL) && (last != NULL)) {
4519 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004520 if (last != NULL)
4521 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004522 }
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004523 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4524 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4525 if (ctxt->vctxt.error != NULL)
4526 ctxt->vctxt.error(ctxt->vctxt.userData,
4527"Element content declaration doesn't start and stop in the same entity\n");
4528 ctxt->valid = 0;
4529 }
Owen Taylor3473f882001-02-23 17:55:21 +00004530 NEXT;
4531 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004532 if (ret != NULL)
4533 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004534 NEXT;
4535 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004536 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004537 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004538 cur = ret;
4539 /*
4540 * Some normalization:
4541 * (a | b* | c?)* == (a | b | c)*
4542 */
4543 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4544 if ((cur->c1 != NULL) &&
4545 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4546 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4547 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4548 if ((cur->c2 != NULL) &&
4549 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4550 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4551 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4552 cur = cur->c2;
4553 }
4554 }
Owen Taylor3473f882001-02-23 17:55:21 +00004555 NEXT;
4556 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004557 if (ret != NULL) {
4558 int found = 0;
4559
Daniel Veillarde470df72001-04-18 21:41:07 +00004560 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004561 /*
4562 * Some normalization:
4563 * (a | b*)+ == (a | b)*
4564 * (a | b?)+ == (a | b)*
4565 */
4566 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4567 if ((cur->c1 != NULL) &&
4568 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4569 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4570 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4571 found = 1;
4572 }
4573 if ((cur->c2 != NULL) &&
4574 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4575 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4576 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4577 found = 1;
4578 }
4579 cur = cur->c2;
4580 }
4581 if (found)
4582 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4583 }
Owen Taylor3473f882001-02-23 17:55:21 +00004584 NEXT;
4585 }
4586 return(ret);
4587}
4588
4589/**
4590 * xmlParseElementContentDecl:
4591 * @ctxt: an XML parser context
4592 * @name: the name of the element being defined.
4593 * @result: the Element Content pointer will be stored here if any
4594 *
4595 * parse the declaration for an Element content either Mixed or Children,
4596 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4597 *
4598 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4599 *
4600 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4601 */
4602
4603int
4604xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4605 xmlElementContentPtr *result) {
4606
4607 xmlElementContentPtr tree = NULL;
4608 xmlParserInputPtr input = ctxt->input;
4609 int res;
4610
4611 *result = NULL;
4612
4613 if (RAW != '(') {
4614 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4615 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4616 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004617 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004618 ctxt->wellFormed = 0;
4619 ctxt->disableSAX = 1;
4620 return(-1);
4621 }
4622 NEXT;
4623 GROW;
4624 SKIP_BLANKS;
4625 if ((RAW == '#') && (NXT(1) == 'P') &&
4626 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4627 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4628 (NXT(6) == 'A')) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004629 tree = xmlParseElementMixedContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004630 res = XML_ELEMENT_TYPE_MIXED;
4631 } else {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004632 tree = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004633 res = XML_ELEMENT_TYPE_ELEMENT;
4634 }
Owen Taylor3473f882001-02-23 17:55:21 +00004635 SKIP_BLANKS;
4636 *result = tree;
4637 return(res);
4638}
4639
4640/**
4641 * xmlParseElementDecl:
4642 * @ctxt: an XML parser context
4643 *
4644 * parse an Element declaration.
4645 *
4646 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4647 *
4648 * [ VC: Unique Element Type Declaration ]
4649 * No element type may be declared more than once
4650 *
4651 * Returns the type of the element, or -1 in case of error
4652 */
4653int
4654xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4655 xmlChar *name;
4656 int ret = -1;
4657 xmlElementContentPtr content = NULL;
4658
4659 GROW;
4660 if ((RAW == '<') && (NXT(1) == '!') &&
4661 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4662 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4663 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4664 (NXT(8) == 'T')) {
4665 xmlParserInputPtr input = ctxt->input;
4666
4667 SKIP(9);
4668 if (!IS_BLANK(CUR)) {
4669 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4670 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4671 ctxt->sax->error(ctxt->userData,
4672 "Space required after 'ELEMENT'\n");
4673 ctxt->wellFormed = 0;
4674 ctxt->disableSAX = 1;
4675 }
4676 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004677 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004678 if (name == NULL) {
4679 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4680 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4681 ctxt->sax->error(ctxt->userData,
4682 "xmlParseElementDecl: no name for Element\n");
4683 ctxt->wellFormed = 0;
4684 ctxt->disableSAX = 1;
4685 return(-1);
4686 }
4687 while ((RAW == 0) && (ctxt->inputNr > 1))
4688 xmlPopInput(ctxt);
4689 if (!IS_BLANK(CUR)) {
4690 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4691 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4692 ctxt->sax->error(ctxt->userData,
4693 "Space required after the element name\n");
4694 ctxt->wellFormed = 0;
4695 ctxt->disableSAX = 1;
4696 }
4697 SKIP_BLANKS;
4698 if ((RAW == 'E') && (NXT(1) == 'M') &&
4699 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4700 (NXT(4) == 'Y')) {
4701 SKIP(5);
4702 /*
4703 * Element must always be empty.
4704 */
4705 ret = XML_ELEMENT_TYPE_EMPTY;
4706 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4707 (NXT(2) == 'Y')) {
4708 SKIP(3);
4709 /*
4710 * Element is a generic container.
4711 */
4712 ret = XML_ELEMENT_TYPE_ANY;
4713 } else if (RAW == '(') {
4714 ret = xmlParseElementContentDecl(ctxt, name, &content);
4715 } else {
4716 /*
4717 * [ WFC: PEs in Internal Subset ] error handling.
4718 */
4719 if ((RAW == '%') && (ctxt->external == 0) &&
4720 (ctxt->inputNr == 1)) {
4721 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4722 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4723 ctxt->sax->error(ctxt->userData,
4724 "PEReference: forbidden within markup decl in internal subset\n");
4725 } else {
4726 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4727 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4728 ctxt->sax->error(ctxt->userData,
4729 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4730 }
4731 ctxt->wellFormed = 0;
4732 ctxt->disableSAX = 1;
4733 if (name != NULL) xmlFree(name);
4734 return(-1);
4735 }
4736
4737 SKIP_BLANKS;
4738 /*
4739 * Pop-up of finished entities.
4740 */
4741 while ((RAW == 0) && (ctxt->inputNr > 1))
4742 xmlPopInput(ctxt);
4743 SKIP_BLANKS;
4744
4745 if (RAW != '>') {
4746 ctxt->errNo = XML_ERR_GT_REQUIRED;
4747 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4748 ctxt->sax->error(ctxt->userData,
4749 "xmlParseElementDecl: expected '>' at the end\n");
4750 ctxt->wellFormed = 0;
4751 ctxt->disableSAX = 1;
4752 } else {
4753 if (input != ctxt->input) {
4754 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4755 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4756 ctxt->sax->error(ctxt->userData,
4757"Element declaration doesn't start and stop in the same entity\n");
4758 ctxt->wellFormed = 0;
4759 ctxt->disableSAX = 1;
4760 }
4761
4762 NEXT;
4763 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4764 (ctxt->sax->elementDecl != NULL))
4765 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4766 content);
4767 }
4768 if (content != NULL) {
4769 xmlFreeElementContent(content);
4770 }
4771 if (name != NULL) {
4772 xmlFree(name);
4773 }
4774 }
4775 return(ret);
4776}
4777
4778/**
Owen Taylor3473f882001-02-23 17:55:21 +00004779 * xmlParseConditionalSections
4780 * @ctxt: an XML parser context
4781 *
4782 * [61] conditionalSect ::= includeSect | ignoreSect
4783 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4784 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4785 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4786 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4787 */
4788
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004789static void
Owen Taylor3473f882001-02-23 17:55:21 +00004790xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4791 SKIP(3);
4792 SKIP_BLANKS;
4793 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4794 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4795 (NXT(6) == 'E')) {
4796 SKIP(7);
4797 SKIP_BLANKS;
4798 if (RAW != '[') {
4799 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4800 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4801 ctxt->sax->error(ctxt->userData,
4802 "XML conditional section '[' expected\n");
4803 ctxt->wellFormed = 0;
4804 ctxt->disableSAX = 1;
4805 } else {
4806 NEXT;
4807 }
4808 if (xmlParserDebugEntities) {
4809 if ((ctxt->input != NULL) && (ctxt->input->filename))
4810 xmlGenericError(xmlGenericErrorContext,
4811 "%s(%d): ", ctxt->input->filename,
4812 ctxt->input->line);
4813 xmlGenericError(xmlGenericErrorContext,
4814 "Entering INCLUDE Conditional Section\n");
4815 }
4816
4817 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4818 (NXT(2) != '>'))) {
4819 const xmlChar *check = CUR_PTR;
4820 int cons = ctxt->input->consumed;
4821 int tok = ctxt->token;
4822
4823 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4824 xmlParseConditionalSections(ctxt);
4825 } else if (IS_BLANK(CUR)) {
4826 NEXT;
4827 } else if (RAW == '%') {
4828 xmlParsePEReference(ctxt);
4829 } else
4830 xmlParseMarkupDecl(ctxt);
4831
4832 /*
4833 * Pop-up of finished entities.
4834 */
4835 while ((RAW == 0) && (ctxt->inputNr > 1))
4836 xmlPopInput(ctxt);
4837
4838 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4839 (tok == ctxt->token)) {
4840 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4841 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4842 ctxt->sax->error(ctxt->userData,
4843 "Content error in the external subset\n");
4844 ctxt->wellFormed = 0;
4845 ctxt->disableSAX = 1;
4846 break;
4847 }
4848 }
4849 if (xmlParserDebugEntities) {
4850 if ((ctxt->input != NULL) && (ctxt->input->filename))
4851 xmlGenericError(xmlGenericErrorContext,
4852 "%s(%d): ", ctxt->input->filename,
4853 ctxt->input->line);
4854 xmlGenericError(xmlGenericErrorContext,
4855 "Leaving INCLUDE Conditional Section\n");
4856 }
4857
4858 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4859 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4860 int state;
4861 int instate;
4862 int depth = 0;
4863
4864 SKIP(6);
4865 SKIP_BLANKS;
4866 if (RAW != '[') {
4867 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4868 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4869 ctxt->sax->error(ctxt->userData,
4870 "XML conditional section '[' expected\n");
4871 ctxt->wellFormed = 0;
4872 ctxt->disableSAX = 1;
4873 } else {
4874 NEXT;
4875 }
4876 if (xmlParserDebugEntities) {
4877 if ((ctxt->input != NULL) && (ctxt->input->filename))
4878 xmlGenericError(xmlGenericErrorContext,
4879 "%s(%d): ", ctxt->input->filename,
4880 ctxt->input->line);
4881 xmlGenericError(xmlGenericErrorContext,
4882 "Entering IGNORE Conditional Section\n");
4883 }
4884
4885 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004886 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00004887 * But disable SAX event generating DTD building in the meantime
4888 */
4889 state = ctxt->disableSAX;
4890 instate = ctxt->instate;
4891 ctxt->disableSAX = 1;
4892 ctxt->instate = XML_PARSER_IGNORE;
4893
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004894 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004895 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4896 depth++;
4897 SKIP(3);
4898 continue;
4899 }
4900 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4901 if (--depth >= 0) SKIP(3);
4902 continue;
4903 }
4904 NEXT;
4905 continue;
4906 }
4907
4908 ctxt->disableSAX = state;
4909 ctxt->instate = instate;
4910
4911 if (xmlParserDebugEntities) {
4912 if ((ctxt->input != NULL) && (ctxt->input->filename))
4913 xmlGenericError(xmlGenericErrorContext,
4914 "%s(%d): ", ctxt->input->filename,
4915 ctxt->input->line);
4916 xmlGenericError(xmlGenericErrorContext,
4917 "Leaving IGNORE Conditional Section\n");
4918 }
4919
4920 } else {
4921 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4922 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4923 ctxt->sax->error(ctxt->userData,
4924 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4925 ctxt->wellFormed = 0;
4926 ctxt->disableSAX = 1;
4927 }
4928
4929 if (RAW == 0)
4930 SHRINK;
4931
4932 if (RAW == 0) {
4933 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4934 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4935 ctxt->sax->error(ctxt->userData,
4936 "XML conditional section not closed\n");
4937 ctxt->wellFormed = 0;
4938 ctxt->disableSAX = 1;
4939 } else {
4940 SKIP(3);
4941 }
4942}
4943
4944/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00004945 * xmlParseMarkupDecl:
4946 * @ctxt: an XML parser context
4947 *
4948 * parse Markup declarations
4949 *
4950 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4951 * NotationDecl | PI | Comment
4952 *
4953 * [ VC: Proper Declaration/PE Nesting ]
4954 * Parameter-entity replacement text must be properly nested with
4955 * markup declarations. That is to say, if either the first character
4956 * or the last character of a markup declaration (markupdecl above) is
4957 * contained in the replacement text for a parameter-entity reference,
4958 * both must be contained in the same replacement text.
4959 *
4960 * [ WFC: PEs in Internal Subset ]
4961 * In the internal DTD subset, parameter-entity references can occur
4962 * only where markup declarations can occur, not within markup declarations.
4963 * (This does not apply to references that occur in external parameter
4964 * entities or to the external subset.)
4965 */
4966void
4967xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4968 GROW;
4969 xmlParseElementDecl(ctxt);
4970 xmlParseAttributeListDecl(ctxt);
4971 xmlParseEntityDecl(ctxt);
4972 xmlParseNotationDecl(ctxt);
4973 xmlParsePI(ctxt);
4974 xmlParseComment(ctxt);
4975 /*
4976 * This is only for internal subset. On external entities,
4977 * the replacement is done before parsing stage
4978 */
4979 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4980 xmlParsePEReference(ctxt);
4981
4982 /*
4983 * Conditional sections are allowed from entities included
4984 * by PE References in the internal subset.
4985 */
4986 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
4987 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4988 xmlParseConditionalSections(ctxt);
4989 }
4990 }
4991
4992 ctxt->instate = XML_PARSER_DTD;
4993}
4994
4995/**
4996 * xmlParseTextDecl:
4997 * @ctxt: an XML parser context
4998 *
4999 * parse an XML declaration header for external entities
5000 *
5001 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5002 *
5003 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5004 */
5005
5006void
5007xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5008 xmlChar *version;
5009
5010 /*
5011 * We know that '<?xml' is here.
5012 */
5013 if ((RAW == '<') && (NXT(1) == '?') &&
5014 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5015 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5016 SKIP(5);
5017 } else {
5018 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
5019 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5020 ctxt->sax->error(ctxt->userData,
5021 "Text declaration '<?xml' required\n");
5022 ctxt->wellFormed = 0;
5023 ctxt->disableSAX = 1;
5024
5025 return;
5026 }
5027
5028 if (!IS_BLANK(CUR)) {
5029 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5030 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5031 ctxt->sax->error(ctxt->userData,
5032 "Space needed after '<?xml'\n");
5033 ctxt->wellFormed = 0;
5034 ctxt->disableSAX = 1;
5035 }
5036 SKIP_BLANKS;
5037
5038 /*
5039 * We may have the VersionInfo here.
5040 */
5041 version = xmlParseVersionInfo(ctxt);
5042 if (version == NULL)
5043 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005044 else {
5045 if (!IS_BLANK(CUR)) {
5046 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5047 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5048 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5049 ctxt->wellFormed = 0;
5050 ctxt->disableSAX = 1;
5051 }
5052 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005053 ctxt->input->version = version;
5054
5055 /*
5056 * We must have the encoding declaration
5057 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005058 xmlParseEncodingDecl(ctxt);
5059 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5060 /*
5061 * The XML REC instructs us to stop parsing right here
5062 */
5063 return;
5064 }
5065
5066 SKIP_BLANKS;
5067 if ((RAW == '?') && (NXT(1) == '>')) {
5068 SKIP(2);
5069 } else if (RAW == '>') {
5070 /* Deprecated old WD ... */
5071 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5072 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5073 ctxt->sax->error(ctxt->userData,
5074 "XML declaration must end-up with '?>'\n");
5075 ctxt->wellFormed = 0;
5076 ctxt->disableSAX = 1;
5077 NEXT;
5078 } else {
5079 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5080 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5081 ctxt->sax->error(ctxt->userData,
5082 "parsing XML declaration: '?>' expected\n");
5083 ctxt->wellFormed = 0;
5084 ctxt->disableSAX = 1;
5085 MOVETO_ENDTAG(CUR_PTR);
5086 NEXT;
5087 }
5088}
5089
5090/**
Owen Taylor3473f882001-02-23 17:55:21 +00005091 * xmlParseExternalSubset:
5092 * @ctxt: an XML parser context
5093 * @ExternalID: the external identifier
5094 * @SystemID: the system identifier (or URL)
5095 *
5096 * parse Markup declarations from an external subset
5097 *
5098 * [30] extSubset ::= textDecl? extSubsetDecl
5099 *
5100 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5101 */
5102void
5103xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5104 const xmlChar *SystemID) {
5105 GROW;
5106 if ((RAW == '<') && (NXT(1) == '?') &&
5107 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5108 (NXT(4) == 'l')) {
5109 xmlParseTextDecl(ctxt);
5110 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5111 /*
5112 * The XML REC instructs us to stop parsing right here
5113 */
5114 ctxt->instate = XML_PARSER_EOF;
5115 return;
5116 }
5117 }
5118 if (ctxt->myDoc == NULL) {
5119 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5120 }
5121 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5122 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5123
5124 ctxt->instate = XML_PARSER_DTD;
5125 ctxt->external = 1;
5126 while (((RAW == '<') && (NXT(1) == '?')) ||
5127 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005128 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005129 const xmlChar *check = CUR_PTR;
5130 int cons = ctxt->input->consumed;
5131 int tok = ctxt->token;
5132
5133 GROW;
5134 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5135 xmlParseConditionalSections(ctxt);
5136 } else if (IS_BLANK(CUR)) {
5137 NEXT;
5138 } else if (RAW == '%') {
5139 xmlParsePEReference(ctxt);
5140 } else
5141 xmlParseMarkupDecl(ctxt);
5142
5143 /*
5144 * Pop-up of finished entities.
5145 */
5146 while ((RAW == 0) && (ctxt->inputNr > 1))
5147 xmlPopInput(ctxt);
5148
5149 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
5150 (tok == ctxt->token)) {
5151 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5152 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5153 ctxt->sax->error(ctxt->userData,
5154 "Content error in the external subset\n");
5155 ctxt->wellFormed = 0;
5156 ctxt->disableSAX = 1;
5157 break;
5158 }
5159 }
5160
5161 if (RAW != 0) {
5162 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5163 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5164 ctxt->sax->error(ctxt->userData,
5165 "Extra content at the end of the document\n");
5166 ctxt->wellFormed = 0;
5167 ctxt->disableSAX = 1;
5168 }
5169
5170}
5171
5172/**
5173 * xmlParseReference:
5174 * @ctxt: an XML parser context
5175 *
5176 * parse and handle entity references in content, depending on the SAX
5177 * interface, this may end-up in a call to character() if this is a
5178 * CharRef, a predefined entity, if there is no reference() callback.
5179 * or if the parser was asked to switch to that mode.
5180 *
5181 * [67] Reference ::= EntityRef | CharRef
5182 */
5183void
5184xmlParseReference(xmlParserCtxtPtr ctxt) {
5185 xmlEntityPtr ent;
5186 xmlChar *val;
5187 if (RAW != '&') return;
5188
5189 if (NXT(1) == '#') {
5190 int i = 0;
5191 xmlChar out[10];
5192 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005193 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005194
5195 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5196 /*
5197 * So we are using non-UTF-8 buffers
5198 * Check that the char fit on 8bits, if not
5199 * generate a CharRef.
5200 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005201 if (value <= 0xFF) {
5202 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005203 out[1] = 0;
5204 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5205 (!ctxt->disableSAX))
5206 ctxt->sax->characters(ctxt->userData, out, 1);
5207 } else {
5208 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005209 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005210 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005211 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005212 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5213 (!ctxt->disableSAX))
5214 ctxt->sax->reference(ctxt->userData, out);
5215 }
5216 } else {
5217 /*
5218 * Just encode the value in UTF-8
5219 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005220 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005221 out[i] = 0;
5222 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5223 (!ctxt->disableSAX))
5224 ctxt->sax->characters(ctxt->userData, out, i);
5225 }
5226 } else {
5227 ent = xmlParseEntityRef(ctxt);
5228 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005229 if (!ctxt->wellFormed)
5230 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005231 if ((ent->name != NULL) &&
5232 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5233 xmlNodePtr list = NULL;
5234 int ret;
5235
5236
5237 /*
5238 * The first reference to the entity trigger a parsing phase
5239 * where the ent->children is filled with the result from
5240 * the parsing.
5241 */
5242 if (ent->children == NULL) {
5243 xmlChar *value;
5244 value = ent->content;
5245
5246 /*
5247 * Check that this entity is well formed
5248 */
5249 if ((value != NULL) &&
5250 (value[1] == 0) && (value[0] == '<') &&
5251 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5252 /*
5253 * DONE: get definite answer on this !!!
5254 * Lots of entity decls are used to declare a single
5255 * char
5256 * <!ENTITY lt "<">
5257 * Which seems to be valid since
5258 * 2.4: The ampersand character (&) and the left angle
5259 * bracket (<) may appear in their literal form only
5260 * when used ... They are also legal within the literal
5261 * entity value of an internal entity declaration;i
5262 * see "4.3.2 Well-Formed Parsed Entities".
5263 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5264 * Looking at the OASIS test suite and James Clark
5265 * tests, this is broken. However the XML REC uses
5266 * it. Is the XML REC not well-formed ????
5267 * This is a hack to avoid this problem
5268 *
5269 * ANSWER: since lt gt amp .. are already defined,
5270 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005271 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005272 * is lousy but acceptable.
5273 */
5274 list = xmlNewDocText(ctxt->myDoc, value);
5275 if (list != NULL) {
5276 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5277 (ent->children == NULL)) {
5278 ent->children = list;
5279 ent->last = list;
5280 list->parent = (xmlNodePtr) ent;
5281 } else {
5282 xmlFreeNodeList(list);
5283 }
5284 } else if (list != NULL) {
5285 xmlFreeNodeList(list);
5286 }
5287 } else {
5288 /*
5289 * 4.3.2: An internal general parsed entity is well-formed
5290 * if its replacement text matches the production labeled
5291 * content.
5292 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005293
5294 void *user_data;
5295 /*
5296 * This is a bit hackish but this seems the best
5297 * way to make sure both SAX and DOM entity support
5298 * behaves okay.
5299 */
5300 if (ctxt->userData == ctxt)
5301 user_data = NULL;
5302 else
5303 user_data = ctxt->userData;
5304
Owen Taylor3473f882001-02-23 17:55:21 +00005305 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5306 ctxt->depth++;
5307 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005308 ctxt->sax, user_data, ctxt->depth,
Owen Taylor3473f882001-02-23 17:55:21 +00005309 value, &list);
5310 ctxt->depth--;
5311 } else if (ent->etype ==
5312 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5313 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005314 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005315 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005316 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005317 ctxt->depth--;
5318 } else {
5319 ret = -1;
5320 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5321 ctxt->sax->error(ctxt->userData,
5322 "Internal: invalid entity type\n");
5323 }
5324 if (ret == XML_ERR_ENTITY_LOOP) {
5325 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5326 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5327 ctxt->sax->error(ctxt->userData,
5328 "Detected entity reference loop\n");
5329 ctxt->wellFormed = 0;
5330 ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005331 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005332 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005333 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5334 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005335 (ent->children == NULL)) {
5336 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005337 if (ctxt->replaceEntities) {
5338 /*
5339 * Prune it directly in the generated document
5340 * except for single text nodes.
5341 */
5342 if ((list->type == XML_TEXT_NODE) &&
5343 (list->next == NULL)) {
5344 list->parent = (xmlNodePtr) ent;
5345 list = NULL;
5346 } else {
5347 while (list != NULL) {
5348 list->parent = (xmlNodePtr) ctxt->node;
5349 if (list->next == NULL)
5350 ent->last = list;
5351 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005352 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005353 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005354 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5355 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005356 }
5357 } else {
5358 while (list != NULL) {
5359 list->parent = (xmlNodePtr) ent;
5360 if (list->next == NULL)
5361 ent->last = list;
5362 list = list->next;
5363 }
Owen Taylor3473f882001-02-23 17:55:21 +00005364 }
5365 } else {
5366 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005367 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005368 }
5369 } else if (ret > 0) {
5370 ctxt->errNo = ret;
5371 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5372 ctxt->sax->error(ctxt->userData,
5373 "Entity value required\n");
5374 ctxt->wellFormed = 0;
5375 ctxt->disableSAX = 1;
5376 } else if (list != NULL) {
5377 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005378 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005379 }
5380 }
5381 }
5382 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5383 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5384 /*
5385 * Create a node.
5386 */
5387 ctxt->sax->reference(ctxt->userData, ent->name);
5388 return;
5389 } else if (ctxt->replaceEntities) {
5390 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5391 /*
5392 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005393 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005394 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005395 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005396 if (list == NULL) {
Daniel Veillard8107a222002-01-13 14:10:10 +00005397 xmlNodePtr new = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005398 cur = ent->children;
5399 while (cur != NULL) {
5400 new = xmlCopyNode(cur, 1);
Daniel Veillard8107a222002-01-13 14:10:10 +00005401 if (firstChild == NULL){
5402 firstChild = new;
5403 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005404 xmlAddChild(ctxt->node, new);
5405 if (cur == ent->last)
5406 break;
5407 cur = cur->next;
5408 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005409 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5410 xmlAddEntityReference(ent, firstChild, new);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005411 } else {
5412 /*
5413 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005414 * node with a possible previous text one which
5415 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005416 */
5417 if (ent->children->type == XML_TEXT_NODE)
5418 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5419 if ((ent->last != ent->children) &&
5420 (ent->last->type == XML_TEXT_NODE))
5421 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5422 xmlAddChildList(ctxt->node, ent->children);
5423 }
5424
Owen Taylor3473f882001-02-23 17:55:21 +00005425 /*
5426 * This is to avoid a nasty side effect, see
5427 * characters() in SAX.c
5428 */
5429 ctxt->nodemem = 0;
5430 ctxt->nodelen = 0;
5431 return;
5432 } else {
5433 /*
5434 * Probably running in SAX mode
5435 */
5436 xmlParserInputPtr input;
5437
5438 input = xmlNewEntityInputStream(ctxt, ent);
5439 xmlPushInput(ctxt, input);
5440 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5441 (RAW == '<') && (NXT(1) == '?') &&
5442 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5443 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5444 xmlParseTextDecl(ctxt);
5445 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5446 /*
5447 * The XML REC instructs us to stop parsing right here
5448 */
5449 ctxt->instate = XML_PARSER_EOF;
5450 return;
5451 }
5452 if (input->standalone == 1) {
5453 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5454 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5455 ctxt->sax->error(ctxt->userData,
5456 "external parsed entities cannot be standalone\n");
5457 ctxt->wellFormed = 0;
5458 ctxt->disableSAX = 1;
5459 }
5460 }
5461 return;
5462 }
5463 }
5464 } else {
5465 val = ent->content;
5466 if (val == NULL) return;
5467 /*
5468 * inline the entity.
5469 */
5470 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5471 (!ctxt->disableSAX))
5472 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5473 }
5474 }
5475}
5476
5477/**
5478 * xmlParseEntityRef:
5479 * @ctxt: an XML parser context
5480 *
5481 * parse ENTITY references declarations
5482 *
5483 * [68] EntityRef ::= '&' Name ';'
5484 *
5485 * [ WFC: Entity Declared ]
5486 * In a document without any DTD, a document with only an internal DTD
5487 * subset which contains no parameter entity references, or a document
5488 * with "standalone='yes'", the Name given in the entity reference
5489 * must match that in an entity declaration, except that well-formed
5490 * documents need not declare any of the following entities: amp, lt,
5491 * gt, apos, quot. The declaration of a parameter entity must precede
5492 * any reference to it. Similarly, the declaration of a general entity
5493 * must precede any reference to it which appears in a default value in an
5494 * attribute-list declaration. Note that if entities are declared in the
5495 * external subset or in external parameter entities, a non-validating
5496 * processor is not obligated to read and process their declarations;
5497 * for such documents, the rule that an entity must be declared is a
5498 * well-formedness constraint only if standalone='yes'.
5499 *
5500 * [ WFC: Parsed Entity ]
5501 * An entity reference must not contain the name of an unparsed entity
5502 *
5503 * Returns the xmlEntityPtr if found, or NULL otherwise.
5504 */
5505xmlEntityPtr
5506xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5507 xmlChar *name;
5508 xmlEntityPtr ent = NULL;
5509
5510 GROW;
5511
5512 if (RAW == '&') {
5513 NEXT;
5514 name = xmlParseName(ctxt);
5515 if (name == NULL) {
5516 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5517 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5518 ctxt->sax->error(ctxt->userData,
5519 "xmlParseEntityRef: no name\n");
5520 ctxt->wellFormed = 0;
5521 ctxt->disableSAX = 1;
5522 } else {
5523 if (RAW == ';') {
5524 NEXT;
5525 /*
5526 * Ask first SAX for entity resolution, otherwise try the
5527 * predefined set.
5528 */
5529 if (ctxt->sax != NULL) {
5530 if (ctxt->sax->getEntity != NULL)
5531 ent = ctxt->sax->getEntity(ctxt->userData, name);
5532 if (ent == NULL)
5533 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005534 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5535 ent = getEntity(ctxt, name);
5536 }
Owen Taylor3473f882001-02-23 17:55:21 +00005537 }
5538 /*
5539 * [ WFC: Entity Declared ]
5540 * In a document without any DTD, a document with only an
5541 * internal DTD subset which contains no parameter entity
5542 * references, or a document with "standalone='yes'", the
5543 * Name given in the entity reference must match that in an
5544 * entity declaration, except that well-formed documents
5545 * need not declare any of the following entities: amp, lt,
5546 * gt, apos, quot.
5547 * The declaration of a parameter entity must precede any
5548 * reference to it.
5549 * Similarly, the declaration of a general entity must
5550 * precede any reference to it which appears in a default
5551 * value in an attribute-list declaration. Note that if
5552 * entities are declared in the external subset or in
5553 * external parameter entities, a non-validating processor
5554 * is not obligated to read and process their declarations;
5555 * for such documents, the rule that an entity must be
5556 * declared is a well-formedness constraint only if
5557 * standalone='yes'.
5558 */
5559 if (ent == NULL) {
5560 if ((ctxt->standalone == 1) ||
5561 ((ctxt->hasExternalSubset == 0) &&
5562 (ctxt->hasPErefs == 0))) {
5563 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5564 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5565 ctxt->sax->error(ctxt->userData,
5566 "Entity '%s' not defined\n", name);
5567 ctxt->wellFormed = 0;
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005568 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005569 ctxt->disableSAX = 1;
5570 } else {
5571 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005572 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005573 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005574 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005575 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005576 }
5577 }
5578
5579 /*
5580 * [ WFC: Parsed Entity ]
5581 * An entity reference must not contain the name of an
5582 * unparsed entity
5583 */
5584 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5585 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5586 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5587 ctxt->sax->error(ctxt->userData,
5588 "Entity reference to unparsed entity %s\n", name);
5589 ctxt->wellFormed = 0;
5590 ctxt->disableSAX = 1;
5591 }
5592
5593 /*
5594 * [ WFC: No External Entity References ]
5595 * Attribute values cannot contain direct or indirect
5596 * entity references to external entities.
5597 */
5598 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5599 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5600 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5602 ctxt->sax->error(ctxt->userData,
5603 "Attribute references external entity '%s'\n", name);
5604 ctxt->wellFormed = 0;
5605 ctxt->disableSAX = 1;
5606 }
5607 /*
5608 * [ WFC: No < in Attribute Values ]
5609 * The replacement text of any entity referred to directly or
5610 * indirectly in an attribute value (other than "&lt;") must
5611 * not contain a <.
5612 */
5613 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5614 (ent != NULL) &&
5615 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5616 (ent->content != NULL) &&
5617 (xmlStrchr(ent->content, '<'))) {
5618 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5619 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5620 ctxt->sax->error(ctxt->userData,
5621 "'<' in entity '%s' is not allowed in attributes values\n", name);
5622 ctxt->wellFormed = 0;
5623 ctxt->disableSAX = 1;
5624 }
5625
5626 /*
5627 * Internal check, no parameter entities here ...
5628 */
5629 else {
5630 switch (ent->etype) {
5631 case XML_INTERNAL_PARAMETER_ENTITY:
5632 case XML_EXTERNAL_PARAMETER_ENTITY:
5633 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5634 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5635 ctxt->sax->error(ctxt->userData,
5636 "Attempt to reference the parameter entity '%s'\n", name);
5637 ctxt->wellFormed = 0;
5638 ctxt->disableSAX = 1;
5639 break;
5640 default:
5641 break;
5642 }
5643 }
5644
5645 /*
5646 * [ WFC: No Recursion ]
5647 * A parsed entity must not contain a recursive reference
5648 * to itself, either directly or indirectly.
5649 * Done somewhere else
5650 */
5651
5652 } else {
5653 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5654 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5655 ctxt->sax->error(ctxt->userData,
5656 "xmlParseEntityRef: expecting ';'\n");
5657 ctxt->wellFormed = 0;
5658 ctxt->disableSAX = 1;
5659 }
5660 xmlFree(name);
5661 }
5662 }
5663 return(ent);
5664}
5665
5666/**
5667 * xmlParseStringEntityRef:
5668 * @ctxt: an XML parser context
5669 * @str: a pointer to an index in the string
5670 *
5671 * parse ENTITY references declarations, but this version parses it from
5672 * a string value.
5673 *
5674 * [68] EntityRef ::= '&' Name ';'
5675 *
5676 * [ WFC: Entity Declared ]
5677 * In a document without any DTD, a document with only an internal DTD
5678 * subset which contains no parameter entity references, or a document
5679 * with "standalone='yes'", the Name given in the entity reference
5680 * must match that in an entity declaration, except that well-formed
5681 * documents need not declare any of the following entities: amp, lt,
5682 * gt, apos, quot. The declaration of a parameter entity must precede
5683 * any reference to it. Similarly, the declaration of a general entity
5684 * must precede any reference to it which appears in a default value in an
5685 * attribute-list declaration. Note that if entities are declared in the
5686 * external subset or in external parameter entities, a non-validating
5687 * processor is not obligated to read and process their declarations;
5688 * for such documents, the rule that an entity must be declared is a
5689 * well-formedness constraint only if standalone='yes'.
5690 *
5691 * [ WFC: Parsed Entity ]
5692 * An entity reference must not contain the name of an unparsed entity
5693 *
5694 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5695 * is updated to the current location in the string.
5696 */
5697xmlEntityPtr
5698xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5699 xmlChar *name;
5700 const xmlChar *ptr;
5701 xmlChar cur;
5702 xmlEntityPtr ent = NULL;
5703
5704 if ((str == NULL) || (*str == NULL))
5705 return(NULL);
5706 ptr = *str;
5707 cur = *ptr;
5708 if (cur == '&') {
5709 ptr++;
5710 cur = *ptr;
5711 name = xmlParseStringName(ctxt, &ptr);
5712 if (name == NULL) {
5713 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5715 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005716 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005717 ctxt->wellFormed = 0;
5718 ctxt->disableSAX = 1;
5719 } else {
5720 if (*ptr == ';') {
5721 ptr++;
5722 /*
5723 * Ask first SAX for entity resolution, otherwise try the
5724 * predefined set.
5725 */
5726 if (ctxt->sax != NULL) {
5727 if (ctxt->sax->getEntity != NULL)
5728 ent = ctxt->sax->getEntity(ctxt->userData, name);
5729 if (ent == NULL)
5730 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005731 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5732 ent = getEntity(ctxt, name);
5733 }
Owen Taylor3473f882001-02-23 17:55:21 +00005734 }
5735 /*
5736 * [ WFC: Entity Declared ]
5737 * In a document without any DTD, a document with only an
5738 * internal DTD subset which contains no parameter entity
5739 * references, or a document with "standalone='yes'", the
5740 * Name given in the entity reference must match that in an
5741 * entity declaration, except that well-formed documents
5742 * need not declare any of the following entities: amp, lt,
5743 * gt, apos, quot.
5744 * The declaration of a parameter entity must precede any
5745 * reference to it.
5746 * Similarly, the declaration of a general entity must
5747 * precede any reference to it which appears in a default
5748 * value in an attribute-list declaration. Note that if
5749 * entities are declared in the external subset or in
5750 * external parameter entities, a non-validating processor
5751 * is not obligated to read and process their declarations;
5752 * for such documents, the rule that an entity must be
5753 * declared is a well-formedness constraint only if
5754 * standalone='yes'.
5755 */
5756 if (ent == NULL) {
5757 if ((ctxt->standalone == 1) ||
5758 ((ctxt->hasExternalSubset == 0) &&
5759 (ctxt->hasPErefs == 0))) {
5760 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5761 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5762 ctxt->sax->error(ctxt->userData,
5763 "Entity '%s' not defined\n", name);
5764 ctxt->wellFormed = 0;
5765 ctxt->disableSAX = 1;
5766 } else {
5767 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5768 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5769 ctxt->sax->warning(ctxt->userData,
5770 "Entity '%s' not defined\n", name);
5771 }
5772 }
5773
5774 /*
5775 * [ WFC: Parsed Entity ]
5776 * An entity reference must not contain the name of an
5777 * unparsed entity
5778 */
5779 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5780 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5781 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5782 ctxt->sax->error(ctxt->userData,
5783 "Entity reference to unparsed entity %s\n", name);
5784 ctxt->wellFormed = 0;
5785 ctxt->disableSAX = 1;
5786 }
5787
5788 /*
5789 * [ WFC: No External Entity References ]
5790 * Attribute values cannot contain direct or indirect
5791 * entity references to external entities.
5792 */
5793 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5794 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5795 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5796 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5797 ctxt->sax->error(ctxt->userData,
5798 "Attribute references external entity '%s'\n", name);
5799 ctxt->wellFormed = 0;
5800 ctxt->disableSAX = 1;
5801 }
5802 /*
5803 * [ WFC: No < in Attribute Values ]
5804 * The replacement text of any entity referred to directly or
5805 * indirectly in an attribute value (other than "&lt;") must
5806 * not contain a <.
5807 */
5808 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5809 (ent != NULL) &&
5810 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5811 (ent->content != NULL) &&
5812 (xmlStrchr(ent->content, '<'))) {
5813 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5814 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5815 ctxt->sax->error(ctxt->userData,
5816 "'<' in entity '%s' is not allowed in attributes values\n", name);
5817 ctxt->wellFormed = 0;
5818 ctxt->disableSAX = 1;
5819 }
5820
5821 /*
5822 * Internal check, no parameter entities here ...
5823 */
5824 else {
5825 switch (ent->etype) {
5826 case XML_INTERNAL_PARAMETER_ENTITY:
5827 case XML_EXTERNAL_PARAMETER_ENTITY:
5828 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5829 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5830 ctxt->sax->error(ctxt->userData,
5831 "Attempt to reference the parameter entity '%s'\n", name);
5832 ctxt->wellFormed = 0;
5833 ctxt->disableSAX = 1;
5834 break;
5835 default:
5836 break;
5837 }
5838 }
5839
5840 /*
5841 * [ WFC: No Recursion ]
5842 * A parsed entity must not contain a recursive reference
5843 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005844 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00005845 */
5846
5847 } else {
5848 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5849 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5850 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005851 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005852 ctxt->wellFormed = 0;
5853 ctxt->disableSAX = 1;
5854 }
5855 xmlFree(name);
5856 }
5857 }
5858 *str = ptr;
5859 return(ent);
5860}
5861
5862/**
5863 * xmlParsePEReference:
5864 * @ctxt: an XML parser context
5865 *
5866 * parse PEReference declarations
5867 * The entity content is handled directly by pushing its content as
5868 * a new input stream.
5869 *
5870 * [69] PEReference ::= '%' Name ';'
5871 *
5872 * [ WFC: No Recursion ]
5873 * A parsed entity must not contain a recursive
5874 * reference to itself, either directly or indirectly.
5875 *
5876 * [ WFC: Entity Declared ]
5877 * In a document without any DTD, a document with only an internal DTD
5878 * subset which contains no parameter entity references, or a document
5879 * with "standalone='yes'", ... ... The declaration of a parameter
5880 * entity must precede any reference to it...
5881 *
5882 * [ VC: Entity Declared ]
5883 * In a document with an external subset or external parameter entities
5884 * with "standalone='no'", ... ... The declaration of a parameter entity
5885 * must precede any reference to it...
5886 *
5887 * [ WFC: In DTD ]
5888 * Parameter-entity references may only appear in the DTD.
5889 * NOTE: misleading but this is handled.
5890 */
5891void
5892xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5893 xmlChar *name;
5894 xmlEntityPtr entity = NULL;
5895 xmlParserInputPtr input;
5896
5897 if (RAW == '%') {
5898 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005899 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005900 if (name == NULL) {
5901 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5902 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5903 ctxt->sax->error(ctxt->userData,
5904 "xmlParsePEReference: no name\n");
5905 ctxt->wellFormed = 0;
5906 ctxt->disableSAX = 1;
5907 } else {
5908 if (RAW == ';') {
5909 NEXT;
5910 if ((ctxt->sax != NULL) &&
5911 (ctxt->sax->getParameterEntity != NULL))
5912 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5913 name);
5914 if (entity == NULL) {
5915 /*
5916 * [ WFC: Entity Declared ]
5917 * In a document without any DTD, a document with only an
5918 * internal DTD subset which contains no parameter entity
5919 * references, or a document with "standalone='yes'", ...
5920 * ... The declaration of a parameter entity must precede
5921 * any reference to it...
5922 */
5923 if ((ctxt->standalone == 1) ||
5924 ((ctxt->hasExternalSubset == 0) &&
5925 (ctxt->hasPErefs == 0))) {
5926 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5927 if ((!ctxt->disableSAX) &&
5928 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5929 ctxt->sax->error(ctxt->userData,
5930 "PEReference: %%%s; not found\n", name);
5931 ctxt->wellFormed = 0;
5932 ctxt->disableSAX = 1;
5933 } else {
5934 /*
5935 * [ VC: Entity Declared ]
5936 * In a document with an external subset or external
5937 * parameter entities with "standalone='no'", ...
5938 * ... The declaration of a parameter entity must precede
5939 * any reference to it...
5940 */
5941 if ((!ctxt->disableSAX) &&
5942 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5943 ctxt->sax->warning(ctxt->userData,
5944 "PEReference: %%%s; not found\n", name);
5945 ctxt->valid = 0;
5946 }
5947 } else {
5948 /*
5949 * Internal checking in case the entity quest barfed
5950 */
5951 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5952 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5953 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5954 ctxt->sax->warning(ctxt->userData,
5955 "Internal: %%%s; is not a parameter entity\n", name);
5956 } else {
5957 /*
5958 * TODO !!!
5959 * handle the extra spaces added before and after
5960 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5961 */
5962 input = xmlNewEntityInputStream(ctxt, entity);
5963 xmlPushInput(ctxt, input);
5964 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5965 (RAW == '<') && (NXT(1) == '?') &&
5966 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5967 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5968 xmlParseTextDecl(ctxt);
5969 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5970 /*
5971 * The XML REC instructs us to stop parsing
5972 * right here
5973 */
5974 ctxt->instate = XML_PARSER_EOF;
5975 xmlFree(name);
5976 return;
5977 }
5978 }
5979 if (ctxt->token == 0)
5980 ctxt->token = ' ';
5981 }
5982 }
5983 ctxt->hasPErefs = 1;
5984 } else {
5985 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5987 ctxt->sax->error(ctxt->userData,
5988 "xmlParsePEReference: expecting ';'\n");
5989 ctxt->wellFormed = 0;
5990 ctxt->disableSAX = 1;
5991 }
5992 xmlFree(name);
5993 }
5994 }
5995}
5996
5997/**
5998 * xmlParseStringPEReference:
5999 * @ctxt: an XML parser context
6000 * @str: a pointer to an index in the string
6001 *
6002 * parse PEReference declarations
6003 *
6004 * [69] PEReference ::= '%' Name ';'
6005 *
6006 * [ WFC: No Recursion ]
6007 * A parsed entity must not contain a recursive
6008 * reference to itself, either directly or indirectly.
6009 *
6010 * [ WFC: Entity Declared ]
6011 * In a document without any DTD, a document with only an internal DTD
6012 * subset which contains no parameter entity references, or a document
6013 * with "standalone='yes'", ... ... The declaration of a parameter
6014 * entity must precede any reference to it...
6015 *
6016 * [ VC: Entity Declared ]
6017 * In a document with an external subset or external parameter entities
6018 * with "standalone='no'", ... ... The declaration of a parameter entity
6019 * must precede any reference to it...
6020 *
6021 * [ WFC: In DTD ]
6022 * Parameter-entity references may only appear in the DTD.
6023 * NOTE: misleading but this is handled.
6024 *
6025 * Returns the string of the entity content.
6026 * str is updated to the current value of the index
6027 */
6028xmlEntityPtr
6029xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6030 const xmlChar *ptr;
6031 xmlChar cur;
6032 xmlChar *name;
6033 xmlEntityPtr entity = NULL;
6034
6035 if ((str == NULL) || (*str == NULL)) return(NULL);
6036 ptr = *str;
6037 cur = *ptr;
6038 if (cur == '%') {
6039 ptr++;
6040 cur = *ptr;
6041 name = xmlParseStringName(ctxt, &ptr);
6042 if (name == NULL) {
6043 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6044 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6045 ctxt->sax->error(ctxt->userData,
6046 "xmlParseStringPEReference: no name\n");
6047 ctxt->wellFormed = 0;
6048 ctxt->disableSAX = 1;
6049 } else {
6050 cur = *ptr;
6051 if (cur == ';') {
6052 ptr++;
6053 cur = *ptr;
6054 if ((ctxt->sax != NULL) &&
6055 (ctxt->sax->getParameterEntity != NULL))
6056 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6057 name);
6058 if (entity == NULL) {
6059 /*
6060 * [ WFC: Entity Declared ]
6061 * In a document without any DTD, a document with only an
6062 * internal DTD subset which contains no parameter entity
6063 * references, or a document with "standalone='yes'", ...
6064 * ... The declaration of a parameter entity must precede
6065 * any reference to it...
6066 */
6067 if ((ctxt->standalone == 1) ||
6068 ((ctxt->hasExternalSubset == 0) &&
6069 (ctxt->hasPErefs == 0))) {
6070 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6071 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6072 ctxt->sax->error(ctxt->userData,
6073 "PEReference: %%%s; not found\n", name);
6074 ctxt->wellFormed = 0;
6075 ctxt->disableSAX = 1;
6076 } else {
6077 /*
6078 * [ VC: Entity Declared ]
6079 * In a document with an external subset or external
6080 * parameter entities with "standalone='no'", ...
6081 * ... The declaration of a parameter entity must
6082 * precede any reference to it...
6083 */
6084 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6085 ctxt->sax->warning(ctxt->userData,
6086 "PEReference: %%%s; not found\n", name);
6087 ctxt->valid = 0;
6088 }
6089 } else {
6090 /*
6091 * Internal checking in case the entity quest barfed
6092 */
6093 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6094 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6095 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6096 ctxt->sax->warning(ctxt->userData,
6097 "Internal: %%%s; is not a parameter entity\n", name);
6098 }
6099 }
6100 ctxt->hasPErefs = 1;
6101 } else {
6102 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6103 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6104 ctxt->sax->error(ctxt->userData,
6105 "xmlParseStringPEReference: expecting ';'\n");
6106 ctxt->wellFormed = 0;
6107 ctxt->disableSAX = 1;
6108 }
6109 xmlFree(name);
6110 }
6111 }
6112 *str = ptr;
6113 return(entity);
6114}
6115
6116/**
6117 * xmlParseDocTypeDecl:
6118 * @ctxt: an XML parser context
6119 *
6120 * parse a DOCTYPE declaration
6121 *
6122 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6123 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6124 *
6125 * [ VC: Root Element Type ]
6126 * The Name in the document type declaration must match the element
6127 * type of the root element.
6128 */
6129
6130void
6131xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6132 xmlChar *name = NULL;
6133 xmlChar *ExternalID = NULL;
6134 xmlChar *URI = NULL;
6135
6136 /*
6137 * We know that '<!DOCTYPE' has been detected.
6138 */
6139 SKIP(9);
6140
6141 SKIP_BLANKS;
6142
6143 /*
6144 * Parse the DOCTYPE name.
6145 */
6146 name = xmlParseName(ctxt);
6147 if (name == NULL) {
6148 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6149 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6150 ctxt->sax->error(ctxt->userData,
6151 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6152 ctxt->wellFormed = 0;
6153 ctxt->disableSAX = 1;
6154 }
6155 ctxt->intSubName = name;
6156
6157 SKIP_BLANKS;
6158
6159 /*
6160 * Check for SystemID and ExternalID
6161 */
6162 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6163
6164 if ((URI != NULL) || (ExternalID != NULL)) {
6165 ctxt->hasExternalSubset = 1;
6166 }
6167 ctxt->extSubURI = URI;
6168 ctxt->extSubSystem = ExternalID;
6169
6170 SKIP_BLANKS;
6171
6172 /*
6173 * Create and update the internal subset.
6174 */
6175 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6176 (!ctxt->disableSAX))
6177 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6178
6179 /*
6180 * Is there any internal subset declarations ?
6181 * they are handled separately in xmlParseInternalSubset()
6182 */
6183 if (RAW == '[')
6184 return;
6185
6186 /*
6187 * We should be at the end of the DOCTYPE declaration.
6188 */
6189 if (RAW != '>') {
6190 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6191 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006192 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006193 ctxt->wellFormed = 0;
6194 ctxt->disableSAX = 1;
6195 }
6196 NEXT;
6197}
6198
6199/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006200 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006201 * @ctxt: an XML parser context
6202 *
6203 * parse the internal subset declaration
6204 *
6205 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6206 */
6207
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006208static void
Owen Taylor3473f882001-02-23 17:55:21 +00006209xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6210 /*
6211 * Is there any DTD definition ?
6212 */
6213 if (RAW == '[') {
6214 ctxt->instate = XML_PARSER_DTD;
6215 NEXT;
6216 /*
6217 * Parse the succession of Markup declarations and
6218 * PEReferences.
6219 * Subsequence (markupdecl | PEReference | S)*
6220 */
6221 while (RAW != ']') {
6222 const xmlChar *check = CUR_PTR;
6223 int cons = ctxt->input->consumed;
6224
6225 SKIP_BLANKS;
6226 xmlParseMarkupDecl(ctxt);
6227 xmlParsePEReference(ctxt);
6228
6229 /*
6230 * Pop-up of finished entities.
6231 */
6232 while ((RAW == 0) && (ctxt->inputNr > 1))
6233 xmlPopInput(ctxt);
6234
6235 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6236 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6237 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6238 ctxt->sax->error(ctxt->userData,
6239 "xmlParseInternalSubset: error detected in Markup declaration\n");
6240 ctxt->wellFormed = 0;
6241 ctxt->disableSAX = 1;
6242 break;
6243 }
6244 }
6245 if (RAW == ']') {
6246 NEXT;
6247 SKIP_BLANKS;
6248 }
6249 }
6250
6251 /*
6252 * We should be at the end of the DOCTYPE declaration.
6253 */
6254 if (RAW != '>') {
6255 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6256 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006257 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006258 ctxt->wellFormed = 0;
6259 ctxt->disableSAX = 1;
6260 }
6261 NEXT;
6262}
6263
6264/**
6265 * xmlParseAttribute:
6266 * @ctxt: an XML parser context
6267 * @value: a xmlChar ** used to store the value of the attribute
6268 *
6269 * parse an attribute
6270 *
6271 * [41] Attribute ::= Name Eq AttValue
6272 *
6273 * [ WFC: No External Entity References ]
6274 * Attribute values cannot contain direct or indirect entity references
6275 * to external entities.
6276 *
6277 * [ WFC: No < in Attribute Values ]
6278 * The replacement text of any entity referred to directly or indirectly in
6279 * an attribute value (other than "&lt;") must not contain a <.
6280 *
6281 * [ VC: Attribute Value Type ]
6282 * The attribute must have been declared; the value must be of the type
6283 * declared for it.
6284 *
6285 * [25] Eq ::= S? '=' S?
6286 *
6287 * With namespace:
6288 *
6289 * [NS 11] Attribute ::= QName Eq AttValue
6290 *
6291 * Also the case QName == xmlns:??? is handled independently as a namespace
6292 * definition.
6293 *
6294 * Returns the attribute name, and the value in *value.
6295 */
6296
6297xmlChar *
6298xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6299 xmlChar *name, *val;
6300
6301 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006302 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006303 name = xmlParseName(ctxt);
6304 if (name == NULL) {
6305 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6306 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6307 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6308 ctxt->wellFormed = 0;
6309 ctxt->disableSAX = 1;
6310 return(NULL);
6311 }
6312
6313 /*
6314 * read the value
6315 */
6316 SKIP_BLANKS;
6317 if (RAW == '=') {
6318 NEXT;
6319 SKIP_BLANKS;
6320 val = xmlParseAttValue(ctxt);
6321 ctxt->instate = XML_PARSER_CONTENT;
6322 } else {
6323 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6324 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6325 ctxt->sax->error(ctxt->userData,
6326 "Specification mandate value for attribute %s\n", name);
6327 ctxt->wellFormed = 0;
6328 ctxt->disableSAX = 1;
6329 xmlFree(name);
6330 return(NULL);
6331 }
6332
6333 /*
6334 * Check that xml:lang conforms to the specification
6335 * No more registered as an error, just generate a warning now
6336 * since this was deprecated in XML second edition
6337 */
6338 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6339 if (!xmlCheckLanguageID(val)) {
6340 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6341 ctxt->sax->warning(ctxt->userData,
6342 "Malformed value for xml:lang : %s\n", val);
6343 }
6344 }
6345
6346 /*
6347 * Check that xml:space conforms to the specification
6348 */
6349 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6350 if (xmlStrEqual(val, BAD_CAST "default"))
6351 *(ctxt->space) = 0;
6352 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6353 *(ctxt->space) = 1;
6354 else {
6355 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6356 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6357 ctxt->sax->error(ctxt->userData,
6358"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6359 val);
6360 ctxt->wellFormed = 0;
6361 ctxt->disableSAX = 1;
6362 }
6363 }
6364
6365 *value = val;
6366 return(name);
6367}
6368
6369/**
6370 * xmlParseStartTag:
6371 * @ctxt: an XML parser context
6372 *
6373 * parse a start of tag either for rule element or
6374 * EmptyElement. In both case we don't parse the tag closing chars.
6375 *
6376 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6377 *
6378 * [ WFC: Unique Att Spec ]
6379 * No attribute name may appear more than once in the same start-tag or
6380 * empty-element tag.
6381 *
6382 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6383 *
6384 * [ WFC: Unique Att Spec ]
6385 * No attribute name may appear more than once in the same start-tag or
6386 * empty-element tag.
6387 *
6388 * With namespace:
6389 *
6390 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6391 *
6392 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6393 *
6394 * Returns the element name parsed
6395 */
6396
6397xmlChar *
6398xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6399 xmlChar *name;
6400 xmlChar *attname;
6401 xmlChar *attvalue;
6402 const xmlChar **atts = NULL;
6403 int nbatts = 0;
6404 int maxatts = 0;
6405 int i;
6406
6407 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006408 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006409
6410 name = xmlParseName(ctxt);
6411 if (name == NULL) {
6412 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6413 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6414 ctxt->sax->error(ctxt->userData,
6415 "xmlParseStartTag: invalid element name\n");
6416 ctxt->wellFormed = 0;
6417 ctxt->disableSAX = 1;
6418 return(NULL);
6419 }
6420
6421 /*
6422 * Now parse the attributes, it ends up with the ending
6423 *
6424 * (S Attribute)* S?
6425 */
6426 SKIP_BLANKS;
6427 GROW;
6428
Daniel Veillard21a0f912001-02-25 19:54:14 +00006429 while ((RAW != '>') &&
6430 ((RAW != '/') || (NXT(1) != '>')) &&
6431 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006432 const xmlChar *q = CUR_PTR;
6433 int cons = ctxt->input->consumed;
6434
6435 attname = xmlParseAttribute(ctxt, &attvalue);
6436 if ((attname != NULL) && (attvalue != NULL)) {
6437 /*
6438 * [ WFC: Unique Att Spec ]
6439 * No attribute name may appear more than once in the same
6440 * start-tag or empty-element tag.
6441 */
6442 for (i = 0; i < nbatts;i += 2) {
6443 if (xmlStrEqual(atts[i], attname)) {
6444 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6445 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6446 ctxt->sax->error(ctxt->userData,
6447 "Attribute %s redefined\n",
6448 attname);
6449 ctxt->wellFormed = 0;
6450 ctxt->disableSAX = 1;
6451 xmlFree(attname);
6452 xmlFree(attvalue);
6453 goto failed;
6454 }
6455 }
6456
6457 /*
6458 * Add the pair to atts
6459 */
6460 if (atts == NULL) {
6461 maxatts = 10;
6462 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6463 if (atts == NULL) {
6464 xmlGenericError(xmlGenericErrorContext,
6465 "malloc of %ld byte failed\n",
6466 maxatts * (long)sizeof(xmlChar *));
6467 return(NULL);
6468 }
6469 } else if (nbatts + 4 > maxatts) {
6470 maxatts *= 2;
6471 atts = (const xmlChar **) xmlRealloc((void *) atts,
6472 maxatts * sizeof(xmlChar *));
6473 if (atts == NULL) {
6474 xmlGenericError(xmlGenericErrorContext,
6475 "realloc of %ld byte failed\n",
6476 maxatts * (long)sizeof(xmlChar *));
6477 return(NULL);
6478 }
6479 }
6480 atts[nbatts++] = attname;
6481 atts[nbatts++] = attvalue;
6482 atts[nbatts] = NULL;
6483 atts[nbatts + 1] = NULL;
6484 } else {
6485 if (attname != NULL)
6486 xmlFree(attname);
6487 if (attvalue != NULL)
6488 xmlFree(attvalue);
6489 }
6490
6491failed:
6492
6493 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6494 break;
6495 if (!IS_BLANK(RAW)) {
6496 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6497 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6498 ctxt->sax->error(ctxt->userData,
6499 "attributes construct error\n");
6500 ctxt->wellFormed = 0;
6501 ctxt->disableSAX = 1;
6502 }
6503 SKIP_BLANKS;
6504 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6505 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6506 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6507 ctxt->sax->error(ctxt->userData,
6508 "xmlParseStartTag: problem parsing attributes\n");
6509 ctxt->wellFormed = 0;
6510 ctxt->disableSAX = 1;
6511 break;
6512 }
6513 GROW;
6514 }
6515
6516 /*
6517 * SAX: Start of Element !
6518 */
6519 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6520 (!ctxt->disableSAX))
6521 ctxt->sax->startElement(ctxt->userData, name, atts);
6522
6523 if (atts != NULL) {
6524 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6525 xmlFree((void *) atts);
6526 }
6527 return(name);
6528}
6529
6530/**
6531 * xmlParseEndTag:
6532 * @ctxt: an XML parser context
6533 *
6534 * parse an end of tag
6535 *
6536 * [42] ETag ::= '</' Name S? '>'
6537 *
6538 * With namespace
6539 *
6540 * [NS 9] ETag ::= '</' QName S? '>'
6541 */
6542
6543void
6544xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6545 xmlChar *name;
6546 xmlChar *oldname;
6547
6548 GROW;
6549 if ((RAW != '<') || (NXT(1) != '/')) {
6550 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6551 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6552 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6553 ctxt->wellFormed = 0;
6554 ctxt->disableSAX = 1;
6555 return;
6556 }
6557 SKIP(2);
6558
6559 name = xmlParseName(ctxt);
6560
6561 /*
6562 * We should definitely be at the ending "S? '>'" part
6563 */
6564 GROW;
6565 SKIP_BLANKS;
6566 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6567 ctxt->errNo = XML_ERR_GT_REQUIRED;
6568 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6569 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6570 ctxt->wellFormed = 0;
6571 ctxt->disableSAX = 1;
6572 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006573 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006574
6575 /*
6576 * [ WFC: Element Type Match ]
6577 * The Name in an element's end-tag must match the element type in the
6578 * start-tag.
6579 *
6580 */
6581 if ((name == NULL) || (ctxt->name == NULL) ||
6582 (!xmlStrEqual(name, ctxt->name))) {
6583 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6584 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6585 if ((name != NULL) && (ctxt->name != NULL)) {
6586 ctxt->sax->error(ctxt->userData,
6587 "Opening and ending tag mismatch: %s and %s\n",
6588 ctxt->name, name);
6589 } else if (ctxt->name != NULL) {
6590 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006591 "Ending tag error for: %s\n", ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006592 } else {
6593 ctxt->sax->error(ctxt->userData,
6594 "Ending tag error: internal error ???\n");
6595 }
6596
6597 }
6598 ctxt->wellFormed = 0;
6599 ctxt->disableSAX = 1;
6600 }
6601
6602 /*
6603 * SAX: End of Tag
6604 */
6605 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6606 (!ctxt->disableSAX))
6607 ctxt->sax->endElement(ctxt->userData, name);
6608
6609 if (name != NULL)
6610 xmlFree(name);
6611 oldname = namePop(ctxt);
6612 spacePop(ctxt);
6613 if (oldname != NULL) {
6614#ifdef DEBUG_STACK
6615 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6616#endif
6617 xmlFree(oldname);
6618 }
6619 return;
6620}
6621
6622/**
6623 * xmlParseCDSect:
6624 * @ctxt: an XML parser context
6625 *
6626 * Parse escaped pure raw content.
6627 *
6628 * [18] CDSect ::= CDStart CData CDEnd
6629 *
6630 * [19] CDStart ::= '<![CDATA['
6631 *
6632 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6633 *
6634 * [21] CDEnd ::= ']]>'
6635 */
6636void
6637xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6638 xmlChar *buf = NULL;
6639 int len = 0;
6640 int size = XML_PARSER_BUFFER_SIZE;
6641 int r, rl;
6642 int s, sl;
6643 int cur, l;
6644 int count = 0;
6645
6646 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6647 (NXT(2) == '[') && (NXT(3) == 'C') &&
6648 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6649 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6650 (NXT(8) == '[')) {
6651 SKIP(9);
6652 } else
6653 return;
6654
6655 ctxt->instate = XML_PARSER_CDATA_SECTION;
6656 r = CUR_CHAR(rl);
6657 if (!IS_CHAR(r)) {
6658 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6659 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6660 ctxt->sax->error(ctxt->userData,
6661 "CData section not finished\n");
6662 ctxt->wellFormed = 0;
6663 ctxt->disableSAX = 1;
6664 ctxt->instate = XML_PARSER_CONTENT;
6665 return;
6666 }
6667 NEXTL(rl);
6668 s = CUR_CHAR(sl);
6669 if (!IS_CHAR(s)) {
6670 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6671 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6672 ctxt->sax->error(ctxt->userData,
6673 "CData section not finished\n");
6674 ctxt->wellFormed = 0;
6675 ctxt->disableSAX = 1;
6676 ctxt->instate = XML_PARSER_CONTENT;
6677 return;
6678 }
6679 NEXTL(sl);
6680 cur = CUR_CHAR(l);
6681 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6682 if (buf == NULL) {
6683 xmlGenericError(xmlGenericErrorContext,
6684 "malloc of %d byte failed\n", size);
6685 return;
6686 }
6687 while (IS_CHAR(cur) &&
6688 ((r != ']') || (s != ']') || (cur != '>'))) {
6689 if (len + 5 >= size) {
6690 size *= 2;
6691 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6692 if (buf == NULL) {
6693 xmlGenericError(xmlGenericErrorContext,
6694 "realloc of %d byte failed\n", size);
6695 return;
6696 }
6697 }
6698 COPY_BUF(rl,buf,len,r);
6699 r = s;
6700 rl = sl;
6701 s = cur;
6702 sl = l;
6703 count++;
6704 if (count > 50) {
6705 GROW;
6706 count = 0;
6707 }
6708 NEXTL(l);
6709 cur = CUR_CHAR(l);
6710 }
6711 buf[len] = 0;
6712 ctxt->instate = XML_PARSER_CONTENT;
6713 if (cur != '>') {
6714 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6715 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6716 ctxt->sax->error(ctxt->userData,
6717 "CData section not finished\n%.50s\n", buf);
6718 ctxt->wellFormed = 0;
6719 ctxt->disableSAX = 1;
6720 xmlFree(buf);
6721 return;
6722 }
6723 NEXTL(l);
6724
6725 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006726 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00006727 */
6728 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6729 if (ctxt->sax->cdataBlock != NULL)
6730 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006731 else if (ctxt->sax->characters != NULL)
6732 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006733 }
6734 xmlFree(buf);
6735}
6736
6737/**
6738 * xmlParseContent:
6739 * @ctxt: an XML parser context
6740 *
6741 * Parse a content:
6742 *
6743 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6744 */
6745
6746void
6747xmlParseContent(xmlParserCtxtPtr ctxt) {
6748 GROW;
Daniel Veillard561b7f82002-03-20 21:55:57 +00006749 while (((RAW != 0) || (ctxt->token != 0)) &&
Owen Taylor3473f882001-02-23 17:55:21 +00006750 ((RAW != '<') || (NXT(1) != '/'))) {
6751 const xmlChar *test = CUR_PTR;
6752 int cons = ctxt->input->consumed;
Daniel Veillard04be4f52001-03-26 21:23:53 +00006753 int tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006754 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006755
6756 /*
6757 * Handle possible processed charrefs.
6758 */
6759 if (ctxt->token != 0) {
6760 xmlParseCharData(ctxt, 0);
6761 }
6762 /*
6763 * First case : a Processing Instruction.
6764 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006765 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006766 xmlParsePI(ctxt);
6767 }
6768
6769 /*
6770 * Second case : a CDSection
6771 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006772 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006773 (NXT(2) == '[') && (NXT(3) == 'C') &&
6774 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6775 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6776 (NXT(8) == '[')) {
6777 xmlParseCDSect(ctxt);
6778 }
6779
6780 /*
6781 * Third case : a comment
6782 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006783 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006784 (NXT(2) == '-') && (NXT(3) == '-')) {
6785 xmlParseComment(ctxt);
6786 ctxt->instate = XML_PARSER_CONTENT;
6787 }
6788
6789 /*
6790 * Fourth case : a sub-element.
6791 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006792 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006793 xmlParseElement(ctxt);
6794 }
6795
6796 /*
6797 * Fifth case : a reference. If if has not been resolved,
6798 * parsing returns it's Name, create the node
6799 */
6800
Daniel Veillard21a0f912001-02-25 19:54:14 +00006801 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006802 xmlParseReference(ctxt);
6803 }
6804
6805 /*
6806 * Last case, text. Note that References are handled directly.
6807 */
6808 else {
6809 xmlParseCharData(ctxt, 0);
6810 }
6811
6812 GROW;
6813 /*
6814 * Pop-up of finished entities.
6815 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00006816 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00006817 xmlPopInput(ctxt);
6818 SHRINK;
6819
6820 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6821 (tok == ctxt->token)) {
6822 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6823 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6824 ctxt->sax->error(ctxt->userData,
6825 "detected an error in element content\n");
6826 ctxt->wellFormed = 0;
6827 ctxt->disableSAX = 1;
6828 ctxt->instate = XML_PARSER_EOF;
6829 break;
6830 }
6831 }
6832}
6833
6834/**
6835 * xmlParseElement:
6836 * @ctxt: an XML parser context
6837 *
6838 * parse an XML element, this is highly recursive
6839 *
6840 * [39] element ::= EmptyElemTag | STag content ETag
6841 *
6842 * [ WFC: Element Type Match ]
6843 * The Name in an element's end-tag must match the element type in the
6844 * start-tag.
6845 *
6846 * [ VC: Element Valid ]
6847 * An element is valid if there is a declaration matching elementdecl
6848 * where the Name matches the element type and one of the following holds:
6849 * - The declaration matches EMPTY and the element has no content.
6850 * - The declaration matches children and the sequence of child elements
6851 * belongs to the language generated by the regular expression in the
6852 * content model, with optional white space (characters matching the
6853 * nonterminal S) between each pair of child elements.
6854 * - The declaration matches Mixed and the content consists of character
6855 * data and child elements whose types match names in the content model.
6856 * - The declaration matches ANY, and the types of any child elements have
6857 * been declared.
6858 */
6859
6860void
6861xmlParseElement(xmlParserCtxtPtr ctxt) {
6862 const xmlChar *openTag = CUR_PTR;
6863 xmlChar *name;
6864 xmlChar *oldname;
6865 xmlParserNodeInfo node_info;
6866 xmlNodePtr ret;
6867
6868 /* Capture start position */
6869 if (ctxt->record_info) {
6870 node_info.begin_pos = ctxt->input->consumed +
6871 (CUR_PTR - ctxt->input->base);
6872 node_info.begin_line = ctxt->input->line;
6873 }
6874
6875 if (ctxt->spaceNr == 0)
6876 spacePush(ctxt, -1);
6877 else
6878 spacePush(ctxt, *ctxt->space);
6879
6880 name = xmlParseStartTag(ctxt);
6881 if (name == NULL) {
6882 spacePop(ctxt);
6883 return;
6884 }
6885 namePush(ctxt, name);
6886 ret = ctxt->node;
6887
6888 /*
6889 * [ VC: Root Element Type ]
6890 * The Name in the document type declaration must match the element
6891 * type of the root element.
6892 */
6893 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6894 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6895 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6896
6897 /*
6898 * Check for an Empty Element.
6899 */
6900 if ((RAW == '/') && (NXT(1) == '>')) {
6901 SKIP(2);
6902 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6903 (!ctxt->disableSAX))
6904 ctxt->sax->endElement(ctxt->userData, name);
6905 oldname = namePop(ctxt);
6906 spacePop(ctxt);
6907 if (oldname != NULL) {
6908#ifdef DEBUG_STACK
6909 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6910#endif
6911 xmlFree(oldname);
6912 }
6913 if ( ret != NULL && ctxt->record_info ) {
6914 node_info.end_pos = ctxt->input->consumed +
6915 (CUR_PTR - ctxt->input->base);
6916 node_info.end_line = ctxt->input->line;
6917 node_info.node = ret;
6918 xmlParserAddNodeInfo(ctxt, &node_info);
6919 }
6920 return;
6921 }
6922 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006923 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006924 } else {
6925 ctxt->errNo = XML_ERR_GT_REQUIRED;
6926 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6927 ctxt->sax->error(ctxt->userData,
6928 "Couldn't find end of Start Tag\n%.30s\n",
6929 openTag);
6930 ctxt->wellFormed = 0;
6931 ctxt->disableSAX = 1;
6932
6933 /*
6934 * end of parsing of this node.
6935 */
6936 nodePop(ctxt);
6937 oldname = namePop(ctxt);
6938 spacePop(ctxt);
6939 if (oldname != NULL) {
6940#ifdef DEBUG_STACK
6941 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6942#endif
6943 xmlFree(oldname);
6944 }
6945
6946 /*
6947 * Capture end position and add node
6948 */
6949 if ( ret != NULL && ctxt->record_info ) {
6950 node_info.end_pos = ctxt->input->consumed +
6951 (CUR_PTR - ctxt->input->base);
6952 node_info.end_line = ctxt->input->line;
6953 node_info.node = ret;
6954 xmlParserAddNodeInfo(ctxt, &node_info);
6955 }
6956 return;
6957 }
6958
6959 /*
6960 * Parse the content of the element:
6961 */
6962 xmlParseContent(ctxt);
6963 if (!IS_CHAR(RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00006964 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00006965 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6966 ctxt->sax->error(ctxt->userData,
6967 "Premature end of data in tag %.30s\n", openTag);
6968 ctxt->wellFormed = 0;
6969 ctxt->disableSAX = 1;
6970
6971 /*
6972 * end of parsing of this node.
6973 */
6974 nodePop(ctxt);
6975 oldname = namePop(ctxt);
6976 spacePop(ctxt);
6977 if (oldname != NULL) {
6978#ifdef DEBUG_STACK
6979 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6980#endif
6981 xmlFree(oldname);
6982 }
6983 return;
6984 }
6985
6986 /*
6987 * parse the end of tag: '</' should be here.
6988 */
6989 xmlParseEndTag(ctxt);
6990
6991 /*
6992 * Capture end position and add node
6993 */
6994 if ( ret != NULL && ctxt->record_info ) {
6995 node_info.end_pos = ctxt->input->consumed +
6996 (CUR_PTR - ctxt->input->base);
6997 node_info.end_line = ctxt->input->line;
6998 node_info.node = ret;
6999 xmlParserAddNodeInfo(ctxt, &node_info);
7000 }
7001}
7002
7003/**
7004 * xmlParseVersionNum:
7005 * @ctxt: an XML parser context
7006 *
7007 * parse the XML version value.
7008 *
7009 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7010 *
7011 * Returns the string giving the XML version number, or NULL
7012 */
7013xmlChar *
7014xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7015 xmlChar *buf = NULL;
7016 int len = 0;
7017 int size = 10;
7018 xmlChar cur;
7019
7020 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7021 if (buf == NULL) {
7022 xmlGenericError(xmlGenericErrorContext,
7023 "malloc of %d byte failed\n", size);
7024 return(NULL);
7025 }
7026 cur = CUR;
7027 while (((cur >= 'a') && (cur <= 'z')) ||
7028 ((cur >= 'A') && (cur <= 'Z')) ||
7029 ((cur >= '0') && (cur <= '9')) ||
7030 (cur == '_') || (cur == '.') ||
7031 (cur == ':') || (cur == '-')) {
7032 if (len + 1 >= size) {
7033 size *= 2;
7034 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7035 if (buf == NULL) {
7036 xmlGenericError(xmlGenericErrorContext,
7037 "realloc of %d byte failed\n", size);
7038 return(NULL);
7039 }
7040 }
7041 buf[len++] = cur;
7042 NEXT;
7043 cur=CUR;
7044 }
7045 buf[len] = 0;
7046 return(buf);
7047}
7048
7049/**
7050 * xmlParseVersionInfo:
7051 * @ctxt: an XML parser context
7052 *
7053 * parse the XML version.
7054 *
7055 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7056 *
7057 * [25] Eq ::= S? '=' S?
7058 *
7059 * Returns the version string, e.g. "1.0"
7060 */
7061
7062xmlChar *
7063xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7064 xmlChar *version = NULL;
7065 const xmlChar *q;
7066
7067 if ((RAW == 'v') && (NXT(1) == 'e') &&
7068 (NXT(2) == 'r') && (NXT(3) == 's') &&
7069 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7070 (NXT(6) == 'n')) {
7071 SKIP(7);
7072 SKIP_BLANKS;
7073 if (RAW != '=') {
7074 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7075 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7076 ctxt->sax->error(ctxt->userData,
7077 "xmlParseVersionInfo : expected '='\n");
7078 ctxt->wellFormed = 0;
7079 ctxt->disableSAX = 1;
7080 return(NULL);
7081 }
7082 NEXT;
7083 SKIP_BLANKS;
7084 if (RAW == '"') {
7085 NEXT;
7086 q = CUR_PTR;
7087 version = xmlParseVersionNum(ctxt);
7088 if (RAW != '"') {
7089 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7090 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7091 ctxt->sax->error(ctxt->userData,
7092 "String not closed\n%.50s\n", q);
7093 ctxt->wellFormed = 0;
7094 ctxt->disableSAX = 1;
7095 } else
7096 NEXT;
7097 } else if (RAW == '\''){
7098 NEXT;
7099 q = CUR_PTR;
7100 version = xmlParseVersionNum(ctxt);
7101 if (RAW != '\'') {
7102 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7103 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7104 ctxt->sax->error(ctxt->userData,
7105 "String not closed\n%.50s\n", q);
7106 ctxt->wellFormed = 0;
7107 ctxt->disableSAX = 1;
7108 } else
7109 NEXT;
7110 } else {
7111 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7112 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7113 ctxt->sax->error(ctxt->userData,
7114 "xmlParseVersionInfo : expected ' or \"\n");
7115 ctxt->wellFormed = 0;
7116 ctxt->disableSAX = 1;
7117 }
7118 }
7119 return(version);
7120}
7121
7122/**
7123 * xmlParseEncName:
7124 * @ctxt: an XML parser context
7125 *
7126 * parse the XML encoding name
7127 *
7128 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7129 *
7130 * Returns the encoding name value or NULL
7131 */
7132xmlChar *
7133xmlParseEncName(xmlParserCtxtPtr ctxt) {
7134 xmlChar *buf = NULL;
7135 int len = 0;
7136 int size = 10;
7137 xmlChar cur;
7138
7139 cur = CUR;
7140 if (((cur >= 'a') && (cur <= 'z')) ||
7141 ((cur >= 'A') && (cur <= 'Z'))) {
7142 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7143 if (buf == NULL) {
7144 xmlGenericError(xmlGenericErrorContext,
7145 "malloc of %d byte failed\n", size);
7146 return(NULL);
7147 }
7148
7149 buf[len++] = cur;
7150 NEXT;
7151 cur = CUR;
7152 while (((cur >= 'a') && (cur <= 'z')) ||
7153 ((cur >= 'A') && (cur <= 'Z')) ||
7154 ((cur >= '0') && (cur <= '9')) ||
7155 (cur == '.') || (cur == '_') ||
7156 (cur == '-')) {
7157 if (len + 1 >= size) {
7158 size *= 2;
7159 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7160 if (buf == NULL) {
7161 xmlGenericError(xmlGenericErrorContext,
7162 "realloc of %d byte failed\n", size);
7163 return(NULL);
7164 }
7165 }
7166 buf[len++] = cur;
7167 NEXT;
7168 cur = CUR;
7169 if (cur == 0) {
7170 SHRINK;
7171 GROW;
7172 cur = CUR;
7173 }
7174 }
7175 buf[len] = 0;
7176 } else {
7177 ctxt->errNo = XML_ERR_ENCODING_NAME;
7178 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7179 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7180 ctxt->wellFormed = 0;
7181 ctxt->disableSAX = 1;
7182 }
7183 return(buf);
7184}
7185
7186/**
7187 * xmlParseEncodingDecl:
7188 * @ctxt: an XML parser context
7189 *
7190 * parse the XML encoding declaration
7191 *
7192 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7193 *
7194 * this setups the conversion filters.
7195 *
7196 * Returns the encoding value or NULL
7197 */
7198
7199xmlChar *
7200xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7201 xmlChar *encoding = NULL;
7202 const xmlChar *q;
7203
7204 SKIP_BLANKS;
7205 if ((RAW == 'e') && (NXT(1) == 'n') &&
7206 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7207 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7208 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7209 SKIP(8);
7210 SKIP_BLANKS;
7211 if (RAW != '=') {
7212 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7213 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7214 ctxt->sax->error(ctxt->userData,
7215 "xmlParseEncodingDecl : expected '='\n");
7216 ctxt->wellFormed = 0;
7217 ctxt->disableSAX = 1;
7218 return(NULL);
7219 }
7220 NEXT;
7221 SKIP_BLANKS;
7222 if (RAW == '"') {
7223 NEXT;
7224 q = CUR_PTR;
7225 encoding = xmlParseEncName(ctxt);
7226 if (RAW != '"') {
7227 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7228 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7229 ctxt->sax->error(ctxt->userData,
7230 "String not closed\n%.50s\n", q);
7231 ctxt->wellFormed = 0;
7232 ctxt->disableSAX = 1;
7233 } else
7234 NEXT;
7235 } else if (RAW == '\''){
7236 NEXT;
7237 q = CUR_PTR;
7238 encoding = xmlParseEncName(ctxt);
7239 if (RAW != '\'') {
7240 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7241 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7242 ctxt->sax->error(ctxt->userData,
7243 "String not closed\n%.50s\n", q);
7244 ctxt->wellFormed = 0;
7245 ctxt->disableSAX = 1;
7246 } else
7247 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00007248 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007249 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7250 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7251 ctxt->sax->error(ctxt->userData,
7252 "xmlParseEncodingDecl : expected ' or \"\n");
7253 ctxt->wellFormed = 0;
7254 ctxt->disableSAX = 1;
7255 }
7256 if (encoding != NULL) {
7257 xmlCharEncoding enc;
7258 xmlCharEncodingHandlerPtr handler;
7259
7260 if (ctxt->input->encoding != NULL)
7261 xmlFree((xmlChar *) ctxt->input->encoding);
7262 ctxt->input->encoding = encoding;
7263
7264 enc = xmlParseCharEncoding((const char *) encoding);
7265 /*
7266 * registered set of known encodings
7267 */
7268 if (enc != XML_CHAR_ENCODING_ERROR) {
7269 xmlSwitchEncoding(ctxt, enc);
7270 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
Daniel Veillard46d6c442002-04-09 16:10:39 +00007271 ctxt->input->encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007272 xmlFree(encoding);
7273 return(NULL);
7274 }
7275 } else {
7276 /*
7277 * fallback for unknown encodings
7278 */
7279 handler = xmlFindCharEncodingHandler((const char *) encoding);
7280 if (handler != NULL) {
7281 xmlSwitchToEncoding(ctxt, handler);
7282 } else {
7283 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7284 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7285 ctxt->sax->error(ctxt->userData,
7286 "Unsupported encoding %s\n", encoding);
7287 return(NULL);
7288 }
7289 }
7290 }
7291 }
7292 return(encoding);
7293}
7294
7295/**
7296 * xmlParseSDDecl:
7297 * @ctxt: an XML parser context
7298 *
7299 * parse the XML standalone declaration
7300 *
7301 * [32] SDDecl ::= S 'standalone' Eq
7302 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7303 *
7304 * [ VC: Standalone Document Declaration ]
7305 * TODO The standalone document declaration must have the value "no"
7306 * if any external markup declarations contain declarations of:
7307 * - attributes with default values, if elements to which these
7308 * attributes apply appear in the document without specifications
7309 * of values for these attributes, or
7310 * - entities (other than amp, lt, gt, apos, quot), if references
7311 * to those entities appear in the document, or
7312 * - attributes with values subject to normalization, where the
7313 * attribute appears in the document with a value which will change
7314 * as a result of normalization, or
7315 * - element types with element content, if white space occurs directly
7316 * within any instance of those types.
7317 *
7318 * Returns 1 if standalone, 0 otherwise
7319 */
7320
7321int
7322xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7323 int standalone = -1;
7324
7325 SKIP_BLANKS;
7326 if ((RAW == 's') && (NXT(1) == 't') &&
7327 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7328 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7329 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7330 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7331 SKIP(10);
7332 SKIP_BLANKS;
7333 if (RAW != '=') {
7334 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7335 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7336 ctxt->sax->error(ctxt->userData,
7337 "XML standalone declaration : expected '='\n");
7338 ctxt->wellFormed = 0;
7339 ctxt->disableSAX = 1;
7340 return(standalone);
7341 }
7342 NEXT;
7343 SKIP_BLANKS;
7344 if (RAW == '\''){
7345 NEXT;
7346 if ((RAW == 'n') && (NXT(1) == 'o')) {
7347 standalone = 0;
7348 SKIP(2);
7349 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7350 (NXT(2) == 's')) {
7351 standalone = 1;
7352 SKIP(3);
7353 } else {
7354 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7355 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7356 ctxt->sax->error(ctxt->userData,
7357 "standalone accepts only 'yes' or 'no'\n");
7358 ctxt->wellFormed = 0;
7359 ctxt->disableSAX = 1;
7360 }
7361 if (RAW != '\'') {
7362 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7363 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7364 ctxt->sax->error(ctxt->userData, "String not closed\n");
7365 ctxt->wellFormed = 0;
7366 ctxt->disableSAX = 1;
7367 } else
7368 NEXT;
7369 } else if (RAW == '"'){
7370 NEXT;
7371 if ((RAW == 'n') && (NXT(1) == 'o')) {
7372 standalone = 0;
7373 SKIP(2);
7374 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7375 (NXT(2) == 's')) {
7376 standalone = 1;
7377 SKIP(3);
7378 } else {
7379 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7380 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7381 ctxt->sax->error(ctxt->userData,
7382 "standalone accepts only 'yes' or 'no'\n");
7383 ctxt->wellFormed = 0;
7384 ctxt->disableSAX = 1;
7385 }
7386 if (RAW != '"') {
7387 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7388 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7389 ctxt->sax->error(ctxt->userData, "String not closed\n");
7390 ctxt->wellFormed = 0;
7391 ctxt->disableSAX = 1;
7392 } else
7393 NEXT;
7394 } else {
7395 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7396 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7397 ctxt->sax->error(ctxt->userData,
7398 "Standalone value not found\n");
7399 ctxt->wellFormed = 0;
7400 ctxt->disableSAX = 1;
7401 }
7402 }
7403 return(standalone);
7404}
7405
7406/**
7407 * xmlParseXMLDecl:
7408 * @ctxt: an XML parser context
7409 *
7410 * parse an XML declaration header
7411 *
7412 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7413 */
7414
7415void
7416xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7417 xmlChar *version;
7418
7419 /*
7420 * We know that '<?xml' is here.
7421 */
7422 SKIP(5);
7423
7424 if (!IS_BLANK(RAW)) {
7425 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7426 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7427 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7428 ctxt->wellFormed = 0;
7429 ctxt->disableSAX = 1;
7430 }
7431 SKIP_BLANKS;
7432
7433 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007434 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007435 */
7436 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007437 if (version == NULL) {
7438 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7439 ctxt->sax->error(ctxt->userData,
7440 "Malformed declaration expecting version\n");
7441 ctxt->wellFormed = 0;
7442 ctxt->disableSAX = 1;
7443 } else {
7444 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7445 /*
7446 * TODO: Blueberry should be detected here
7447 */
7448 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7449 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7450 version);
7451 }
7452 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007453 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007454 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007455 }
Owen Taylor3473f882001-02-23 17:55:21 +00007456
7457 /*
7458 * We may have the encoding declaration
7459 */
7460 if (!IS_BLANK(RAW)) {
7461 if ((RAW == '?') && (NXT(1) == '>')) {
7462 SKIP(2);
7463 return;
7464 }
7465 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7467 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7468 ctxt->wellFormed = 0;
7469 ctxt->disableSAX = 1;
7470 }
7471 xmlParseEncodingDecl(ctxt);
7472 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7473 /*
7474 * The XML REC instructs us to stop parsing right here
7475 */
7476 return;
7477 }
7478
7479 /*
7480 * We may have the standalone status.
7481 */
7482 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7483 if ((RAW == '?') && (NXT(1) == '>')) {
7484 SKIP(2);
7485 return;
7486 }
7487 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7488 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7489 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7490 ctxt->wellFormed = 0;
7491 ctxt->disableSAX = 1;
7492 }
7493 SKIP_BLANKS;
7494 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7495
7496 SKIP_BLANKS;
7497 if ((RAW == '?') && (NXT(1) == '>')) {
7498 SKIP(2);
7499 } else if (RAW == '>') {
7500 /* Deprecated old WD ... */
7501 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7502 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7503 ctxt->sax->error(ctxt->userData,
7504 "XML declaration must end-up with '?>'\n");
7505 ctxt->wellFormed = 0;
7506 ctxt->disableSAX = 1;
7507 NEXT;
7508 } else {
7509 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7510 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7511 ctxt->sax->error(ctxt->userData,
7512 "parsing XML declaration: '?>' expected\n");
7513 ctxt->wellFormed = 0;
7514 ctxt->disableSAX = 1;
7515 MOVETO_ENDTAG(CUR_PTR);
7516 NEXT;
7517 }
7518}
7519
7520/**
7521 * xmlParseMisc:
7522 * @ctxt: an XML parser context
7523 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007524 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007525 *
7526 * [27] Misc ::= Comment | PI | S
7527 */
7528
7529void
7530xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00007531 while (((RAW == '<') && (NXT(1) == '?')) ||
7532 ((RAW == '<') && (NXT(1) == '!') &&
7533 (NXT(2) == '-') && (NXT(3) == '-')) ||
7534 IS_BLANK(CUR)) {
7535 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007536 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00007537 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007538 NEXT;
7539 } else
7540 xmlParseComment(ctxt);
7541 }
7542}
7543
7544/**
7545 * xmlParseDocument:
7546 * @ctxt: an XML parser context
7547 *
7548 * parse an XML document (and build a tree if using the standard SAX
7549 * interface).
7550 *
7551 * [1] document ::= prolog element Misc*
7552 *
7553 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7554 *
7555 * Returns 0, -1 in case of error. the parser context is augmented
7556 * as a result of the parsing.
7557 */
7558
7559int
7560xmlParseDocument(xmlParserCtxtPtr ctxt) {
7561 xmlChar start[4];
7562 xmlCharEncoding enc;
7563
7564 xmlInitParser();
7565
7566 GROW;
7567
7568 /*
7569 * SAX: beginning of the document processing.
7570 */
7571 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7572 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7573
Daniel Veillard50f34372001-08-03 12:06:36 +00007574 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007575 /*
7576 * Get the 4 first bytes and decode the charset
7577 * if enc != XML_CHAR_ENCODING_NONE
7578 * plug some encoding conversion routines.
7579 */
7580 start[0] = RAW;
7581 start[1] = NXT(1);
7582 start[2] = NXT(2);
7583 start[3] = NXT(3);
7584 enc = xmlDetectCharEncoding(start, 4);
7585 if (enc != XML_CHAR_ENCODING_NONE) {
7586 xmlSwitchEncoding(ctxt, enc);
7587 }
Owen Taylor3473f882001-02-23 17:55:21 +00007588 }
7589
7590
7591 if (CUR == 0) {
7592 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7593 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7594 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7595 ctxt->wellFormed = 0;
7596 ctxt->disableSAX = 1;
7597 }
7598
7599 /*
7600 * Check for the XMLDecl in the Prolog.
7601 */
7602 GROW;
7603 if ((RAW == '<') && (NXT(1) == '?') &&
7604 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7605 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7606
7607 /*
7608 * Note that we will switch encoding on the fly.
7609 */
7610 xmlParseXMLDecl(ctxt);
7611 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7612 /*
7613 * The XML REC instructs us to stop parsing right here
7614 */
7615 return(-1);
7616 }
7617 ctxt->standalone = ctxt->input->standalone;
7618 SKIP_BLANKS;
7619 } else {
7620 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7621 }
7622 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7623 ctxt->sax->startDocument(ctxt->userData);
7624
7625 /*
7626 * The Misc part of the Prolog
7627 */
7628 GROW;
7629 xmlParseMisc(ctxt);
7630
7631 /*
7632 * Then possibly doc type declaration(s) and more Misc
7633 * (doctypedecl Misc*)?
7634 */
7635 GROW;
7636 if ((RAW == '<') && (NXT(1) == '!') &&
7637 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7638 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7639 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7640 (NXT(8) == 'E')) {
7641
7642 ctxt->inSubset = 1;
7643 xmlParseDocTypeDecl(ctxt);
7644 if (RAW == '[') {
7645 ctxt->instate = XML_PARSER_DTD;
7646 xmlParseInternalSubset(ctxt);
7647 }
7648
7649 /*
7650 * Create and update the external subset.
7651 */
7652 ctxt->inSubset = 2;
7653 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7654 (!ctxt->disableSAX))
7655 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7656 ctxt->extSubSystem, ctxt->extSubURI);
7657 ctxt->inSubset = 0;
7658
7659
7660 ctxt->instate = XML_PARSER_PROLOG;
7661 xmlParseMisc(ctxt);
7662 }
7663
7664 /*
7665 * Time to start parsing the tree itself
7666 */
7667 GROW;
7668 if (RAW != '<') {
7669 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7670 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7671 ctxt->sax->error(ctxt->userData,
7672 "Start tag expected, '<' not found\n");
7673 ctxt->wellFormed = 0;
7674 ctxt->disableSAX = 1;
7675 ctxt->instate = XML_PARSER_EOF;
7676 } else {
7677 ctxt->instate = XML_PARSER_CONTENT;
7678 xmlParseElement(ctxt);
7679 ctxt->instate = XML_PARSER_EPILOG;
7680
7681
7682 /*
7683 * The Misc part at the end
7684 */
7685 xmlParseMisc(ctxt);
7686
Daniel Veillard561b7f82002-03-20 21:55:57 +00007687 if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00007688 ctxt->errNo = XML_ERR_DOCUMENT_END;
7689 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7690 ctxt->sax->error(ctxt->userData,
7691 "Extra content at the end of the document\n");
7692 ctxt->wellFormed = 0;
7693 ctxt->disableSAX = 1;
7694 }
7695 ctxt->instate = XML_PARSER_EOF;
7696 }
7697
7698 /*
7699 * SAX: end of the document processing.
7700 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007701 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007702 ctxt->sax->endDocument(ctxt->userData);
7703
Daniel Veillard5997aca2002-03-18 18:36:20 +00007704 /*
7705 * Remove locally kept entity definitions if the tree was not built
7706 */
7707 if ((ctxt->myDoc != NULL) &&
7708 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
7709 xmlFreeDoc(ctxt->myDoc);
7710 ctxt->myDoc = NULL;
7711 }
7712
Daniel Veillardc7612992002-02-17 22:47:37 +00007713 if (! ctxt->wellFormed) {
7714 ctxt->valid = 0;
7715 return(-1);
7716 }
Owen Taylor3473f882001-02-23 17:55:21 +00007717 return(0);
7718}
7719
7720/**
7721 * xmlParseExtParsedEnt:
7722 * @ctxt: an XML parser context
7723 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007724 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00007725 * An external general parsed entity is well-formed if it matches the
7726 * production labeled extParsedEnt.
7727 *
7728 * [78] extParsedEnt ::= TextDecl? content
7729 *
7730 * Returns 0, -1 in case of error. the parser context is augmented
7731 * as a result of the parsing.
7732 */
7733
7734int
7735xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7736 xmlChar start[4];
7737 xmlCharEncoding enc;
7738
7739 xmlDefaultSAXHandlerInit();
7740
7741 GROW;
7742
7743 /*
7744 * SAX: beginning of the document processing.
7745 */
7746 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7747 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7748
7749 /*
7750 * Get the 4 first bytes and decode the charset
7751 * if enc != XML_CHAR_ENCODING_NONE
7752 * plug some encoding conversion routines.
7753 */
7754 start[0] = RAW;
7755 start[1] = NXT(1);
7756 start[2] = NXT(2);
7757 start[3] = NXT(3);
7758 enc = xmlDetectCharEncoding(start, 4);
7759 if (enc != XML_CHAR_ENCODING_NONE) {
7760 xmlSwitchEncoding(ctxt, enc);
7761 }
7762
7763
7764 if (CUR == 0) {
7765 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7766 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7767 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7768 ctxt->wellFormed = 0;
7769 ctxt->disableSAX = 1;
7770 }
7771
7772 /*
7773 * Check for the XMLDecl in the Prolog.
7774 */
7775 GROW;
7776 if ((RAW == '<') && (NXT(1) == '?') &&
7777 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7778 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7779
7780 /*
7781 * Note that we will switch encoding on the fly.
7782 */
7783 xmlParseXMLDecl(ctxt);
7784 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7785 /*
7786 * The XML REC instructs us to stop parsing right here
7787 */
7788 return(-1);
7789 }
7790 SKIP_BLANKS;
7791 } else {
7792 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7793 }
7794 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7795 ctxt->sax->startDocument(ctxt->userData);
7796
7797 /*
7798 * Doing validity checking on chunk doesn't make sense
7799 */
7800 ctxt->instate = XML_PARSER_CONTENT;
7801 ctxt->validate = 0;
7802 ctxt->loadsubset = 0;
7803 ctxt->depth = 0;
7804
7805 xmlParseContent(ctxt);
7806
7807 if ((RAW == '<') && (NXT(1) == '/')) {
7808 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7809 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7810 ctxt->sax->error(ctxt->userData,
7811 "chunk is not well balanced\n");
7812 ctxt->wellFormed = 0;
7813 ctxt->disableSAX = 1;
7814 } else if (RAW != 0) {
7815 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7816 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7817 ctxt->sax->error(ctxt->userData,
7818 "extra content at the end of well balanced chunk\n");
7819 ctxt->wellFormed = 0;
7820 ctxt->disableSAX = 1;
7821 }
7822
7823 /*
7824 * SAX: end of the document processing.
7825 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007826 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007827 ctxt->sax->endDocument(ctxt->userData);
7828
7829 if (! ctxt->wellFormed) return(-1);
7830 return(0);
7831}
7832
7833/************************************************************************
7834 * *
7835 * Progressive parsing interfaces *
7836 * *
7837 ************************************************************************/
7838
7839/**
7840 * xmlParseLookupSequence:
7841 * @ctxt: an XML parser context
7842 * @first: the first char to lookup
7843 * @next: the next char to lookup or zero
7844 * @third: the next char to lookup or zero
7845 *
7846 * Try to find if a sequence (first, next, third) or just (first next) or
7847 * (first) is available in the input stream.
7848 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7849 * to avoid rescanning sequences of bytes, it DOES change the state of the
7850 * parser, do not use liberally.
7851 *
7852 * Returns the index to the current parsing point if the full sequence
7853 * is available, -1 otherwise.
7854 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007855static int
Owen Taylor3473f882001-02-23 17:55:21 +00007856xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7857 xmlChar next, xmlChar third) {
7858 int base, len;
7859 xmlParserInputPtr in;
7860 const xmlChar *buf;
7861
7862 in = ctxt->input;
7863 if (in == NULL) return(-1);
7864 base = in->cur - in->base;
7865 if (base < 0) return(-1);
7866 if (ctxt->checkIndex > base)
7867 base = ctxt->checkIndex;
7868 if (in->buf == NULL) {
7869 buf = in->base;
7870 len = in->length;
7871 } else {
7872 buf = in->buf->buffer->content;
7873 len = in->buf->buffer->use;
7874 }
7875 /* take into account the sequence length */
7876 if (third) len -= 2;
7877 else if (next) len --;
7878 for (;base < len;base++) {
7879 if (buf[base] == first) {
7880 if (third != 0) {
7881 if ((buf[base + 1] != next) ||
7882 (buf[base + 2] != third)) continue;
7883 } else if (next != 0) {
7884 if (buf[base + 1] != next) continue;
7885 }
7886 ctxt->checkIndex = 0;
7887#ifdef DEBUG_PUSH
7888 if (next == 0)
7889 xmlGenericError(xmlGenericErrorContext,
7890 "PP: lookup '%c' found at %d\n",
7891 first, base);
7892 else if (third == 0)
7893 xmlGenericError(xmlGenericErrorContext,
7894 "PP: lookup '%c%c' found at %d\n",
7895 first, next, base);
7896 else
7897 xmlGenericError(xmlGenericErrorContext,
7898 "PP: lookup '%c%c%c' found at %d\n",
7899 first, next, third, base);
7900#endif
7901 return(base - (in->cur - in->base));
7902 }
7903 }
7904 ctxt->checkIndex = base;
7905#ifdef DEBUG_PUSH
7906 if (next == 0)
7907 xmlGenericError(xmlGenericErrorContext,
7908 "PP: lookup '%c' failed\n", first);
7909 else if (third == 0)
7910 xmlGenericError(xmlGenericErrorContext,
7911 "PP: lookup '%c%c' failed\n", first, next);
7912 else
7913 xmlGenericError(xmlGenericErrorContext,
7914 "PP: lookup '%c%c%c' failed\n", first, next, third);
7915#endif
7916 return(-1);
7917}
7918
7919/**
7920 * xmlParseTryOrFinish:
7921 * @ctxt: an XML parser context
7922 * @terminate: last chunk indicator
7923 *
7924 * Try to progress on parsing
7925 *
7926 * Returns zero if no parsing was possible
7927 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007928static int
Owen Taylor3473f882001-02-23 17:55:21 +00007929xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7930 int ret = 0;
7931 int avail;
7932 xmlChar cur, next;
7933
7934#ifdef DEBUG_PUSH
7935 switch (ctxt->instate) {
7936 case XML_PARSER_EOF:
7937 xmlGenericError(xmlGenericErrorContext,
7938 "PP: try EOF\n"); break;
7939 case XML_PARSER_START:
7940 xmlGenericError(xmlGenericErrorContext,
7941 "PP: try START\n"); break;
7942 case XML_PARSER_MISC:
7943 xmlGenericError(xmlGenericErrorContext,
7944 "PP: try MISC\n");break;
7945 case XML_PARSER_COMMENT:
7946 xmlGenericError(xmlGenericErrorContext,
7947 "PP: try COMMENT\n");break;
7948 case XML_PARSER_PROLOG:
7949 xmlGenericError(xmlGenericErrorContext,
7950 "PP: try PROLOG\n");break;
7951 case XML_PARSER_START_TAG:
7952 xmlGenericError(xmlGenericErrorContext,
7953 "PP: try START_TAG\n");break;
7954 case XML_PARSER_CONTENT:
7955 xmlGenericError(xmlGenericErrorContext,
7956 "PP: try CONTENT\n");break;
7957 case XML_PARSER_CDATA_SECTION:
7958 xmlGenericError(xmlGenericErrorContext,
7959 "PP: try CDATA_SECTION\n");break;
7960 case XML_PARSER_END_TAG:
7961 xmlGenericError(xmlGenericErrorContext,
7962 "PP: try END_TAG\n");break;
7963 case XML_PARSER_ENTITY_DECL:
7964 xmlGenericError(xmlGenericErrorContext,
7965 "PP: try ENTITY_DECL\n");break;
7966 case XML_PARSER_ENTITY_VALUE:
7967 xmlGenericError(xmlGenericErrorContext,
7968 "PP: try ENTITY_VALUE\n");break;
7969 case XML_PARSER_ATTRIBUTE_VALUE:
7970 xmlGenericError(xmlGenericErrorContext,
7971 "PP: try ATTRIBUTE_VALUE\n");break;
7972 case XML_PARSER_DTD:
7973 xmlGenericError(xmlGenericErrorContext,
7974 "PP: try DTD\n");break;
7975 case XML_PARSER_EPILOG:
7976 xmlGenericError(xmlGenericErrorContext,
7977 "PP: try EPILOG\n");break;
7978 case XML_PARSER_PI:
7979 xmlGenericError(xmlGenericErrorContext,
7980 "PP: try PI\n");break;
7981 case XML_PARSER_IGNORE:
7982 xmlGenericError(xmlGenericErrorContext,
7983 "PP: try IGNORE\n");break;
7984 }
7985#endif
7986
7987 while (1) {
Aleksey Sanine48a3182002-05-09 18:20:01 +00007988 SHRINK;
7989
Owen Taylor3473f882001-02-23 17:55:21 +00007990 /*
7991 * Pop-up of finished entities.
7992 */
7993 while ((RAW == 0) && (ctxt->inputNr > 1))
7994 xmlPopInput(ctxt);
7995
7996 if (ctxt->input ==NULL) break;
7997 if (ctxt->input->buf == NULL)
7998 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00007999 else {
8000 /*
8001 * If we are operating on converted input, try to flush
8002 * remainng chars to avoid them stalling in the non-converted
8003 * buffer.
8004 */
8005 if ((ctxt->input->buf->raw != NULL) &&
8006 (ctxt->input->buf->raw->use > 0)) {
8007 int base = ctxt->input->base -
8008 ctxt->input->buf->buffer->content;
8009 int current = ctxt->input->cur - ctxt->input->base;
8010
8011 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8012 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8013 ctxt->input->cur = ctxt->input->base + current;
8014 ctxt->input->end =
8015 &ctxt->input->buf->buffer->content[
8016 ctxt->input->buf->buffer->use];
8017 }
8018 avail = ctxt->input->buf->buffer->use -
8019 (ctxt->input->cur - ctxt->input->base);
8020 }
Owen Taylor3473f882001-02-23 17:55:21 +00008021 if (avail < 1)
8022 goto done;
8023 switch (ctxt->instate) {
8024 case XML_PARSER_EOF:
8025 /*
8026 * Document parsing is done !
8027 */
8028 goto done;
8029 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008030 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8031 xmlChar start[4];
8032 xmlCharEncoding enc;
8033
8034 /*
8035 * Very first chars read from the document flow.
8036 */
8037 if (avail < 4)
8038 goto done;
8039
8040 /*
8041 * Get the 4 first bytes and decode the charset
8042 * if enc != XML_CHAR_ENCODING_NONE
8043 * plug some encoding conversion routines.
8044 */
8045 start[0] = RAW;
8046 start[1] = NXT(1);
8047 start[2] = NXT(2);
8048 start[3] = NXT(3);
8049 enc = xmlDetectCharEncoding(start, 4);
8050 if (enc != XML_CHAR_ENCODING_NONE) {
8051 xmlSwitchEncoding(ctxt, enc);
8052 }
8053 break;
8054 }
Owen Taylor3473f882001-02-23 17:55:21 +00008055
8056 cur = ctxt->input->cur[0];
8057 next = ctxt->input->cur[1];
8058 if (cur == 0) {
8059 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8060 ctxt->sax->setDocumentLocator(ctxt->userData,
8061 &xmlDefaultSAXLocator);
8062 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8063 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8064 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8065 ctxt->wellFormed = 0;
8066 ctxt->disableSAX = 1;
8067 ctxt->instate = XML_PARSER_EOF;
8068#ifdef DEBUG_PUSH
8069 xmlGenericError(xmlGenericErrorContext,
8070 "PP: entering EOF\n");
8071#endif
8072 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8073 ctxt->sax->endDocument(ctxt->userData);
8074 goto done;
8075 }
8076 if ((cur == '<') && (next == '?')) {
8077 /* PI or XML decl */
8078 if (avail < 5) return(ret);
8079 if ((!terminate) &&
8080 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8081 return(ret);
8082 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8083 ctxt->sax->setDocumentLocator(ctxt->userData,
8084 &xmlDefaultSAXLocator);
8085 if ((ctxt->input->cur[2] == 'x') &&
8086 (ctxt->input->cur[3] == 'm') &&
8087 (ctxt->input->cur[4] == 'l') &&
8088 (IS_BLANK(ctxt->input->cur[5]))) {
8089 ret += 5;
8090#ifdef DEBUG_PUSH
8091 xmlGenericError(xmlGenericErrorContext,
8092 "PP: Parsing XML Decl\n");
8093#endif
8094 xmlParseXMLDecl(ctxt);
8095 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8096 /*
8097 * The XML REC instructs us to stop parsing right
8098 * here
8099 */
8100 ctxt->instate = XML_PARSER_EOF;
8101 return(0);
8102 }
8103 ctxt->standalone = ctxt->input->standalone;
8104 if ((ctxt->encoding == NULL) &&
8105 (ctxt->input->encoding != NULL))
8106 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8107 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8108 (!ctxt->disableSAX))
8109 ctxt->sax->startDocument(ctxt->userData);
8110 ctxt->instate = XML_PARSER_MISC;
8111#ifdef DEBUG_PUSH
8112 xmlGenericError(xmlGenericErrorContext,
8113 "PP: entering MISC\n");
8114#endif
8115 } else {
8116 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8117 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8118 (!ctxt->disableSAX))
8119 ctxt->sax->startDocument(ctxt->userData);
8120 ctxt->instate = XML_PARSER_MISC;
8121#ifdef DEBUG_PUSH
8122 xmlGenericError(xmlGenericErrorContext,
8123 "PP: entering MISC\n");
8124#endif
8125 }
8126 } else {
8127 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8128 ctxt->sax->setDocumentLocator(ctxt->userData,
8129 &xmlDefaultSAXLocator);
8130 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8131 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8132 (!ctxt->disableSAX))
8133 ctxt->sax->startDocument(ctxt->userData);
8134 ctxt->instate = XML_PARSER_MISC;
8135#ifdef DEBUG_PUSH
8136 xmlGenericError(xmlGenericErrorContext,
8137 "PP: entering MISC\n");
8138#endif
8139 }
8140 break;
8141 case XML_PARSER_MISC:
8142 SKIP_BLANKS;
8143 if (ctxt->input->buf == NULL)
8144 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8145 else
8146 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8147 if (avail < 2)
8148 goto done;
8149 cur = ctxt->input->cur[0];
8150 next = ctxt->input->cur[1];
8151 if ((cur == '<') && (next == '?')) {
8152 if ((!terminate) &&
8153 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8154 goto done;
8155#ifdef DEBUG_PUSH
8156 xmlGenericError(xmlGenericErrorContext,
8157 "PP: Parsing PI\n");
8158#endif
8159 xmlParsePI(ctxt);
8160 } else if ((cur == '<') && (next == '!') &&
8161 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8162 if ((!terminate) &&
8163 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8164 goto done;
8165#ifdef DEBUG_PUSH
8166 xmlGenericError(xmlGenericErrorContext,
8167 "PP: Parsing Comment\n");
8168#endif
8169 xmlParseComment(ctxt);
8170 ctxt->instate = XML_PARSER_MISC;
8171 } else if ((cur == '<') && (next == '!') &&
8172 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8173 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8174 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8175 (ctxt->input->cur[8] == 'E')) {
8176 if ((!terminate) &&
8177 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8178 goto done;
8179#ifdef DEBUG_PUSH
8180 xmlGenericError(xmlGenericErrorContext,
8181 "PP: Parsing internal subset\n");
8182#endif
8183 ctxt->inSubset = 1;
8184 xmlParseDocTypeDecl(ctxt);
8185 if (RAW == '[') {
8186 ctxt->instate = XML_PARSER_DTD;
8187#ifdef DEBUG_PUSH
8188 xmlGenericError(xmlGenericErrorContext,
8189 "PP: entering DTD\n");
8190#endif
8191 } else {
8192 /*
8193 * Create and update the external subset.
8194 */
8195 ctxt->inSubset = 2;
8196 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8197 (ctxt->sax->externalSubset != NULL))
8198 ctxt->sax->externalSubset(ctxt->userData,
8199 ctxt->intSubName, ctxt->extSubSystem,
8200 ctxt->extSubURI);
8201 ctxt->inSubset = 0;
8202 ctxt->instate = XML_PARSER_PROLOG;
8203#ifdef DEBUG_PUSH
8204 xmlGenericError(xmlGenericErrorContext,
8205 "PP: entering PROLOG\n");
8206#endif
8207 }
8208 } else if ((cur == '<') && (next == '!') &&
8209 (avail < 9)) {
8210 goto done;
8211 } else {
8212 ctxt->instate = XML_PARSER_START_TAG;
8213#ifdef DEBUG_PUSH
8214 xmlGenericError(xmlGenericErrorContext,
8215 "PP: entering START_TAG\n");
8216#endif
8217 }
8218 break;
8219 case XML_PARSER_IGNORE:
8220 xmlGenericError(xmlGenericErrorContext,
8221 "PP: internal error, state == IGNORE");
8222 ctxt->instate = XML_PARSER_DTD;
8223#ifdef DEBUG_PUSH
8224 xmlGenericError(xmlGenericErrorContext,
8225 "PP: entering DTD\n");
8226#endif
8227 break;
8228 case XML_PARSER_PROLOG:
8229 SKIP_BLANKS;
8230 if (ctxt->input->buf == NULL)
8231 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8232 else
8233 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8234 if (avail < 2)
8235 goto done;
8236 cur = ctxt->input->cur[0];
8237 next = ctxt->input->cur[1];
8238 if ((cur == '<') && (next == '?')) {
8239 if ((!terminate) &&
8240 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8241 goto done;
8242#ifdef DEBUG_PUSH
8243 xmlGenericError(xmlGenericErrorContext,
8244 "PP: Parsing PI\n");
8245#endif
8246 xmlParsePI(ctxt);
8247 } else if ((cur == '<') && (next == '!') &&
8248 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8249 if ((!terminate) &&
8250 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8251 goto done;
8252#ifdef DEBUG_PUSH
8253 xmlGenericError(xmlGenericErrorContext,
8254 "PP: Parsing Comment\n");
8255#endif
8256 xmlParseComment(ctxt);
8257 ctxt->instate = XML_PARSER_PROLOG;
8258 } else if ((cur == '<') && (next == '!') &&
8259 (avail < 4)) {
8260 goto done;
8261 } else {
8262 ctxt->instate = XML_PARSER_START_TAG;
8263#ifdef DEBUG_PUSH
8264 xmlGenericError(xmlGenericErrorContext,
8265 "PP: entering START_TAG\n");
8266#endif
8267 }
8268 break;
8269 case XML_PARSER_EPILOG:
8270 SKIP_BLANKS;
8271 if (ctxt->input->buf == NULL)
8272 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8273 else
8274 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8275 if (avail < 2)
8276 goto done;
8277 cur = ctxt->input->cur[0];
8278 next = ctxt->input->cur[1];
8279 if ((cur == '<') && (next == '?')) {
8280 if ((!terminate) &&
8281 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8282 goto done;
8283#ifdef DEBUG_PUSH
8284 xmlGenericError(xmlGenericErrorContext,
8285 "PP: Parsing PI\n");
8286#endif
8287 xmlParsePI(ctxt);
8288 ctxt->instate = XML_PARSER_EPILOG;
8289 } else if ((cur == '<') && (next == '!') &&
8290 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8291 if ((!terminate) &&
8292 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8293 goto done;
8294#ifdef DEBUG_PUSH
8295 xmlGenericError(xmlGenericErrorContext,
8296 "PP: Parsing Comment\n");
8297#endif
8298 xmlParseComment(ctxt);
8299 ctxt->instate = XML_PARSER_EPILOG;
8300 } else if ((cur == '<') && (next == '!') &&
8301 (avail < 4)) {
8302 goto done;
8303 } else {
8304 ctxt->errNo = XML_ERR_DOCUMENT_END;
8305 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8306 ctxt->sax->error(ctxt->userData,
8307 "Extra content at the end of the document\n");
8308 ctxt->wellFormed = 0;
8309 ctxt->disableSAX = 1;
8310 ctxt->instate = XML_PARSER_EOF;
8311#ifdef DEBUG_PUSH
8312 xmlGenericError(xmlGenericErrorContext,
8313 "PP: entering EOF\n");
8314#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008315 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008316 ctxt->sax->endDocument(ctxt->userData);
8317 goto done;
8318 }
8319 break;
8320 case XML_PARSER_START_TAG: {
8321 xmlChar *name, *oldname;
8322
8323 if ((avail < 2) && (ctxt->inputNr == 1))
8324 goto done;
8325 cur = ctxt->input->cur[0];
8326 if (cur != '<') {
8327 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8328 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8329 ctxt->sax->error(ctxt->userData,
8330 "Start tag expect, '<' not found\n");
8331 ctxt->wellFormed = 0;
8332 ctxt->disableSAX = 1;
8333 ctxt->instate = XML_PARSER_EOF;
8334#ifdef DEBUG_PUSH
8335 xmlGenericError(xmlGenericErrorContext,
8336 "PP: entering EOF\n");
8337#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008338 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008339 ctxt->sax->endDocument(ctxt->userData);
8340 goto done;
8341 }
8342 if ((!terminate) &&
8343 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8344 goto done;
8345 if (ctxt->spaceNr == 0)
8346 spacePush(ctxt, -1);
8347 else
8348 spacePush(ctxt, *ctxt->space);
8349 name = xmlParseStartTag(ctxt);
8350 if (name == NULL) {
8351 spacePop(ctxt);
8352 ctxt->instate = XML_PARSER_EOF;
8353#ifdef DEBUG_PUSH
8354 xmlGenericError(xmlGenericErrorContext,
8355 "PP: entering EOF\n");
8356#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008357 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008358 ctxt->sax->endDocument(ctxt->userData);
8359 goto done;
8360 }
8361 namePush(ctxt, xmlStrdup(name));
8362
8363 /*
8364 * [ VC: Root Element Type ]
8365 * The Name in the document type declaration must match
8366 * the element type of the root element.
8367 */
8368 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8369 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8370 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8371
8372 /*
8373 * Check for an Empty Element.
8374 */
8375 if ((RAW == '/') && (NXT(1) == '>')) {
8376 SKIP(2);
8377 if ((ctxt->sax != NULL) &&
8378 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8379 ctxt->sax->endElement(ctxt->userData, name);
8380 xmlFree(name);
8381 oldname = namePop(ctxt);
8382 spacePop(ctxt);
8383 if (oldname != NULL) {
8384#ifdef DEBUG_STACK
8385 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8386#endif
8387 xmlFree(oldname);
8388 }
8389 if (ctxt->name == NULL) {
8390 ctxt->instate = XML_PARSER_EPILOG;
8391#ifdef DEBUG_PUSH
8392 xmlGenericError(xmlGenericErrorContext,
8393 "PP: entering EPILOG\n");
8394#endif
8395 } else {
8396 ctxt->instate = XML_PARSER_CONTENT;
8397#ifdef DEBUG_PUSH
8398 xmlGenericError(xmlGenericErrorContext,
8399 "PP: entering CONTENT\n");
8400#endif
8401 }
8402 break;
8403 }
8404 if (RAW == '>') {
8405 NEXT;
8406 } else {
8407 ctxt->errNo = XML_ERR_GT_REQUIRED;
8408 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8409 ctxt->sax->error(ctxt->userData,
8410 "Couldn't find end of Start Tag %s\n",
8411 name);
8412 ctxt->wellFormed = 0;
8413 ctxt->disableSAX = 1;
8414
8415 /*
8416 * end of parsing of this node.
8417 */
8418 nodePop(ctxt);
8419 oldname = namePop(ctxt);
8420 spacePop(ctxt);
8421 if (oldname != NULL) {
8422#ifdef DEBUG_STACK
8423 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8424#endif
8425 xmlFree(oldname);
8426 }
8427 }
8428 xmlFree(name);
8429 ctxt->instate = XML_PARSER_CONTENT;
8430#ifdef DEBUG_PUSH
8431 xmlGenericError(xmlGenericErrorContext,
8432 "PP: entering CONTENT\n");
8433#endif
8434 break;
8435 }
8436 case XML_PARSER_CONTENT: {
8437 const xmlChar *test;
8438 int cons;
Daniel Veillard04be4f52001-03-26 21:23:53 +00008439 int tok;
Owen Taylor3473f882001-02-23 17:55:21 +00008440
8441 /*
8442 * Handle preparsed entities and charRef
8443 */
8444 if (ctxt->token != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008445 xmlChar current[2] = { 0 , 0 } ;
Owen Taylor3473f882001-02-23 17:55:21 +00008446
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008447 current[0] = (xmlChar) ctxt->token;
Owen Taylor3473f882001-02-23 17:55:21 +00008448 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8449 (ctxt->sax->characters != NULL))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008450 ctxt->sax->characters(ctxt->userData, current, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00008451 ctxt->token = 0;
8452 }
8453 if ((avail < 2) && (ctxt->inputNr == 1))
8454 goto done;
8455 cur = ctxt->input->cur[0];
8456 next = ctxt->input->cur[1];
8457
8458 test = CUR_PTR;
8459 cons = ctxt->input->consumed;
8460 tok = ctxt->token;
8461 if ((cur == '<') && (next == '?')) {
8462 if ((!terminate) &&
8463 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8464 goto done;
8465#ifdef DEBUG_PUSH
8466 xmlGenericError(xmlGenericErrorContext,
8467 "PP: Parsing PI\n");
8468#endif
8469 xmlParsePI(ctxt);
8470 } else if ((cur == '<') && (next == '!') &&
8471 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8472 if ((!terminate) &&
8473 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8474 goto done;
8475#ifdef DEBUG_PUSH
8476 xmlGenericError(xmlGenericErrorContext,
8477 "PP: Parsing Comment\n");
8478#endif
8479 xmlParseComment(ctxt);
8480 ctxt->instate = XML_PARSER_CONTENT;
8481 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8482 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8483 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8484 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8485 (ctxt->input->cur[8] == '[')) {
8486 SKIP(9);
8487 ctxt->instate = XML_PARSER_CDATA_SECTION;
8488#ifdef DEBUG_PUSH
8489 xmlGenericError(xmlGenericErrorContext,
8490 "PP: entering CDATA_SECTION\n");
8491#endif
8492 break;
8493 } else if ((cur == '<') && (next == '!') &&
8494 (avail < 9)) {
8495 goto done;
8496 } else if ((cur == '<') && (next == '/')) {
8497 ctxt->instate = XML_PARSER_END_TAG;
8498#ifdef DEBUG_PUSH
8499 xmlGenericError(xmlGenericErrorContext,
8500 "PP: entering END_TAG\n");
8501#endif
8502 break;
8503 } else if (cur == '<') {
8504 ctxt->instate = XML_PARSER_START_TAG;
8505#ifdef DEBUG_PUSH
8506 xmlGenericError(xmlGenericErrorContext,
8507 "PP: entering START_TAG\n");
8508#endif
8509 break;
8510 } else if (cur == '&') {
8511 if ((!terminate) &&
8512 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8513 goto done;
8514#ifdef DEBUG_PUSH
8515 xmlGenericError(xmlGenericErrorContext,
8516 "PP: Parsing Reference\n");
8517#endif
8518 xmlParseReference(ctxt);
8519 } else {
8520 /* TODO Avoid the extra copy, handle directly !!! */
8521 /*
8522 * Goal of the following test is:
8523 * - minimize calls to the SAX 'character' callback
8524 * when they are mergeable
8525 * - handle a problem for isBlank when we only parse
8526 * a sequence of blank chars and the next one is
8527 * not available to check against '<' presence.
8528 * - tries to homogenize the differences in SAX
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008529 * callbacks between the push and pull versions
Owen Taylor3473f882001-02-23 17:55:21 +00008530 * of the parser.
8531 */
8532 if ((ctxt->inputNr == 1) &&
8533 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8534 if ((!terminate) &&
8535 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8536 goto done;
8537 }
8538 ctxt->checkIndex = 0;
8539#ifdef DEBUG_PUSH
8540 xmlGenericError(xmlGenericErrorContext,
8541 "PP: Parsing char data\n");
8542#endif
8543 xmlParseCharData(ctxt, 0);
8544 }
8545 /*
8546 * Pop-up of finished entities.
8547 */
8548 while ((RAW == 0) && (ctxt->inputNr > 1))
8549 xmlPopInput(ctxt);
8550 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8551 (tok == ctxt->token)) {
8552 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8553 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8554 ctxt->sax->error(ctxt->userData,
8555 "detected an error in element content\n");
8556 ctxt->wellFormed = 0;
8557 ctxt->disableSAX = 1;
8558 ctxt->instate = XML_PARSER_EOF;
8559 break;
8560 }
8561 break;
8562 }
8563 case XML_PARSER_CDATA_SECTION: {
8564 /*
8565 * The Push mode need to have the SAX callback for
8566 * cdataBlock merge back contiguous callbacks.
8567 */
8568 int base;
8569
8570 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8571 if (base < 0) {
8572 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8573 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8574 if (ctxt->sax->cdataBlock != NULL)
8575 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8576 XML_PARSER_BIG_BUFFER_SIZE);
8577 }
8578 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8579 ctxt->checkIndex = 0;
8580 }
8581 goto done;
8582 } else {
8583 if ((ctxt->sax != NULL) && (base > 0) &&
8584 (!ctxt->disableSAX)) {
8585 if (ctxt->sax->cdataBlock != NULL)
8586 ctxt->sax->cdataBlock(ctxt->userData,
8587 ctxt->input->cur, base);
8588 }
8589 SKIP(base + 3);
8590 ctxt->checkIndex = 0;
8591 ctxt->instate = XML_PARSER_CONTENT;
8592#ifdef DEBUG_PUSH
8593 xmlGenericError(xmlGenericErrorContext,
8594 "PP: entering CONTENT\n");
8595#endif
8596 }
8597 break;
8598 }
8599 case XML_PARSER_END_TAG:
8600 if (avail < 2)
8601 goto done;
8602 if ((!terminate) &&
8603 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8604 goto done;
8605 xmlParseEndTag(ctxt);
8606 if (ctxt->name == NULL) {
8607 ctxt->instate = XML_PARSER_EPILOG;
8608#ifdef DEBUG_PUSH
8609 xmlGenericError(xmlGenericErrorContext,
8610 "PP: entering EPILOG\n");
8611#endif
8612 } else {
8613 ctxt->instate = XML_PARSER_CONTENT;
8614#ifdef DEBUG_PUSH
8615 xmlGenericError(xmlGenericErrorContext,
8616 "PP: entering CONTENT\n");
8617#endif
8618 }
8619 break;
8620 case XML_PARSER_DTD: {
8621 /*
8622 * Sorry but progressive parsing of the internal subset
8623 * is not expected to be supported. We first check that
8624 * the full content of the internal subset is available and
8625 * the parsing is launched only at that point.
8626 * Internal subset ends up with "']' S? '>'" in an unescaped
8627 * section and not in a ']]>' sequence which are conditional
8628 * sections (whoever argued to keep that crap in XML deserve
8629 * a place in hell !).
8630 */
8631 int base, i;
8632 xmlChar *buf;
8633 xmlChar quote = 0;
8634
8635 base = ctxt->input->cur - ctxt->input->base;
8636 if (base < 0) return(0);
8637 if (ctxt->checkIndex > base)
8638 base = ctxt->checkIndex;
8639 buf = ctxt->input->buf->buffer->content;
8640 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8641 base++) {
8642 if (quote != 0) {
8643 if (buf[base] == quote)
8644 quote = 0;
8645 continue;
8646 }
8647 if (buf[base] == '"') {
8648 quote = '"';
8649 continue;
8650 }
8651 if (buf[base] == '\'') {
8652 quote = '\'';
8653 continue;
8654 }
8655 if (buf[base] == ']') {
8656 if ((unsigned int) base +1 >=
8657 ctxt->input->buf->buffer->use)
8658 break;
8659 if (buf[base + 1] == ']') {
8660 /* conditional crap, skip both ']' ! */
8661 base++;
8662 continue;
8663 }
8664 for (i = 0;
8665 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8666 i++) {
8667 if (buf[base + i] == '>')
8668 goto found_end_int_subset;
8669 }
8670 break;
8671 }
8672 }
8673 /*
8674 * We didn't find the end of the Internal subset
8675 */
8676 if (quote == 0)
8677 ctxt->checkIndex = base;
8678#ifdef DEBUG_PUSH
8679 if (next == 0)
8680 xmlGenericError(xmlGenericErrorContext,
8681 "PP: lookup of int subset end filed\n");
8682#endif
8683 goto done;
8684
8685found_end_int_subset:
8686 xmlParseInternalSubset(ctxt);
8687 ctxt->inSubset = 2;
8688 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8689 (ctxt->sax->externalSubset != NULL))
8690 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8691 ctxt->extSubSystem, ctxt->extSubURI);
8692 ctxt->inSubset = 0;
8693 ctxt->instate = XML_PARSER_PROLOG;
8694 ctxt->checkIndex = 0;
8695#ifdef DEBUG_PUSH
8696 xmlGenericError(xmlGenericErrorContext,
8697 "PP: entering PROLOG\n");
8698#endif
8699 break;
8700 }
8701 case XML_PARSER_COMMENT:
8702 xmlGenericError(xmlGenericErrorContext,
8703 "PP: internal error, state == COMMENT\n");
8704 ctxt->instate = XML_PARSER_CONTENT;
8705#ifdef DEBUG_PUSH
8706 xmlGenericError(xmlGenericErrorContext,
8707 "PP: entering CONTENT\n");
8708#endif
8709 break;
8710 case XML_PARSER_PI:
8711 xmlGenericError(xmlGenericErrorContext,
8712 "PP: internal error, state == PI\n");
8713 ctxt->instate = XML_PARSER_CONTENT;
8714#ifdef DEBUG_PUSH
8715 xmlGenericError(xmlGenericErrorContext,
8716 "PP: entering CONTENT\n");
8717#endif
8718 break;
8719 case XML_PARSER_ENTITY_DECL:
8720 xmlGenericError(xmlGenericErrorContext,
8721 "PP: internal error, state == ENTITY_DECL\n");
8722 ctxt->instate = XML_PARSER_DTD;
8723#ifdef DEBUG_PUSH
8724 xmlGenericError(xmlGenericErrorContext,
8725 "PP: entering DTD\n");
8726#endif
8727 break;
8728 case XML_PARSER_ENTITY_VALUE:
8729 xmlGenericError(xmlGenericErrorContext,
8730 "PP: internal error, state == ENTITY_VALUE\n");
8731 ctxt->instate = XML_PARSER_CONTENT;
8732#ifdef DEBUG_PUSH
8733 xmlGenericError(xmlGenericErrorContext,
8734 "PP: entering DTD\n");
8735#endif
8736 break;
8737 case XML_PARSER_ATTRIBUTE_VALUE:
8738 xmlGenericError(xmlGenericErrorContext,
8739 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8740 ctxt->instate = XML_PARSER_START_TAG;
8741#ifdef DEBUG_PUSH
8742 xmlGenericError(xmlGenericErrorContext,
8743 "PP: entering START_TAG\n");
8744#endif
8745 break;
8746 case XML_PARSER_SYSTEM_LITERAL:
8747 xmlGenericError(xmlGenericErrorContext,
8748 "PP: internal error, state == SYSTEM_LITERAL\n");
8749 ctxt->instate = XML_PARSER_START_TAG;
8750#ifdef DEBUG_PUSH
8751 xmlGenericError(xmlGenericErrorContext,
8752 "PP: entering START_TAG\n");
8753#endif
8754 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00008755 case XML_PARSER_PUBLIC_LITERAL:
8756 xmlGenericError(xmlGenericErrorContext,
8757 "PP: internal error, state == PUBLIC_LITERAL\n");
8758 ctxt->instate = XML_PARSER_START_TAG;
8759#ifdef DEBUG_PUSH
8760 xmlGenericError(xmlGenericErrorContext,
8761 "PP: entering START_TAG\n");
8762#endif
8763 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008764 }
8765 }
8766done:
8767#ifdef DEBUG_PUSH
8768 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8769#endif
8770 return(ret);
8771}
8772
8773/**
Owen Taylor3473f882001-02-23 17:55:21 +00008774 * xmlParseChunk:
8775 * @ctxt: an XML parser context
8776 * @chunk: an char array
8777 * @size: the size in byte of the chunk
8778 * @terminate: last chunk indicator
8779 *
8780 * Parse a Chunk of memory
8781 *
8782 * Returns zero if no error, the xmlParserErrors otherwise.
8783 */
8784int
8785xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8786 int terminate) {
8787 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8788 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8789 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8790 int cur = ctxt->input->cur - ctxt->input->base;
8791
8792 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8793 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8794 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008795 ctxt->input->end =
8796 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008797#ifdef DEBUG_PUSH
8798 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8799#endif
8800
8801 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8802 xmlParseTryOrFinish(ctxt, terminate);
8803 } else if (ctxt->instate != XML_PARSER_EOF) {
8804 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8805 xmlParserInputBufferPtr in = ctxt->input->buf;
8806 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8807 (in->raw != NULL)) {
8808 int nbchars;
8809
8810 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8811 if (nbchars < 0) {
8812 xmlGenericError(xmlGenericErrorContext,
8813 "xmlParseChunk: encoder error\n");
8814 return(XML_ERR_INVALID_ENCODING);
8815 }
8816 }
8817 }
8818 }
8819 xmlParseTryOrFinish(ctxt, terminate);
8820 if (terminate) {
8821 /*
8822 * Check for termination
8823 */
8824 if ((ctxt->instate != XML_PARSER_EOF) &&
8825 (ctxt->instate != XML_PARSER_EPILOG)) {
8826 ctxt->errNo = XML_ERR_DOCUMENT_END;
8827 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8828 ctxt->sax->error(ctxt->userData,
8829 "Extra content at the end of the document\n");
8830 ctxt->wellFormed = 0;
8831 ctxt->disableSAX = 1;
8832 }
8833 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008834 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008835 ctxt->sax->endDocument(ctxt->userData);
8836 }
8837 ctxt->instate = XML_PARSER_EOF;
8838 }
8839 return((xmlParserErrors) ctxt->errNo);
8840}
8841
8842/************************************************************************
8843 * *
8844 * I/O front end functions to the parser *
8845 * *
8846 ************************************************************************/
8847
8848/**
8849 * xmlStopParser:
8850 * @ctxt: an XML parser context
8851 *
8852 * Blocks further parser processing
8853 */
8854void
8855xmlStopParser(xmlParserCtxtPtr ctxt) {
8856 ctxt->instate = XML_PARSER_EOF;
8857 if (ctxt->input != NULL)
8858 ctxt->input->cur = BAD_CAST"";
8859}
8860
8861/**
8862 * xmlCreatePushParserCtxt:
8863 * @sax: a SAX handler
8864 * @user_data: The user data returned on SAX callbacks
8865 * @chunk: a pointer to an array of chars
8866 * @size: number of chars in the array
8867 * @filename: an optional file name or URI
8868 *
8869 * Create a parser context for using the XML parser in push mode
8870 * To allow content encoding detection, @size should be >= 4
8871 * The value of @filename is used for fetching external entities
8872 * and error/warning reports.
8873 *
8874 * Returns the new parser context or NULL
8875 */
8876xmlParserCtxtPtr
8877xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8878 const char *chunk, int size, const char *filename) {
8879 xmlParserCtxtPtr ctxt;
8880 xmlParserInputPtr inputStream;
8881 xmlParserInputBufferPtr buf;
8882 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8883
8884 /*
8885 * plug some encoding conversion routines
8886 */
8887 if ((chunk != NULL) && (size >= 4))
8888 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8889
8890 buf = xmlAllocParserInputBuffer(enc);
8891 if (buf == NULL) return(NULL);
8892
8893 ctxt = xmlNewParserCtxt();
8894 if (ctxt == NULL) {
8895 xmlFree(buf);
8896 return(NULL);
8897 }
8898 if (sax != NULL) {
8899 if (ctxt->sax != &xmlDefaultSAXHandler)
8900 xmlFree(ctxt->sax);
8901 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8902 if (ctxt->sax == NULL) {
8903 xmlFree(buf);
8904 xmlFree(ctxt);
8905 return(NULL);
8906 }
8907 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8908 if (user_data != NULL)
8909 ctxt->userData = user_data;
8910 }
8911 if (filename == NULL) {
8912 ctxt->directory = NULL;
8913 } else {
8914 ctxt->directory = xmlParserGetDirectory(filename);
8915 }
8916
8917 inputStream = xmlNewInputStream(ctxt);
8918 if (inputStream == NULL) {
8919 xmlFreeParserCtxt(ctxt);
8920 return(NULL);
8921 }
8922
8923 if (filename == NULL)
8924 inputStream->filename = NULL;
8925 else
8926 inputStream->filename = xmlMemStrdup(filename);
8927 inputStream->buf = buf;
8928 inputStream->base = inputStream->buf->buffer->content;
8929 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008930 inputStream->end =
8931 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008932
8933 inputPush(ctxt, inputStream);
8934
8935 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8936 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00008937 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8938 int cur = ctxt->input->cur - ctxt->input->base;
8939
Owen Taylor3473f882001-02-23 17:55:21 +00008940 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00008941
8942 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8943 ctxt->input->cur = ctxt->input->base + cur;
8944 ctxt->input->end =
8945 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008946#ifdef DEBUG_PUSH
8947 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8948#endif
8949 }
8950
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008951 if (enc != XML_CHAR_ENCODING_NONE) {
8952 xmlSwitchEncoding(ctxt, enc);
8953 }
8954
Owen Taylor3473f882001-02-23 17:55:21 +00008955 return(ctxt);
8956}
8957
8958/**
8959 * xmlCreateIOParserCtxt:
8960 * @sax: a SAX handler
8961 * @user_data: The user data returned on SAX callbacks
8962 * @ioread: an I/O read function
8963 * @ioclose: an I/O close function
8964 * @ioctx: an I/O handler
8965 * @enc: the charset encoding if known
8966 *
8967 * Create a parser context for using the XML parser with an existing
8968 * I/O stream
8969 *
8970 * Returns the new parser context or NULL
8971 */
8972xmlParserCtxtPtr
8973xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8974 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8975 void *ioctx, xmlCharEncoding enc) {
8976 xmlParserCtxtPtr ctxt;
8977 xmlParserInputPtr inputStream;
8978 xmlParserInputBufferPtr buf;
8979
8980 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8981 if (buf == NULL) return(NULL);
8982
8983 ctxt = xmlNewParserCtxt();
8984 if (ctxt == NULL) {
8985 xmlFree(buf);
8986 return(NULL);
8987 }
8988 if (sax != NULL) {
8989 if (ctxt->sax != &xmlDefaultSAXHandler)
8990 xmlFree(ctxt->sax);
8991 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8992 if (ctxt->sax == NULL) {
8993 xmlFree(buf);
8994 xmlFree(ctxt);
8995 return(NULL);
8996 }
8997 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8998 if (user_data != NULL)
8999 ctxt->userData = user_data;
9000 }
9001
9002 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9003 if (inputStream == NULL) {
9004 xmlFreeParserCtxt(ctxt);
9005 return(NULL);
9006 }
9007 inputPush(ctxt, inputStream);
9008
9009 return(ctxt);
9010}
9011
/************************************************************************
 *                                                                      *
 *                Front ends when parsing a DTD                         *
 *                                                                      *
 ************************************************************************/
9017
9018/**
9019 * xmlIOParseDTD:
9020 * @sax: the SAX handler block or NULL
9021 * @input: an Input Buffer
9022 * @enc: the charset encoding if known
9023 *
9024 * Load and parse a DTD
9025 *
9026 * Returns the resulting xmlDtdPtr or NULL in case of error.
9027 * @input will be freed at parsing end.
9028 */
9029
9030xmlDtdPtr
9031xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9032 xmlCharEncoding enc) {
9033 xmlDtdPtr ret = NULL;
9034 xmlParserCtxtPtr ctxt;
9035 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009036 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00009037
9038 if (input == NULL)
9039 return(NULL);
9040
9041 ctxt = xmlNewParserCtxt();
9042 if (ctxt == NULL) {
9043 return(NULL);
9044 }
9045
9046 /*
9047 * Set-up the SAX context
9048 */
9049 if (sax != NULL) {
9050 if (ctxt->sax != NULL)
9051 xmlFree(ctxt->sax);
9052 ctxt->sax = sax;
9053 ctxt->userData = NULL;
9054 }
9055
9056 /*
9057 * generate a parser input from the I/O handler
9058 */
9059
9060 pinput = xmlNewIOInputStream(ctxt, input, enc);
9061 if (pinput == NULL) {
9062 if (sax != NULL) ctxt->sax = NULL;
9063 xmlFreeParserCtxt(ctxt);
9064 return(NULL);
9065 }
9066
9067 /*
9068 * plug some encoding conversion routines here.
9069 */
9070 xmlPushInput(ctxt, pinput);
9071
9072 pinput->filename = NULL;
9073 pinput->line = 1;
9074 pinput->col = 1;
9075 pinput->base = ctxt->input->cur;
9076 pinput->cur = ctxt->input->cur;
9077 pinput->free = NULL;
9078
9079 /*
9080 * let's parse that entity knowing it's an external subset.
9081 */
9082 ctxt->inSubset = 2;
9083 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9084 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9085 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009086
9087 if (enc == XML_CHAR_ENCODING_NONE) {
9088 /*
9089 * Get the 4 first bytes and decode the charset
9090 * if enc != XML_CHAR_ENCODING_NONE
9091 * plug some encoding conversion routines.
9092 */
9093 start[0] = RAW;
9094 start[1] = NXT(1);
9095 start[2] = NXT(2);
9096 start[3] = NXT(3);
9097 enc = xmlDetectCharEncoding(start, 4);
9098 if (enc != XML_CHAR_ENCODING_NONE) {
9099 xmlSwitchEncoding(ctxt, enc);
9100 }
9101 }
9102
Owen Taylor3473f882001-02-23 17:55:21 +00009103 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9104
9105 if (ctxt->myDoc != NULL) {
9106 if (ctxt->wellFormed) {
9107 ret = ctxt->myDoc->extSubset;
9108 ctxt->myDoc->extSubset = NULL;
9109 } else {
9110 ret = NULL;
9111 }
9112 xmlFreeDoc(ctxt->myDoc);
9113 ctxt->myDoc = NULL;
9114 }
9115 if (sax != NULL) ctxt->sax = NULL;
9116 xmlFreeParserCtxt(ctxt);
9117
9118 return(ret);
9119}
9120
9121/**
9122 * xmlSAXParseDTD:
9123 * @sax: the SAX handler block
9124 * @ExternalID: a NAME* containing the External ID of the DTD
9125 * @SystemID: a NAME* containing the URL to the DTD
9126 *
9127 * Load and parse an external subset.
9128 *
9129 * Returns the resulting xmlDtdPtr or NULL in case of error.
9130 */
9131
9132xmlDtdPtr
9133xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9134 const xmlChar *SystemID) {
9135 xmlDtdPtr ret = NULL;
9136 xmlParserCtxtPtr ctxt;
9137 xmlParserInputPtr input = NULL;
9138 xmlCharEncoding enc;
9139
9140 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9141
9142 ctxt = xmlNewParserCtxt();
9143 if (ctxt == NULL) {
9144 return(NULL);
9145 }
9146
9147 /*
9148 * Set-up the SAX context
9149 */
9150 if (sax != NULL) {
9151 if (ctxt->sax != NULL)
9152 xmlFree(ctxt->sax);
9153 ctxt->sax = sax;
9154 ctxt->userData = NULL;
9155 }
9156
9157 /*
9158 * Ask the Entity resolver to load the damn thing
9159 */
9160
9161 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9162 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9163 if (input == NULL) {
9164 if (sax != NULL) ctxt->sax = NULL;
9165 xmlFreeParserCtxt(ctxt);
9166 return(NULL);
9167 }
9168
9169 /*
9170 * plug some encoding conversion routines here.
9171 */
9172 xmlPushInput(ctxt, input);
9173 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9174 xmlSwitchEncoding(ctxt, enc);
9175
9176 if (input->filename == NULL)
9177 input->filename = (char *) xmlStrdup(SystemID);
9178 input->line = 1;
9179 input->col = 1;
9180 input->base = ctxt->input->cur;
9181 input->cur = ctxt->input->cur;
9182 input->free = NULL;
9183
9184 /*
9185 * let's parse that entity knowing it's an external subset.
9186 */
9187 ctxt->inSubset = 2;
9188 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9189 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9190 ExternalID, SystemID);
9191 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9192
9193 if (ctxt->myDoc != NULL) {
9194 if (ctxt->wellFormed) {
9195 ret = ctxt->myDoc->extSubset;
9196 ctxt->myDoc->extSubset = NULL;
9197 } else {
9198 ret = NULL;
9199 }
9200 xmlFreeDoc(ctxt->myDoc);
9201 ctxt->myDoc = NULL;
9202 }
9203 if (sax != NULL) ctxt->sax = NULL;
9204 xmlFreeParserCtxt(ctxt);
9205
9206 return(ret);
9207}
9208
9209/**
9210 * xmlParseDTD:
9211 * @ExternalID: a NAME* containing the External ID of the DTD
9212 * @SystemID: a NAME* containing the URL to the DTD
9213 *
9214 * Load and parse an external subset.
9215 *
9216 * Returns the resulting xmlDtdPtr or NULL in case of error.
9217 */
9218
9219xmlDtdPtr
9220xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9221 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9222}
9223
/************************************************************************
 *                                                                      *
 *                Front ends when parsing an Entity                     *
 *                                                                      *
 ************************************************************************/
9229
9230/**
Owen Taylor3473f882001-02-23 17:55:21 +00009231 * xmlParseCtxtExternalEntity:
9232 * @ctx: the existing parsing context
9233 * @URL: the URL for the entity to load
9234 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009235 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009236 *
9237 * Parse an external general entity within an existing parsing context
9238 * An external general parsed entity is well-formed if it matches the
9239 * production labeled extParsedEnt.
9240 *
9241 * [78] extParsedEnt ::= TextDecl? content
9242 *
9243 * Returns 0 if the entity is well formed, -1 in case of args problem and
9244 * the parser error code otherwise
9245 */
9246
9247int
9248xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009249 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009250 xmlParserCtxtPtr ctxt;
9251 xmlDocPtr newDoc;
9252 xmlSAXHandlerPtr oldsax = NULL;
9253 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009254 xmlChar start[4];
9255 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009256
9257 if (ctx->depth > 40) {
9258 return(XML_ERR_ENTITY_LOOP);
9259 }
9260
Daniel Veillardcda96922001-08-21 10:56:31 +00009261 if (lst != NULL)
9262 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009263 if ((URL == NULL) && (ID == NULL))
9264 return(-1);
9265 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9266 return(-1);
9267
9268
9269 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9270 if (ctxt == NULL) return(-1);
9271 ctxt->userData = ctxt;
9272 oldsax = ctxt->sax;
9273 ctxt->sax = ctx->sax;
9274 newDoc = xmlNewDoc(BAD_CAST "1.0");
9275 if (newDoc == NULL) {
9276 xmlFreeParserCtxt(ctxt);
9277 return(-1);
9278 }
9279 if (ctx->myDoc != NULL) {
9280 newDoc->intSubset = ctx->myDoc->intSubset;
9281 newDoc->extSubset = ctx->myDoc->extSubset;
9282 }
9283 if (ctx->myDoc->URL != NULL) {
9284 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9285 }
9286 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9287 if (newDoc->children == NULL) {
9288 ctxt->sax = oldsax;
9289 xmlFreeParserCtxt(ctxt);
9290 newDoc->intSubset = NULL;
9291 newDoc->extSubset = NULL;
9292 xmlFreeDoc(newDoc);
9293 return(-1);
9294 }
9295 nodePush(ctxt, newDoc->children);
9296 if (ctx->myDoc == NULL) {
9297 ctxt->myDoc = newDoc;
9298 } else {
9299 ctxt->myDoc = ctx->myDoc;
9300 newDoc->children->doc = ctx->myDoc;
9301 }
9302
Daniel Veillard87a764e2001-06-20 17:41:10 +00009303 /*
9304 * Get the 4 first bytes and decode the charset
9305 * if enc != XML_CHAR_ENCODING_NONE
9306 * plug some encoding conversion routines.
9307 */
9308 GROW
9309 start[0] = RAW;
9310 start[1] = NXT(1);
9311 start[2] = NXT(2);
9312 start[3] = NXT(3);
9313 enc = xmlDetectCharEncoding(start, 4);
9314 if (enc != XML_CHAR_ENCODING_NONE) {
9315 xmlSwitchEncoding(ctxt, enc);
9316 }
9317
Owen Taylor3473f882001-02-23 17:55:21 +00009318 /*
9319 * Parse a possible text declaration first
9320 */
Owen Taylor3473f882001-02-23 17:55:21 +00009321 if ((RAW == '<') && (NXT(1) == '?') &&
9322 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9323 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9324 xmlParseTextDecl(ctxt);
9325 }
9326
9327 /*
9328 * Doing validity checking on chunk doesn't make sense
9329 */
9330 ctxt->instate = XML_PARSER_CONTENT;
9331 ctxt->validate = ctx->validate;
9332 ctxt->loadsubset = ctx->loadsubset;
9333 ctxt->depth = ctx->depth + 1;
9334 ctxt->replaceEntities = ctx->replaceEntities;
9335 if (ctxt->validate) {
9336 ctxt->vctxt.error = ctx->vctxt.error;
9337 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009338 } else {
9339 ctxt->vctxt.error = NULL;
9340 ctxt->vctxt.warning = NULL;
9341 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009342 ctxt->vctxt.nodeTab = NULL;
9343 ctxt->vctxt.nodeNr = 0;
9344 ctxt->vctxt.nodeMax = 0;
9345 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009346
9347 xmlParseContent(ctxt);
9348
9349 if ((RAW == '<') && (NXT(1) == '/')) {
9350 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9351 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9352 ctxt->sax->error(ctxt->userData,
9353 "chunk is not well balanced\n");
9354 ctxt->wellFormed = 0;
9355 ctxt->disableSAX = 1;
9356 } else if (RAW != 0) {
9357 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9358 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9359 ctxt->sax->error(ctxt->userData,
9360 "extra content at the end of well balanced chunk\n");
9361 ctxt->wellFormed = 0;
9362 ctxt->disableSAX = 1;
9363 }
9364 if (ctxt->node != newDoc->children) {
9365 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9366 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9367 ctxt->sax->error(ctxt->userData,
9368 "chunk is not well balanced\n");
9369 ctxt->wellFormed = 0;
9370 ctxt->disableSAX = 1;
9371 }
9372
9373 if (!ctxt->wellFormed) {
9374 if (ctxt->errNo == 0)
9375 ret = 1;
9376 else
9377 ret = ctxt->errNo;
9378 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009379 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009380 xmlNodePtr cur;
9381
9382 /*
9383 * Return the newly created nodeset after unlinking it from
9384 * they pseudo parent.
9385 */
9386 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009387 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009388 while (cur != NULL) {
9389 cur->parent = NULL;
9390 cur = cur->next;
9391 }
9392 newDoc->children->children = NULL;
9393 }
9394 ret = 0;
9395 }
9396 ctxt->sax = oldsax;
9397 xmlFreeParserCtxt(ctxt);
9398 newDoc->intSubset = NULL;
9399 newDoc->extSubset = NULL;
9400 xmlFreeDoc(newDoc);
9401
9402 return(ret);
9403}
9404
9405/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009406 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009407 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009408 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009409 * @sax: the SAX handler bloc (possibly NULL)
9410 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9411 * @depth: Used for loop detection, use 0
9412 * @URL: the URL for the entity to load
9413 * @ID: the System ID for the entity to load
9414 * @list: the return value for the set of parsed nodes
9415 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009416 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009417 *
9418 * Returns 0 if the entity is well formed, -1 in case of args problem and
9419 * the parser error code otherwise
9420 */
9421
Daniel Veillard257d9102001-05-08 10:41:44 +00009422static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009423xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9424 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009425 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009426 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009427 xmlParserCtxtPtr ctxt;
9428 xmlDocPtr newDoc;
9429 xmlSAXHandlerPtr oldsax = NULL;
9430 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009431 xmlChar start[4];
9432 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009433
9434 if (depth > 40) {
9435 return(XML_ERR_ENTITY_LOOP);
9436 }
9437
9438
9439
9440 if (list != NULL)
9441 *list = NULL;
9442 if ((URL == NULL) && (ID == NULL))
9443 return(-1);
9444 if (doc == NULL) /* @@ relax but check for dereferences */
9445 return(-1);
9446
9447
9448 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9449 if (ctxt == NULL) return(-1);
9450 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009451 if (oldctxt != NULL) {
9452 ctxt->_private = oldctxt->_private;
9453 ctxt->loadsubset = oldctxt->loadsubset;
9454 ctxt->validate = oldctxt->validate;
9455 ctxt->external = oldctxt->external;
9456 } else {
9457 /*
9458 * Doing validity checking on chunk without context
9459 * doesn't make sense
9460 */
9461 ctxt->_private = NULL;
9462 ctxt->validate = 0;
9463 ctxt->external = 2;
9464 ctxt->loadsubset = 0;
9465 }
Owen Taylor3473f882001-02-23 17:55:21 +00009466 if (sax != NULL) {
9467 oldsax = ctxt->sax;
9468 ctxt->sax = sax;
9469 if (user_data != NULL)
9470 ctxt->userData = user_data;
9471 }
9472 newDoc = xmlNewDoc(BAD_CAST "1.0");
9473 if (newDoc == NULL) {
9474 xmlFreeParserCtxt(ctxt);
9475 return(-1);
9476 }
9477 if (doc != NULL) {
9478 newDoc->intSubset = doc->intSubset;
9479 newDoc->extSubset = doc->extSubset;
9480 }
9481 if (doc->URL != NULL) {
9482 newDoc->URL = xmlStrdup(doc->URL);
9483 }
9484 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9485 if (newDoc->children == NULL) {
9486 if (sax != NULL)
9487 ctxt->sax = oldsax;
9488 xmlFreeParserCtxt(ctxt);
9489 newDoc->intSubset = NULL;
9490 newDoc->extSubset = NULL;
9491 xmlFreeDoc(newDoc);
9492 return(-1);
9493 }
9494 nodePush(ctxt, newDoc->children);
9495 if (doc == NULL) {
9496 ctxt->myDoc = newDoc;
9497 } else {
9498 ctxt->myDoc = doc;
9499 newDoc->children->doc = doc;
9500 }
9501
Daniel Veillard87a764e2001-06-20 17:41:10 +00009502 /*
9503 * Get the 4 first bytes and decode the charset
9504 * if enc != XML_CHAR_ENCODING_NONE
9505 * plug some encoding conversion routines.
9506 */
9507 GROW;
9508 start[0] = RAW;
9509 start[1] = NXT(1);
9510 start[2] = NXT(2);
9511 start[3] = NXT(3);
9512 enc = xmlDetectCharEncoding(start, 4);
9513 if (enc != XML_CHAR_ENCODING_NONE) {
9514 xmlSwitchEncoding(ctxt, enc);
9515 }
9516
Owen Taylor3473f882001-02-23 17:55:21 +00009517 /*
9518 * Parse a possible text declaration first
9519 */
Owen Taylor3473f882001-02-23 17:55:21 +00009520 if ((RAW == '<') && (NXT(1) == '?') &&
9521 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9522 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9523 xmlParseTextDecl(ctxt);
9524 }
9525
Owen Taylor3473f882001-02-23 17:55:21 +00009526 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009527 ctxt->depth = depth;
9528
9529 xmlParseContent(ctxt);
9530
Daniel Veillard561b7f82002-03-20 21:55:57 +00009531 if ((RAW == '<') && (NXT(1) == '/')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009532 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9533 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9534 ctxt->sax->error(ctxt->userData,
9535 "chunk is not well balanced\n");
9536 ctxt->wellFormed = 0;
9537 ctxt->disableSAX = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00009538 } else if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00009539 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9540 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9541 ctxt->sax->error(ctxt->userData,
9542 "extra content at the end of well balanced chunk\n");
9543 ctxt->wellFormed = 0;
9544 ctxt->disableSAX = 1;
9545 }
9546 if (ctxt->node != newDoc->children) {
9547 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9548 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9549 ctxt->sax->error(ctxt->userData,
9550 "chunk is not well balanced\n");
9551 ctxt->wellFormed = 0;
9552 ctxt->disableSAX = 1;
9553 }
9554
9555 if (!ctxt->wellFormed) {
9556 if (ctxt->errNo == 0)
9557 ret = 1;
9558 else
9559 ret = ctxt->errNo;
9560 } else {
9561 if (list != NULL) {
9562 xmlNodePtr cur;
9563
9564 /*
9565 * Return the newly created nodeset after unlinking it from
9566 * they pseudo parent.
9567 */
9568 cur = newDoc->children->children;
9569 *list = cur;
9570 while (cur != NULL) {
9571 cur->parent = NULL;
9572 cur = cur->next;
9573 }
9574 newDoc->children->children = NULL;
9575 }
9576 ret = 0;
9577 }
9578 if (sax != NULL)
9579 ctxt->sax = oldsax;
9580 xmlFreeParserCtxt(ctxt);
9581 newDoc->intSubset = NULL;
9582 newDoc->extSubset = NULL;
9583 xmlFreeDoc(newDoc);
9584
9585 return(ret);
9586}
9587
9588/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009589 * xmlParseExternalEntity:
9590 * @doc: the document the chunk pertains to
9591 * @sax: the SAX handler bloc (possibly NULL)
9592 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9593 * @depth: Used for loop detection, use 0
9594 * @URL: the URL for the entity to load
9595 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009596 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009597 *
9598 * Parse an external general entity
9599 * An external general parsed entity is well-formed if it matches the
9600 * production labeled extParsedEnt.
9601 *
9602 * [78] extParsedEnt ::= TextDecl? content
9603 *
9604 * Returns 0 if the entity is well formed, -1 in case of args problem and
9605 * the parser error code otherwise
9606 */
9607
9608int
9609xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009610 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009611 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009612 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009613}
9614
9615/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009616 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009617 * @doc: the document the chunk pertains to
9618 * @sax: the SAX handler bloc (possibly NULL)
9619 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9620 * @depth: Used for loop detection, use 0
9621 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009622 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009623 *
9624 * Parse a well-balanced chunk of an XML document
9625 * called by the parser
9626 * The allowed sequence for the Well Balanced Chunk is the one defined by
9627 * the content production in the XML grammar:
9628 *
9629 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9630 *
9631 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9632 * the parser error code otherwise
9633 */
9634
9635int
9636xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009637 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009638 xmlParserCtxtPtr ctxt;
9639 xmlDocPtr newDoc;
9640 xmlSAXHandlerPtr oldsax = NULL;
9641 int size;
9642 int ret = 0;
9643
9644 if (depth > 40) {
9645 return(XML_ERR_ENTITY_LOOP);
9646 }
9647
9648
Daniel Veillardcda96922001-08-21 10:56:31 +00009649 if (lst != NULL)
9650 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009651 if (string == NULL)
9652 return(-1);
9653
9654 size = xmlStrlen(string);
9655
9656 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9657 if (ctxt == NULL) return(-1);
9658 ctxt->userData = ctxt;
9659 if (sax != NULL) {
9660 oldsax = ctxt->sax;
9661 ctxt->sax = sax;
9662 if (user_data != NULL)
9663 ctxt->userData = user_data;
9664 }
9665 newDoc = xmlNewDoc(BAD_CAST "1.0");
9666 if (newDoc == NULL) {
9667 xmlFreeParserCtxt(ctxt);
9668 return(-1);
9669 }
9670 if (doc != NULL) {
9671 newDoc->intSubset = doc->intSubset;
9672 newDoc->extSubset = doc->extSubset;
9673 }
9674 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9675 if (newDoc->children == NULL) {
9676 if (sax != NULL)
9677 ctxt->sax = oldsax;
9678 xmlFreeParserCtxt(ctxt);
9679 newDoc->intSubset = NULL;
9680 newDoc->extSubset = NULL;
9681 xmlFreeDoc(newDoc);
9682 return(-1);
9683 }
9684 nodePush(ctxt, newDoc->children);
9685 if (doc == NULL) {
9686 ctxt->myDoc = newDoc;
9687 } else {
9688 ctxt->myDoc = doc;
9689 newDoc->children->doc = doc;
9690 }
9691 ctxt->instate = XML_PARSER_CONTENT;
9692 ctxt->depth = depth;
9693
9694 /*
9695 * Doing validity checking on chunk doesn't make sense
9696 */
9697 ctxt->validate = 0;
9698 ctxt->loadsubset = 0;
9699
9700 xmlParseContent(ctxt);
9701
9702 if ((RAW == '<') && (NXT(1) == '/')) {
9703 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9704 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9705 ctxt->sax->error(ctxt->userData,
9706 "chunk is not well balanced\n");
9707 ctxt->wellFormed = 0;
9708 ctxt->disableSAX = 1;
9709 } else if (RAW != 0) {
9710 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9711 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9712 ctxt->sax->error(ctxt->userData,
9713 "extra content at the end of well balanced chunk\n");
9714 ctxt->wellFormed = 0;
9715 ctxt->disableSAX = 1;
9716 }
9717 if (ctxt->node != newDoc->children) {
9718 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9719 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9720 ctxt->sax->error(ctxt->userData,
9721 "chunk is not well balanced\n");
9722 ctxt->wellFormed = 0;
9723 ctxt->disableSAX = 1;
9724 }
9725
9726 if (!ctxt->wellFormed) {
9727 if (ctxt->errNo == 0)
9728 ret = 1;
9729 else
9730 ret = ctxt->errNo;
9731 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009732 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009733 xmlNodePtr cur;
9734
9735 /*
9736 * Return the newly created nodeset after unlinking it from
9737 * they pseudo parent.
9738 */
9739 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009740 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009741 while (cur != NULL) {
9742 cur->parent = NULL;
9743 cur = cur->next;
9744 }
9745 newDoc->children->children = NULL;
9746 }
9747 ret = 0;
9748 }
9749 if (sax != NULL)
9750 ctxt->sax = oldsax;
9751 xmlFreeParserCtxt(ctxt);
9752 newDoc->intSubset = NULL;
9753 newDoc->extSubset = NULL;
9754 xmlFreeDoc(newDoc);
9755
9756 return(ret);
9757}
9758
9759/**
9760 * xmlSAXParseEntity:
9761 * @sax: the SAX handler block
9762 * @filename: the filename
9763 *
9764 * parse an XML external entity out of context and build a tree.
9765 * It use the given SAX function block to handle the parsing callback.
9766 * If sax is NULL, fallback to the default DOM tree building routines.
9767 *
9768 * [78] extParsedEnt ::= TextDecl? content
9769 *
9770 * This correspond to a "Well Balanced" chunk
9771 *
9772 * Returns the resulting document tree
9773 */
9774
9775xmlDocPtr
9776xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9777 xmlDocPtr ret;
9778 xmlParserCtxtPtr ctxt;
9779 char *directory = NULL;
9780
9781 ctxt = xmlCreateFileParserCtxt(filename);
9782 if (ctxt == NULL) {
9783 return(NULL);
9784 }
9785 if (sax != NULL) {
9786 if (ctxt->sax != NULL)
9787 xmlFree(ctxt->sax);
9788 ctxt->sax = sax;
9789 ctxt->userData = NULL;
9790 }
9791
9792 if ((ctxt->directory == NULL) && (directory == NULL))
9793 directory = xmlParserGetDirectory(filename);
9794
9795 xmlParseExtParsedEnt(ctxt);
9796
9797 if (ctxt->wellFormed)
9798 ret = ctxt->myDoc;
9799 else {
9800 ret = NULL;
9801 xmlFreeDoc(ctxt->myDoc);
9802 ctxt->myDoc = NULL;
9803 }
9804 if (sax != NULL)
9805 ctxt->sax = NULL;
9806 xmlFreeParserCtxt(ctxt);
9807
9808 return(ret);
9809}
9810
9811/**
9812 * xmlParseEntity:
9813 * @filename: the filename
9814 *
9815 * parse an XML external entity out of context and build a tree.
9816 *
9817 * [78] extParsedEnt ::= TextDecl? content
9818 *
9819 * This correspond to a "Well Balanced" chunk
9820 *
9821 * Returns the resulting document tree
9822 */
9823
9824xmlDocPtr
9825xmlParseEntity(const char *filename) {
9826 return(xmlSAXParseEntity(NULL, filename));
9827}
9828
9829/**
9830 * xmlCreateEntityParserCtxt:
9831 * @URL: the entity URL
9832 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009833 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +00009834 *
9835 * Create a parser context for an external entity
9836 * Automatic support for ZLIB/Compress compressed document is provided
9837 * by default if found at compile-time.
9838 *
9839 * Returns the new parser context or NULL
9840 */
9841xmlParserCtxtPtr
9842xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9843 const xmlChar *base) {
9844 xmlParserCtxtPtr ctxt;
9845 xmlParserInputPtr inputStream;
9846 char *directory = NULL;
9847 xmlChar *uri;
9848
9849 ctxt = xmlNewParserCtxt();
9850 if (ctxt == NULL) {
9851 return(NULL);
9852 }
9853
9854 uri = xmlBuildURI(URL, base);
9855
9856 if (uri == NULL) {
9857 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9858 if (inputStream == NULL) {
9859 xmlFreeParserCtxt(ctxt);
9860 return(NULL);
9861 }
9862
9863 inputPush(ctxt, inputStream);
9864
9865 if ((ctxt->directory == NULL) && (directory == NULL))
9866 directory = xmlParserGetDirectory((char *)URL);
9867 if ((ctxt->directory == NULL) && (directory != NULL))
9868 ctxt->directory = directory;
9869 } else {
9870 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9871 if (inputStream == NULL) {
9872 xmlFree(uri);
9873 xmlFreeParserCtxt(ctxt);
9874 return(NULL);
9875 }
9876
9877 inputPush(ctxt, inputStream);
9878
9879 if ((ctxt->directory == NULL) && (directory == NULL))
9880 directory = xmlParserGetDirectory((char *)uri);
9881 if ((ctxt->directory == NULL) && (directory != NULL))
9882 ctxt->directory = directory;
9883 xmlFree(uri);
9884 }
9885
9886 return(ctxt);
9887}
9888
/************************************************************************
 *                                                                      *
 *                Front ends when parsing from a file                   *
 *                                                                      *
 ************************************************************************/
9894
9895/**
9896 * xmlCreateFileParserCtxt:
9897 * @filename: the filename
9898 *
9899 * Create a parser context for a file content.
9900 * Automatic support for ZLIB/Compress compressed document is provided
9901 * by default if found at compile-time.
9902 *
9903 * Returns the new parser context or NULL
9904 */
9905xmlParserCtxtPtr
9906xmlCreateFileParserCtxt(const char *filename)
9907{
9908 xmlParserCtxtPtr ctxt;
9909 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +00009910 char *directory = NULL;
9911
Owen Taylor3473f882001-02-23 17:55:21 +00009912 ctxt = xmlNewParserCtxt();
9913 if (ctxt == NULL) {
9914 if (xmlDefaultSAXHandler.error != NULL) {
9915 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9916 }
9917 return(NULL);
9918 }
9919
Daniel Veillard9f7b84b2001-08-23 15:31:19 +00009920 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009921 if (inputStream == NULL) {
9922 xmlFreeParserCtxt(ctxt);
9923 return(NULL);
9924 }
9925
Owen Taylor3473f882001-02-23 17:55:21 +00009926 inputPush(ctxt, inputStream);
9927 if ((ctxt->directory == NULL) && (directory == NULL))
9928 directory = xmlParserGetDirectory(filename);
9929 if ((ctxt->directory == NULL) && (directory != NULL))
9930 ctxt->directory = directory;
9931
9932 return(ctxt);
9933}
9934
9935/**
Daniel Veillarda293c322001-10-02 13:54:14 +00009936 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +00009937 * @sax: the SAX handler block
9938 * @filename: the filename
9939 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9940 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +00009941 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +00009942 *
9943 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9944 * compressed document is provided by default if found at compile-time.
9945 * It use the given SAX function block to handle the parsing callback.
9946 * If sax is NULL, fallback to the default DOM tree building routines.
9947 *
Daniel Veillarde19fc232002-04-22 16:01:24 +00009948 * User data (void *) is stored within the parser context in the
9949 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +00009950 *
Owen Taylor3473f882001-02-23 17:55:21 +00009951 * Returns the resulting document tree
9952 */
9953
9954xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +00009955xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
9956 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +00009957 xmlDocPtr ret;
9958 xmlParserCtxtPtr ctxt;
9959 char *directory = NULL;
9960
Daniel Veillard635ef722001-10-29 11:48:19 +00009961 xmlInitParser();
9962
Owen Taylor3473f882001-02-23 17:55:21 +00009963 ctxt = xmlCreateFileParserCtxt(filename);
9964 if (ctxt == NULL) {
9965 return(NULL);
9966 }
9967 if (sax != NULL) {
9968 if (ctxt->sax != NULL)
9969 xmlFree(ctxt->sax);
9970 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009971 }
Daniel Veillarda293c322001-10-02 13:54:14 +00009972 if (data!=NULL) {
9973 ctxt->_private=data;
9974 }
Owen Taylor3473f882001-02-23 17:55:21 +00009975
9976 if ((ctxt->directory == NULL) && (directory == NULL))
9977 directory = xmlParserGetDirectory(filename);
9978 if ((ctxt->directory == NULL) && (directory != NULL))
9979 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9980
9981 xmlParseDocument(ctxt);
9982
9983 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9984 else {
9985 ret = NULL;
9986 xmlFreeDoc(ctxt->myDoc);
9987 ctxt->myDoc = NULL;
9988 }
9989 if (sax != NULL)
9990 ctxt->sax = NULL;
9991 xmlFreeParserCtxt(ctxt);
9992
9993 return(ret);
9994}
9995
9996/**
Daniel Veillarda293c322001-10-02 13:54:14 +00009997 * xmlSAXParseFile:
9998 * @sax: the SAX handler block
9999 * @filename: the filename
10000 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10001 * documents
10002 *
10003 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10004 * compressed document is provided by default if found at compile-time.
10005 * It use the given SAX function block to handle the parsing callback.
10006 * If sax is NULL, fallback to the default DOM tree building routines.
10007 *
10008 * Returns the resulting document tree
10009 */
10010
10011xmlDocPtr
10012xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
10013 int recovery) {
10014 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
10015}
10016
10017/**
Owen Taylor3473f882001-02-23 17:55:21 +000010018 * xmlRecoverDoc:
10019 * @cur: a pointer to an array of xmlChar
10020 *
10021 * parse an XML in-memory document and build a tree.
10022 * In the case the document is not Well Formed, a tree is built anyway
10023 *
10024 * Returns the resulting document tree
10025 */
10026
10027xmlDocPtr
10028xmlRecoverDoc(xmlChar *cur) {
10029 return(xmlSAXParseDoc(NULL, cur, 1));
10030}
10031
10032/**
10033 * xmlParseFile:
10034 * @filename: the filename
10035 *
10036 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10037 * compressed document is provided by default if found at compile-time.
10038 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000010039 * Returns the resulting document tree if the file was wellformed,
10040 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000010041 */
10042
10043xmlDocPtr
10044xmlParseFile(const char *filename) {
10045 return(xmlSAXParseFile(NULL, filename, 0));
10046}
10047
10048/**
10049 * xmlRecoverFile:
10050 * @filename: the filename
10051 *
10052 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10053 * compressed document is provided by default if found at compile-time.
10054 * In the case the document is not Well Formed, a tree is built anyway
10055 *
10056 * Returns the resulting document tree
10057 */
10058
10059xmlDocPtr
10060xmlRecoverFile(const char *filename) {
10061 return(xmlSAXParseFile(NULL, filename, 1));
10062}
10063
10064
10065/**
10066 * xmlSetupParserForBuffer:
10067 * @ctxt: an XML parser context
10068 * @buffer: a xmlChar * buffer
10069 * @filename: a file name
10070 *
10071 * Setup the parser context to parse a new buffer; Clears any prior
10072 * contents from the parser context. The buffer parameter must not be
10073 * NULL, but the filename parameter can be
10074 */
10075void
10076xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
10077 const char* filename)
10078{
10079 xmlParserInputPtr input;
10080
10081 input = xmlNewInputStream(ctxt);
10082 if (input == NULL) {
10083 perror("malloc");
10084 xmlFree(ctxt);
10085 return;
10086 }
10087
10088 xmlClearParserCtxt(ctxt);
10089 if (filename != NULL)
10090 input->filename = xmlMemStrdup(filename);
10091 input->base = buffer;
10092 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010093 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000010094 inputPush(ctxt, input);
10095}
10096
10097/**
10098 * xmlSAXUserParseFile:
10099 * @sax: a SAX handler
10100 * @user_data: The user data returned on SAX callbacks
10101 * @filename: a file name
10102 *
10103 * parse an XML file and call the given SAX handler routines.
10104 * Automatic support for ZLIB/Compress compressed document is provided
10105 *
10106 * Returns 0 in case of success or a error number otherwise
10107 */
10108int
10109xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10110 const char *filename) {
10111 int ret = 0;
10112 xmlParserCtxtPtr ctxt;
10113
10114 ctxt = xmlCreateFileParserCtxt(filename);
10115 if (ctxt == NULL) return -1;
10116 if (ctxt->sax != &xmlDefaultSAXHandler)
10117 xmlFree(ctxt->sax);
10118 ctxt->sax = sax;
10119 if (user_data != NULL)
10120 ctxt->userData = user_data;
10121
10122 xmlParseDocument(ctxt);
10123
10124 if (ctxt->wellFormed)
10125 ret = 0;
10126 else {
10127 if (ctxt->errNo != 0)
10128 ret = ctxt->errNo;
10129 else
10130 ret = -1;
10131 }
10132 if (sax != NULL)
10133 ctxt->sax = NULL;
10134 xmlFreeParserCtxt(ctxt);
10135
10136 return ret;
10137}
10138
10139/************************************************************************
10140 * *
10141 * Front ends when parsing from memory *
10142 * *
10143 ************************************************************************/
10144
10145/**
10146 * xmlCreateMemoryParserCtxt:
10147 * @buffer: a pointer to a char array
10148 * @size: the size of the array
10149 *
10150 * Create a parser context for an XML in-memory document.
10151 *
10152 * Returns the new parser context or NULL
10153 */
10154xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010155xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010156 xmlParserCtxtPtr ctxt;
10157 xmlParserInputPtr input;
10158 xmlParserInputBufferPtr buf;
10159
10160 if (buffer == NULL)
10161 return(NULL);
10162 if (size <= 0)
10163 return(NULL);
10164
10165 ctxt = xmlNewParserCtxt();
10166 if (ctxt == NULL)
10167 return(NULL);
10168
10169 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
10170 if (buf == NULL) return(NULL);
10171
10172 input = xmlNewInputStream(ctxt);
10173 if (input == NULL) {
10174 xmlFreeParserCtxt(ctxt);
10175 return(NULL);
10176 }
10177
10178 input->filename = NULL;
10179 input->buf = buf;
10180 input->base = input->buf->buffer->content;
10181 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010182 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010183
10184 inputPush(ctxt, input);
10185 return(ctxt);
10186}
10187
10188/**
10189 * xmlSAXParseMemory:
10190 * @sax: the SAX handler block
10191 * @buffer: an pointer to a char array
10192 * @size: the size of the array
10193 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10194 * documents
10195 *
10196 * parse an XML in-memory block and use the given SAX function block
10197 * to handle the parsing callback. If sax is NULL, fallback to the default
10198 * DOM tree building routines.
10199 *
10200 * Returns the resulting document tree
10201 */
10202xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010203xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10204 int size, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000010205 xmlDocPtr ret;
10206 xmlParserCtxtPtr ctxt;
10207
10208 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10209 if (ctxt == NULL) return(NULL);
10210 if (sax != NULL) {
10211 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010212 }
10213
10214 xmlParseDocument(ctxt);
10215
10216 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10217 else {
10218 ret = NULL;
10219 xmlFreeDoc(ctxt->myDoc);
10220 ctxt->myDoc = NULL;
10221 }
10222 if (sax != NULL)
10223 ctxt->sax = NULL;
10224 xmlFreeParserCtxt(ctxt);
10225
10226 return(ret);
10227}
10228
10229/**
10230 * xmlParseMemory:
10231 * @buffer: an pointer to a char array
10232 * @size: the size of the array
10233 *
10234 * parse an XML in-memory block and build a tree.
10235 *
10236 * Returns the resulting document tree
10237 */
10238
Daniel Veillard50822cb2001-07-26 20:05:51 +000010239xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010240 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10241}
10242
10243/**
10244 * xmlRecoverMemory:
10245 * @buffer: an pointer to a char array
10246 * @size: the size of the array
10247 *
10248 * parse an XML in-memory block and build a tree.
10249 * In the case the document is not Well Formed, a tree is built anyway
10250 *
10251 * Returns the resulting document tree
10252 */
10253
Daniel Veillard50822cb2001-07-26 20:05:51 +000010254xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010255 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10256}
10257
10258/**
10259 * xmlSAXUserParseMemory:
10260 * @sax: a SAX handler
10261 * @user_data: The user data returned on SAX callbacks
10262 * @buffer: an in-memory XML document input
10263 * @size: the length of the XML document in bytes
10264 *
10265 * A better SAX parsing routine.
10266 * parse an XML in-memory buffer and call the given SAX handler routines.
10267 *
10268 * Returns 0 in case of success or a error number otherwise
10269 */
10270int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010271 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010272 int ret = 0;
10273 xmlParserCtxtPtr ctxt;
10274 xmlSAXHandlerPtr oldsax = NULL;
10275
10276 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10277 if (ctxt == NULL) return -1;
10278 if (sax != NULL) {
10279 oldsax = ctxt->sax;
10280 ctxt->sax = sax;
10281 }
Daniel Veillard30211a02001-04-26 09:33:18 +000010282 if (user_data != NULL)
10283 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010284
10285 xmlParseDocument(ctxt);
10286
10287 if (ctxt->wellFormed)
10288 ret = 0;
10289 else {
10290 if (ctxt->errNo != 0)
10291 ret = ctxt->errNo;
10292 else
10293 ret = -1;
10294 }
10295 if (sax != NULL) {
10296 ctxt->sax = oldsax;
10297 }
10298 xmlFreeParserCtxt(ctxt);
10299
10300 return ret;
10301}
10302
10303/**
10304 * xmlCreateDocParserCtxt:
10305 * @cur: a pointer to an array of xmlChar
10306 *
10307 * Creates a parser context for an XML in-memory document.
10308 *
10309 * Returns the new parser context or NULL
10310 */
10311xmlParserCtxtPtr
10312xmlCreateDocParserCtxt(xmlChar *cur) {
10313 int len;
10314
10315 if (cur == NULL)
10316 return(NULL);
10317 len = xmlStrlen(cur);
10318 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10319}
10320
10321/**
10322 * xmlSAXParseDoc:
10323 * @sax: the SAX handler block
10324 * @cur: a pointer to an array of xmlChar
10325 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10326 * documents
10327 *
10328 * parse an XML in-memory document and build a tree.
10329 * It use the given SAX function block to handle the parsing callback.
10330 * If sax is NULL, fallback to the default DOM tree building routines.
10331 *
10332 * Returns the resulting document tree
10333 */
10334
10335xmlDocPtr
10336xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10337 xmlDocPtr ret;
10338 xmlParserCtxtPtr ctxt;
10339
10340 if (cur == NULL) return(NULL);
10341
10342
10343 ctxt = xmlCreateDocParserCtxt(cur);
10344 if (ctxt == NULL) return(NULL);
10345 if (sax != NULL) {
10346 ctxt->sax = sax;
10347 ctxt->userData = NULL;
10348 }
10349
10350 xmlParseDocument(ctxt);
10351 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10352 else {
10353 ret = NULL;
10354 xmlFreeDoc(ctxt->myDoc);
10355 ctxt->myDoc = NULL;
10356 }
10357 if (sax != NULL)
10358 ctxt->sax = NULL;
10359 xmlFreeParserCtxt(ctxt);
10360
10361 return(ret);
10362}
10363
10364/**
10365 * xmlParseDoc:
10366 * @cur: a pointer to an array of xmlChar
10367 *
10368 * parse an XML in-memory document and build a tree.
10369 *
10370 * Returns the resulting document tree
10371 */
10372
10373xmlDocPtr
10374xmlParseDoc(xmlChar *cur) {
10375 return(xmlSAXParseDoc(NULL, cur, 0));
10376}
10377
Daniel Veillard8107a222002-01-13 14:10:10 +000010378/************************************************************************
10379 * *
10380 * Specific function to keep track of entities references *
10381 * and used by the XSLT debugger *
10382 * *
10383 ************************************************************************/
10384
10385static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
10386
10387/**
10388 * xmlAddEntityReference:
10389 * @ent : A valid entity
10390 * @firstNode : A valid first node for children of entity
10391 * @lastNode : A valid last node of children entity
10392 *
10393 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
10394 */
10395static void
10396xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
10397 xmlNodePtr lastNode)
10398{
10399 if (xmlEntityRefFunc != NULL) {
10400 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
10401 }
10402}
10403
10404
10405/**
10406 * xmlSetEntityReferenceFunc:
10407 * @func : A valid function
10408 *
10409 * Set the function to call call back when a xml reference has been made
10410 */
10411void
10412xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
10413{
10414 xmlEntityRefFunc = func;
10415}
Owen Taylor3473f882001-02-23 17:55:21 +000010416
10417/************************************************************************
10418 * *
10419 * Miscellaneous *
10420 * *
10421 ************************************************************************/
10422
10423#ifdef LIBXML_XPATH_ENABLED
10424#include <libxml/xpath.h>
10425#endif
10426
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010427extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000010428static int xmlParserInitialized = 0;
10429
10430/**
10431 * xmlInitParser:
10432 *
10433 * Initialization function for the XML parser.
10434 * This is not reentrant. Call once before processing in case of
10435 * use in multithreaded programs.
10436 */
10437
10438void
10439xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000010440 if (xmlParserInitialized != 0)
10441 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010442
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010443 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
10444 (xmlGenericError == NULL))
10445 initGenericErrorDefaultFunc(NULL);
Daniel Veillardd0463562001-10-13 09:15:48 +000010446 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000010447 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000010448 xmlInitCharEncodingHandlers();
10449 xmlInitializePredefinedEntities();
10450 xmlDefaultSAXHandlerInit();
10451 xmlRegisterDefaultInputCallbacks();
10452 xmlRegisterDefaultOutputCallbacks();
10453#ifdef LIBXML_HTML_ENABLED
10454 htmlInitAutoClose();
10455 htmlDefaultSAXHandlerInit();
10456#endif
10457#ifdef LIBXML_XPATH_ENABLED
10458 xmlXPathInit();
10459#endif
10460 xmlParserInitialized = 1;
10461}
10462
10463/**
10464 * xmlCleanupParser:
10465 *
10466 * Cleanup function for the XML parser. It tries to reclaim all
10467 * parsing related global memory allocated for the parser processing.
10468 * It doesn't deallocate any document related memory. Calling this
10469 * function should not prevent reusing the parser.
10470 */
10471
10472void
10473xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000010474 xmlCleanupCharEncodingHandlers();
10475 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000010476#ifdef LIBXML_CATALOG_ENABLED
10477 xmlCatalogCleanup();
10478#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000010479 xmlCleanupThreads();
10480 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010481}