blob: 4601997ae2e878e2fc999c457e51f88790f7f78b [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
44#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000045#include <libxml/threads.h>
46#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000056#ifdef LIBXML_CATALOG_ENABLED
57#include <libxml/catalog.h>
58#endif
Owen Taylor3473f882001-02-23 17:55:21 +000059
60#ifdef HAVE_CTYPE_H
61#include <ctype.h>
62#endif
63#ifdef HAVE_STDLIB_H
64#include <stdlib.h>
65#endif
66#ifdef HAVE_SYS_STAT_H
67#include <sys/stat.h>
68#endif
69#ifdef HAVE_FCNTL_H
70#include <fcntl.h>
71#endif
72#ifdef HAVE_UNISTD_H
73#include <unistd.h>
74#endif
75#ifdef HAVE_ZLIB_H
76#include <zlib.h>
77#endif
78
79
Daniel Veillard21a0f912001-02-25 19:54:14 +000080#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000081#define XML_PARSER_BUFFER_SIZE 100
82
Daniel Veillard5997aca2002-03-18 18:36:20 +000083#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
84
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The table is terminated by a NULL entry.
 */

static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
93
94/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +000095xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
96 const xmlChar **str);
97
Daniel Veillard257d9102001-05-08 10:41:44 +000098static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +000099xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
100 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000101 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000102 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000103
Daniel Veillard8107a222002-01-13 14:10:10 +0000104static void
105xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
106 xmlNodePtr lastNode);
107
Owen Taylor3473f882001-02-23 17:55:21 +0000108/************************************************************************
109 * *
110 * Parser stacks related functions and macros *
111 * *
112 ************************************************************************/
113
114xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
115 const xmlChar ** str);
116
117/*
118 * Generic function for accessing stacks in the Parser Context
119 */
120
121#define PUSH_AND_POP(scope, type, name) \
122scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
123 if (ctxt->name##Nr >= ctxt->name##Max) { \
124 ctxt->name##Max *= 2; \
125 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
126 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
127 if (ctxt->name##Tab == NULL) { \
128 xmlGenericError(xmlGenericErrorContext, \
129 "realloc failed !\n"); \
130 return(0); \
131 } \
132 } \
133 ctxt->name##Tab[ctxt->name##Nr] = value; \
134 ctxt->name = value; \
135 return(ctxt->name##Nr++); \
136} \
137scope type name##Pop(xmlParserCtxtPtr ctxt) { \
138 type ret; \
139 if (ctxt->name##Nr <= 0) return(0); \
140 ctxt->name##Nr--; \
141 if (ctxt->name##Nr > 0) \
142 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
143 else \
144 ctxt->name = NULL; \
145 ret = ctxt->name##Tab[ctxt->name##Nr]; \
146 ctxt->name##Tab[ctxt->name##Nr] = 0; \
147 return(ret); \
148} \
149
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000150/**
151 * inputPop:
152 * @ctxt: an XML parser context
153 *
154 * Pops the top parser input from the input stack
155 *
156 * Returns the input just removed
157 */
158/**
159 * inputPush:
160 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000161 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000162 *
163 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000164 *
165 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000166 */
167/**
168 * namePop:
169 * @ctxt: an XML parser context
170 *
171 * Pops the top element name from the name stack
172 *
173 * Returns the name just removed
174 */
175/**
176 * namePush:
177 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000178 * @value: the element name
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000179 *
180 * Pushes a new element name on top of the name stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000181 *
182 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000183 */
184/**
185 * nodePop:
186 * @ctxt: an XML parser context
187 *
188 * Pops the top element node from the node stack
189 *
190 * Returns the node just removed
191 */
192/**
193 * nodePush:
194 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000195 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000196 *
197 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000198 *
199 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000200 */
Owen Taylor3473f882001-02-23 17:55:21 +0000201/*
202 * Those macros actually generate the functions
203 */
204PUSH_AND_POP(extern, xmlParserInputPtr, input)
205PUSH_AND_POP(extern, xmlNodePtr, node)
206PUSH_AND_POP(extern, xmlChar*, name)
207
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000208static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000209 if (ctxt->spaceNr >= ctxt->spaceMax) {
210 ctxt->spaceMax *= 2;
211 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
212 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
213 if (ctxt->spaceTab == NULL) {
214 xmlGenericError(xmlGenericErrorContext,
215 "realloc failed !\n");
216 return(0);
217 }
218 }
219 ctxt->spaceTab[ctxt->spaceNr] = val;
220 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
221 return(ctxt->spaceNr++);
222}
223
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000224static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000225 int ret;
226 if (ctxt->spaceNr <= 0) return(0);
227 ctxt->spaceNr--;
228 if (ctxt->spaceNr > 0)
229 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
230 else
231 ctxt->space = NULL;
232 ret = ctxt->spaceTab[ctxt->spaceNr];
233 ctxt->spaceTab[ctxt->spaceNr] = -1;
234 return(ret);
235}
236
237/*
238 * Macros for accessing the content. Those should be used only by the parser,
239 * and not exported.
240 *
241 * Dirty macros, i.e. one often need to make assumption on the context to
242 * use them
243 *
244 * CUR_PTR return the current pointer to the xmlChar to be parsed.
245 * To be used with extreme caution since operations consuming
246 * characters may move the input buffer to a different location !
247 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
248 * This should be used internally by the parser
249 * only to compare to ASCII values otherwise it would break when
250 * running with UTF-8 encoding.
251 * RAW same as CUR but in the input buffer, bypass any token
252 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Like CUR, it should be used only
 *           to compare against ASCII-based substrings.
255 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
256 * strings within the parser.
257 *
258 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
259 *
260 * NEXT Skip to the next character, this does the proper decoding
261 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000262 * NEXTL(l) Skip l xmlChar in the input buffer
Owen Taylor3473f882001-02-23 17:55:21 +0000263 * CUR_CHAR(l) returns the current unicode character (int), set l
264 * to the number of xmlChars used for the encoding [0-5].
265 * CUR_SCHAR same but operate on a string instead of the context
266 * COPY_BUF copy the current unicode char to the target buffer, increment
267 * the index
268 * GROW, SHRINK handling of input buffers
269 */
270
/*
 * All of these macros expect a variable named `ctxt` (the parser context)
 * to be in scope at the point of use.
 */

/* Current byte of the input, or -1 while a pushed-back token is pending. */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
/* The pending token if there is one, the current input byte otherwise. */
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/* Note: evaluates (val) twice, do not pass expressions with side effects. */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
            xmlPopInput(ctxt);						\
  } while (0)

/*
 * SHRINK, GROW, NEXT1 and COPY_BUF deliberately keep their historical
 * "bare statement" shape rather than being wrapped in do { } while (0):
 * existing call sites are not guaranteed to supply a trailing semicolon,
 * so changing the shape could break them. Do not use them as the
 * unbraced body of an if that has an else branch.
 */
#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input);					\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
            xmlPopInput(ctxt);						\
  }

#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {	\
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
            xmlPopInput(ctxt);						\
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {								\
        ctxt->input->cur++;						\
        ctxt->nbChars++;						\
        if (*ctxt->input->cur == 0)					\
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/* Arguments are parenthesized so that expression arguments expand safely. */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
        ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += (l);				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * Copies the character v, whose encoded length is l, into buffer b at
 * index i and advances i: a plain byte store when l is 1, otherwise
 * delegated to xmlCopyCharMultiByte(), whose return value is added to i.
 * i must be a modifiable lvalue.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +0000323
324/**
325 * xmlSkipBlankChars:
326 * @ctxt: the XML parser context
327 *
328 * skip all blanks character found at that point in the input streams.
329 * It pops up finished entities in the process if allowable at that point.
330 *
331 * Returns the number of space chars skipped
332 */
333
334int
335xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000336 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000337
Daniel Veillard02141ea2001-04-30 11:46:40 +0000338 if (ctxt->token != 0) {
339 if (!IS_BLANK(ctxt->token))
340 return(0);
341 ctxt->token = 0;
342 res++;
343 }
Owen Taylor3473f882001-02-23 17:55:21 +0000344 /*
345 * It's Okay to use CUR/NEXT here since all the blanks are on
346 * the ASCII range.
347 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000348 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
349 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000350 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000351 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000352 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000353 cur = ctxt->input->cur;
354 while (IS_BLANK(*cur)) {
355 if (*cur == '\n') {
356 ctxt->input->line++; ctxt->input->col = 1;
357 }
358 cur++;
359 res++;
360 if (*cur == 0) {
361 ctxt->input->cur = cur;
362 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
363 cur = ctxt->input->cur;
364 }
365 }
366 ctxt->input->cur = cur;
367 } else {
368 int cur;
369 do {
370 cur = CUR;
371 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
372 NEXT;
373 cur = CUR;
374 res++;
375 }
376 while ((cur == 0) && (ctxt->inputNr > 1) &&
377 (ctxt->instate != XML_PARSER_COMMENT)) {
378 xmlPopInput(ctxt);
379 cur = CUR;
380 }
381 /*
382 * Need to handle support of entities branching here
383 */
384 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
385 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
386 }
Owen Taylor3473f882001-02-23 17:55:21 +0000387 return(res);
388}
389
390/************************************************************************
391 * *
392 * Commodity functions to handle entities *
393 * *
394 ************************************************************************/
395
396/**
397 * xmlPopInput:
398 * @ctxt: an XML parser context
399 *
400 * xmlPopInput: the current input pointed by ctxt->input came to an end
401 * pop it and return the next char.
402 *
403 * Returns the current xmlChar in the parser context
404 */
405xmlChar
406xmlPopInput(xmlParserCtxtPtr ctxt) {
407 if (ctxt->inputNr == 1) return(0); /* End of main Input */
408 if (xmlParserDebugEntities)
409 xmlGenericError(xmlGenericErrorContext,
410 "Popping input %d\n", ctxt->inputNr);
411 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +0000412 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000413 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
414 return(xmlPopInput(ctxt));
415 return(CUR);
416}
417
418/**
419 * xmlPushInput:
420 * @ctxt: an XML parser context
421 * @input: an XML parser input fragment (entity, XML fragment ...).
422 *
423 * xmlPushInput: switch to a new input stream which is stacked on top
424 * of the previous one(s).
425 */
426void
427xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
428 if (input == NULL) return;
429
430 if (xmlParserDebugEntities) {
431 if ((ctxt->input != NULL) && (ctxt->input->filename))
432 xmlGenericError(xmlGenericErrorContext,
433 "%s(%d): ", ctxt->input->filename,
434 ctxt->input->line);
435 xmlGenericError(xmlGenericErrorContext,
436 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
437 }
438 inputPush(ctxt, input);
439 GROW;
440}
441
442/**
443 * xmlParseCharRef:
444 * @ctxt: an XML parser context
445 *
446 * parse Reference declarations
447 *
448 * [66] CharRef ::= '&#' [0-9]+ ';' |
449 * '&#x' [0-9a-fA-F]+ ';'
450 *
451 * [ WFC: Legal Character ]
452 * Characters referred to using character references must match the
453 * production for Char.
454 *
455 * Returns the value parsed (as an int), 0 in case of error
456 */
457int
458xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000459 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000460 int count = 0;
461
462 if (ctxt->token != 0) {
463 val = ctxt->token;
464 ctxt->token = 0;
465 return(val);
466 }
467 /*
468 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
469 */
Daniel Veillard561b7f82002-03-20 21:55:57 +0000470 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +0000471 (NXT(2) == 'x')) {
472 SKIP(3);
473 GROW;
474 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000475 if (count++ > 20) {
476 count = 0;
477 GROW;
478 }
479 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000480 val = val * 16 + (CUR - '0');
481 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
482 val = val * 16 + (CUR - 'a') + 10;
483 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
484 val = val * 16 + (CUR - 'A') + 10;
485 else {
486 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
487 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
488 ctxt->sax->error(ctxt->userData,
489 "xmlParseCharRef: invalid hexadecimal value\n");
490 ctxt->wellFormed = 0;
491 ctxt->disableSAX = 1;
492 val = 0;
493 break;
494 }
495 NEXT;
496 count++;
497 }
498 if (RAW == ';') {
499 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
500 ctxt->nbChars ++;
501 ctxt->input->cur++;
502 }
Daniel Veillard561b7f82002-03-20 21:55:57 +0000503 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +0000504 SKIP(2);
505 GROW;
506 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000507 if (count++ > 20) {
508 count = 0;
509 GROW;
510 }
511 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000512 val = val * 10 + (CUR - '0');
513 else {
514 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
515 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
516 ctxt->sax->error(ctxt->userData,
517 "xmlParseCharRef: invalid decimal value\n");
518 ctxt->wellFormed = 0;
519 ctxt->disableSAX = 1;
520 val = 0;
521 break;
522 }
523 NEXT;
524 count++;
525 }
526 if (RAW == ';') {
527 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
528 ctxt->nbChars ++;
529 ctxt->input->cur++;
530 }
531 } else {
532 ctxt->errNo = XML_ERR_INVALID_CHARREF;
533 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
534 ctxt->sax->error(ctxt->userData,
535 "xmlParseCharRef: invalid value\n");
536 ctxt->wellFormed = 0;
537 ctxt->disableSAX = 1;
538 }
539
540 /*
541 * [ WFC: Legal Character ]
542 * Characters referred to using character references must match the
543 * production for Char.
544 */
545 if (IS_CHAR(val)) {
546 return(val);
547 } else {
548 ctxt->errNo = XML_ERR_INVALID_CHAR;
549 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000550 ctxt->sax->error(ctxt->userData,
551 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000552 val);
553 ctxt->wellFormed = 0;
554 ctxt->disableSAX = 1;
555 }
556 return(0);
557}
558
559/**
560 * xmlParseStringCharRef:
561 * @ctxt: an XML parser context
562 * @str: a pointer to an index in the string
563 *
564 * parse Reference declarations, variant parsing from a string rather
565 * than an an input flow.
566 *
567 * [66] CharRef ::= '&#' [0-9]+ ';' |
568 * '&#x' [0-9a-fA-F]+ ';'
569 *
570 * [ WFC: Legal Character ]
571 * Characters referred to using character references must match the
572 * production for Char.
573 *
574 * Returns the value parsed (as an int), 0 in case of error, str will be
575 * updated to the current value of the index
576 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000577static int
Owen Taylor3473f882001-02-23 17:55:21 +0000578xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
579 const xmlChar *ptr;
580 xmlChar cur;
581 int val = 0;
582
583 if ((str == NULL) || (*str == NULL)) return(0);
584 ptr = *str;
585 cur = *ptr;
586 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
587 ptr += 3;
588 cur = *ptr;
589 while (cur != ';') { /* Non input consuming loop */
590 if ((cur >= '0') && (cur <= '9'))
591 val = val * 16 + (cur - '0');
592 else if ((cur >= 'a') && (cur <= 'f'))
593 val = val * 16 + (cur - 'a') + 10;
594 else if ((cur >= 'A') && (cur <= 'F'))
595 val = val * 16 + (cur - 'A') + 10;
596 else {
597 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
598 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
599 ctxt->sax->error(ctxt->userData,
600 "xmlParseStringCharRef: invalid hexadecimal value\n");
601 ctxt->wellFormed = 0;
602 ctxt->disableSAX = 1;
603 val = 0;
604 break;
605 }
606 ptr++;
607 cur = *ptr;
608 }
609 if (cur == ';')
610 ptr++;
611 } else if ((cur == '&') && (ptr[1] == '#')){
612 ptr += 2;
613 cur = *ptr;
614 while (cur != ';') { /* Non input consuming loops */
615 if ((cur >= '0') && (cur <= '9'))
616 val = val * 10 + (cur - '0');
617 else {
618 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
619 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
620 ctxt->sax->error(ctxt->userData,
621 "xmlParseStringCharRef: invalid decimal value\n");
622 ctxt->wellFormed = 0;
623 ctxt->disableSAX = 1;
624 val = 0;
625 break;
626 }
627 ptr++;
628 cur = *ptr;
629 }
630 if (cur == ';')
631 ptr++;
632 } else {
633 ctxt->errNo = XML_ERR_INVALID_CHARREF;
634 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
635 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000636 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000637 ctxt->wellFormed = 0;
638 ctxt->disableSAX = 1;
639 return(0);
640 }
641 *str = ptr;
642
643 /*
644 * [ WFC: Legal Character ]
645 * Characters referred to using character references must match the
646 * production for Char.
647 */
648 if (IS_CHAR(val)) {
649 return(val);
650 } else {
651 ctxt->errNo = XML_ERR_INVALID_CHAR;
652 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
653 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000654 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000655 ctxt->wellFormed = 0;
656 ctxt->disableSAX = 1;
657 }
658 return(0);
659}
660
661/**
662 * xmlParserHandlePEReference:
663 * @ctxt: the parser context
664 *
665 * [69] PEReference ::= '%' Name ';'
666 *
667 * [ WFC: No Recursion ]
668 * A parsed entity must not contain a recursive
669 * reference to itself, either directly or indirectly.
670 *
671 * [ WFC: Entity Declared ]
672 * In a document without any DTD, a document with only an internal DTD
673 * subset which contains no parameter entity references, or a document
674 * with "standalone='yes'", ... ... The declaration of a parameter
675 * entity must precede any reference to it...
676 *
677 * [ VC: Entity Declared ]
678 * In a document with an external subset or external parameter entities
679 * with "standalone='no'", ... ... The declaration of a parameter entity
680 * must precede any reference to it...
681 *
682 * [ WFC: In DTD ]
683 * Parameter-entity references may only appear in the DTD.
684 * NOTE: misleading but this is handled.
685 *
686 * A PEReference may have been detected in the current input stream
687 * the handling is done accordingly to
688 * http://www.w3.org/TR/REC-xml#entproc
689 * i.e.
690 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000691 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000692 */
693void
694xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
695 xmlChar *name;
696 xmlEntityPtr entity = NULL;
697 xmlParserInputPtr input;
698
699 if (ctxt->token != 0) {
700 return;
701 }
702 if (RAW != '%') return;
703 switch(ctxt->instate) {
704 case XML_PARSER_CDATA_SECTION:
705 return;
706 case XML_PARSER_COMMENT:
707 return;
708 case XML_PARSER_START_TAG:
709 return;
710 case XML_PARSER_END_TAG:
711 return;
712 case XML_PARSER_EOF:
713 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
715 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
716 ctxt->wellFormed = 0;
717 ctxt->disableSAX = 1;
718 return;
719 case XML_PARSER_PROLOG:
720 case XML_PARSER_START:
721 case XML_PARSER_MISC:
722 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
723 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
724 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
725 ctxt->wellFormed = 0;
726 ctxt->disableSAX = 1;
727 return;
728 case XML_PARSER_ENTITY_DECL:
729 case XML_PARSER_CONTENT:
730 case XML_PARSER_ATTRIBUTE_VALUE:
731 case XML_PARSER_PI:
732 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +0000733 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +0000734 /* we just ignore it there */
735 return;
736 case XML_PARSER_EPILOG:
737 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
738 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
739 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
740 ctxt->wellFormed = 0;
741 ctxt->disableSAX = 1;
742 return;
743 case XML_PARSER_ENTITY_VALUE:
744 /*
745 * NOTE: in the case of entity values, we don't do the
746 * substitution here since we need the literal
747 * entity value to be able to save the internal
748 * subset of the document.
749 * This will be handled by xmlStringDecodeEntities
750 */
751 return;
752 case XML_PARSER_DTD:
753 /*
754 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
755 * In the internal DTD subset, parameter-entity references
756 * can occur only where markup declarations can occur, not
757 * within markup declarations.
758 * In that case this is handled in xmlParseMarkupDecl
759 */
760 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
761 return;
762 break;
763 case XML_PARSER_IGNORE:
764 return;
765 }
766
767 NEXT;
768 name = xmlParseName(ctxt);
769 if (xmlParserDebugEntities)
770 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000771 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000772 if (name == NULL) {
773 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
774 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000775 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000776 ctxt->wellFormed = 0;
777 ctxt->disableSAX = 1;
778 } else {
779 if (RAW == ';') {
780 NEXT;
781 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
782 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
783 if (entity == NULL) {
784
785 /*
786 * [ WFC: Entity Declared ]
787 * In a document without any DTD, a document with only an
788 * internal DTD subset which contains no parameter entity
789 * references, or a document with "standalone='yes'", ...
790 * ... The declaration of a parameter entity must precede
791 * any reference to it...
792 */
793 if ((ctxt->standalone == 1) ||
794 ((ctxt->hasExternalSubset == 0) &&
795 (ctxt->hasPErefs == 0))) {
796 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
797 ctxt->sax->error(ctxt->userData,
798 "PEReference: %%%s; not found\n", name);
799 ctxt->wellFormed = 0;
800 ctxt->disableSAX = 1;
801 } else {
802 /*
803 * [ VC: Entity Declared ]
804 * In a document with an external subset or external
805 * parameter entities with "standalone='no'", ...
806 * ... The declaration of a parameter entity must precede
807 * any reference to it...
808 */
809 if ((!ctxt->disableSAX) &&
810 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
811 ctxt->vctxt.error(ctxt->vctxt.userData,
812 "PEReference: %%%s; not found\n", name);
813 } else if ((!ctxt->disableSAX) &&
814 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
815 ctxt->sax->warning(ctxt->userData,
816 "PEReference: %%%s; not found\n", name);
817 ctxt->valid = 0;
818 }
819 } else {
820 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
821 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000822 xmlChar start[4];
823 xmlCharEncoding enc;
824
Owen Taylor3473f882001-02-23 17:55:21 +0000825 /*
826 * handle the extra spaces added before and after
827 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000828 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000829 */
830 input = xmlNewEntityInputStream(ctxt, entity);
831 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000832
833 /*
834 * Get the 4 first bytes and decode the charset
835 * if enc != XML_CHAR_ENCODING_NONE
836 * plug some encoding conversion routines.
837 */
838 GROW
Daniel Veillard561b7f82002-03-20 21:55:57 +0000839 start[0] = RAW;
840 start[1] = NXT(1);
841 start[2] = NXT(2);
842 start[3] = NXT(3);
843 enc = xmlDetectCharEncoding(start, 4);
844 if (enc != XML_CHAR_ENCODING_NONE) {
845 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000846 }
847
Owen Taylor3473f882001-02-23 17:55:21 +0000848 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
849 (RAW == '<') && (NXT(1) == '?') &&
850 (NXT(2) == 'x') && (NXT(3) == 'm') &&
851 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
852 xmlParseTextDecl(ctxt);
853 }
854 if (ctxt->token == 0)
855 ctxt->token = ' ';
856 } else {
857 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
858 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000859 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000860 name);
861 ctxt->wellFormed = 0;
862 ctxt->disableSAX = 1;
863 }
864 }
865 } else {
866 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
867 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
868 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000869 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000870 ctxt->wellFormed = 0;
871 ctxt->disableSAX = 1;
872 }
873 xmlFree(name);
874 }
875}
876
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates `buffer` accordingly. It must be
 * used inside a function returning a pointer: on allocation failure the
 * macro reports the error, releases the old buffer (which would otherwise
 * be leaked, since the enclosing function is abandoned) and makes that
 * function return NULL.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_grown;						\
    buffer##_size *= 2;							\
    buffer##_grown = (xmlChar *)					\
                xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (buffer##_grown == NULL) {					\
        perror("realloc failed");					\
        xmlFree(buffer);						\
        return(NULL);							\
    }									\
    buffer = buffer##_grown;						\
}
889
890/**
891 * xmlStringDecodeEntities:
892 * @ctxt: the parser context
893 * @str: the input string
894 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
895 * @end: an end marker xmlChar, 0 if none
896 * @end2: an end marker xmlChar, 0 if none
897 * @end3: an end marker xmlChar, 0 if none
898 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000899 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +0000900 *
901 * [67] Reference ::= EntityRef | CharRef
902 *
903 * [69] PEReference ::= '%' Name ';'
904 *
905 * Returns A newly allocated string with the substitution done. The caller
906 * must deallocate it !
907 */
908xmlChar *
909xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
910 xmlChar end, xmlChar end2, xmlChar end3) {
911 xmlChar *buffer = NULL;
912 int buffer_size = 0;
913
914 xmlChar *current = NULL;
915 xmlEntityPtr ent;
916 int c,l;
917 int nbchars = 0;
918
919 if (str == NULL)
920 return(NULL);
921
922 if (ctxt->depth > 40) {
923 ctxt->errNo = XML_ERR_ENTITY_LOOP;
924 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
925 ctxt->sax->error(ctxt->userData,
926 "Detected entity reference loop\n");
927 ctxt->wellFormed = 0;
928 ctxt->disableSAX = 1;
929 return(NULL);
930 }
931
932 /*
933 * allocate a translation buffer.
934 */
935 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
936 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
937 if (buffer == NULL) {
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000938 perror("xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +0000939 return(NULL);
940 }
941
942 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000943 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +0000944 * we are operating on already parsed values.
945 */
946 c = CUR_SCHAR(str, l);
947 while ((c != 0) && (c != end) && /* non input consuming loop */
948 (c != end2) && (c != end3)) {
949
950 if (c == 0) break;
951 if ((c == '&') && (str[1] == '#')) {
952 int val = xmlParseStringCharRef(ctxt, &str);
953 if (val != 0) {
954 COPY_BUF(0,buffer,nbchars,val);
955 }
956 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
957 if (xmlParserDebugEntities)
958 xmlGenericError(xmlGenericErrorContext,
959 "String decoding Entity Reference: %.30s\n",
960 str);
961 ent = xmlParseStringEntityRef(ctxt, &str);
962 if ((ent != NULL) &&
963 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
964 if (ent->content != NULL) {
965 COPY_BUF(0,buffer,nbchars,ent->content[0]);
966 } else {
967 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
968 ctxt->sax->error(ctxt->userData,
969 "internal error entity has no content\n");
970 }
971 } else if ((ent != NULL) && (ent->content != NULL)) {
972 xmlChar *rep;
973
974 ctxt->depth++;
975 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
976 0, 0, 0);
977 ctxt->depth--;
978 if (rep != NULL) {
979 current = rep;
980 while (*current != 0) { /* non input consuming loop */
981 buffer[nbchars++] = *current++;
982 if (nbchars >
983 buffer_size - XML_PARSER_BUFFER_SIZE) {
984 growBuffer(buffer);
985 }
986 }
987 xmlFree(rep);
988 }
989 } else if (ent != NULL) {
990 int i = xmlStrlen(ent->name);
991 const xmlChar *cur = ent->name;
992
993 buffer[nbchars++] = '&';
994 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
995 growBuffer(buffer);
996 }
997 for (;i > 0;i--)
998 buffer[nbchars++] = *cur++;
999 buffer[nbchars++] = ';';
1000 }
1001 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1002 if (xmlParserDebugEntities)
1003 xmlGenericError(xmlGenericErrorContext,
1004 "String decoding PE Reference: %.30s\n", str);
1005 ent = xmlParseStringPEReference(ctxt, &str);
1006 if (ent != NULL) {
1007 xmlChar *rep;
1008
1009 ctxt->depth++;
1010 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1011 0, 0, 0);
1012 ctxt->depth--;
1013 if (rep != NULL) {
1014 current = rep;
1015 while (*current != 0) { /* non input consuming loop */
1016 buffer[nbchars++] = *current++;
1017 if (nbchars >
1018 buffer_size - XML_PARSER_BUFFER_SIZE) {
1019 growBuffer(buffer);
1020 }
1021 }
1022 xmlFree(rep);
1023 }
1024 }
1025 } else {
1026 COPY_BUF(l,buffer,nbchars,c);
1027 str += l;
1028 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1029 growBuffer(buffer);
1030 }
1031 }
1032 c = CUR_SCHAR(str, l);
1033 }
1034 buffer[nbchars++] = 0;
1035 return(buffer);
1036}
1037
1038
1039/************************************************************************
1040 * *
1041 * Commodity functions to handle xmlChars *
1042 * *
1043 ************************************************************************/
1044
1045/**
1046 * xmlStrndup:
1047 * @cur: the input xmlChar *
1048 * @len: the len of @cur
1049 *
1050 * a strndup for array of xmlChar's
1051 *
1052 * Returns a new xmlChar * or NULL
1053 */
1054xmlChar *
1055xmlStrndup(const xmlChar *cur, int len) {
1056 xmlChar *ret;
1057
1058 if ((cur == NULL) || (len < 0)) return(NULL);
1059 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1060 if (ret == NULL) {
1061 xmlGenericError(xmlGenericErrorContext,
1062 "malloc of %ld byte failed\n",
1063 (len + 1) * (long)sizeof(xmlChar));
1064 return(NULL);
1065 }
1066 memcpy(ret, cur, len * sizeof(xmlChar));
1067 ret[len] = 0;
1068 return(ret);
1069}
1070
1071/**
1072 * xmlStrdup:
1073 * @cur: the input xmlChar *
1074 *
1075 * a strdup for array of xmlChar's. Since they are supposed to be
1076 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1077 * a termination mark of '0'.
1078 *
1079 * Returns a new xmlChar * or NULL
1080 */
1081xmlChar *
1082xmlStrdup(const xmlChar *cur) {
1083 const xmlChar *p = cur;
1084
1085 if (cur == NULL) return(NULL);
1086 while (*p != 0) p++; /* non input consuming */
1087 return(xmlStrndup(cur, p - cur));
1088}
1089
1090/**
1091 * xmlCharStrndup:
1092 * @cur: the input char *
1093 * @len: the len of @cur
1094 *
1095 * a strndup for char's to xmlChar's
1096 *
1097 * Returns a new xmlChar * or NULL
1098 */
1099
1100xmlChar *
1101xmlCharStrndup(const char *cur, int len) {
1102 int i;
1103 xmlChar *ret;
1104
1105 if ((cur == NULL) || (len < 0)) return(NULL);
1106 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1107 if (ret == NULL) {
1108 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1109 (len + 1) * (long)sizeof(xmlChar));
1110 return(NULL);
1111 }
1112 for (i = 0;i < len;i++)
1113 ret[i] = (xmlChar) cur[i];
1114 ret[len] = 0;
1115 return(ret);
1116}
1117
1118/**
1119 * xmlCharStrdup:
1120 * @cur: the input char *
1121 * @len: the len of @cur
1122 *
1123 * a strdup for char's to xmlChar's
1124 *
1125 * Returns a new xmlChar * or NULL
1126 */
1127
1128xmlChar *
1129xmlCharStrdup(const char *cur) {
1130 const char *p = cur;
1131
1132 if (cur == NULL) return(NULL);
1133 while (*p != '\0') p++; /* non input consuming */
1134 return(xmlCharStrndup(cur, p - cur));
1135}
1136
1137/**
1138 * xmlStrcmp:
1139 * @str1: the first xmlChar *
1140 * @str2: the second xmlChar *
1141 *
1142 * a strcmp for xmlChar's
1143 *
1144 * Returns the integer result of the comparison
1145 */
1146
1147int
1148xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1149 register int tmp;
1150
1151 if (str1 == str2) return(0);
1152 if (str1 == NULL) return(-1);
1153 if (str2 == NULL) return(1);
1154 do {
1155 tmp = *str1++ - *str2;
1156 if (tmp != 0) return(tmp);
1157 } while (*str2++ != 0);
1158 return 0;
1159}
1160
1161/**
1162 * xmlStrEqual:
1163 * @str1: the first xmlChar *
1164 * @str2: the second xmlChar *
1165 *
1166 * Check if both string are equal of have same content
1167 * Should be a bit more readable and faster than xmlStrEqual()
1168 *
1169 * Returns 1 if they are equal, 0 if they are different
1170 */
1171
1172int
1173xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1174 if (str1 == str2) return(1);
1175 if (str1 == NULL) return(0);
1176 if (str2 == NULL) return(0);
1177 do {
1178 if (*str1++ != *str2) return(0);
1179 } while (*str2++);
1180 return(1);
1181}
1182
1183/**
1184 * xmlStrncmp:
1185 * @str1: the first xmlChar *
1186 * @str2: the second xmlChar *
1187 * @len: the max comparison length
1188 *
1189 * a strncmp for xmlChar's
1190 *
1191 * Returns the integer result of the comparison
1192 */
1193
1194int
1195xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1196 register int tmp;
1197
1198 if (len <= 0) return(0);
1199 if (str1 == str2) return(0);
1200 if (str1 == NULL) return(-1);
1201 if (str2 == NULL) return(1);
1202 do {
1203 tmp = *str1++ - *str2;
1204 if (tmp != 0 || --len == 0) return(tmp);
1205 } while (*str2++ != 0);
1206 return 0;
1207}
1208
/*
 * Case folding table used by the case-insensitive comparison routines:
 * the ASCII upper case letters 'A'..'Z' (0x41..0x5A) map to the lower
 * case letters 'a'..'z' (0x61..0x7A), every other value maps to itself.
 * Note that 0x5B ('[') must map to itself, it is not a letter.
 */
static const xmlChar casemap[256] = {
    0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
    0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
    0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
    0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
    0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
    0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
    0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
    0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
    0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
    0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
    0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
    0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
    0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
    0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
    0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
    0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
    0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
    0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
    0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
    0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
    0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
    0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
    0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
    0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
    0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
    0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
    0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
    0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
    0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
    0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
    0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
    0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
};
1243
1244/**
1245 * xmlStrcasecmp:
1246 * @str1: the first xmlChar *
1247 * @str2: the second xmlChar *
1248 *
1249 * a strcasecmp for xmlChar's
1250 *
1251 * Returns the integer result of the comparison
1252 */
1253
1254int
1255xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1256 register int tmp;
1257
1258 if (str1 == str2) return(0);
1259 if (str1 == NULL) return(-1);
1260 if (str2 == NULL) return(1);
1261 do {
1262 tmp = casemap[*str1++] - casemap[*str2];
1263 if (tmp != 0) return(tmp);
1264 } while (*str2++ != 0);
1265 return 0;
1266}
1267
1268/**
1269 * xmlStrncasecmp:
1270 * @str1: the first xmlChar *
1271 * @str2: the second xmlChar *
1272 * @len: the max comparison length
1273 *
1274 * a strncasecmp for xmlChar's
1275 *
1276 * Returns the integer result of the comparison
1277 */
1278
1279int
1280xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1281 register int tmp;
1282
1283 if (len <= 0) return(0);
1284 if (str1 == str2) return(0);
1285 if (str1 == NULL) return(-1);
1286 if (str2 == NULL) return(1);
1287 do {
1288 tmp = casemap[*str1++] - casemap[*str2];
1289 if (tmp != 0 || --len == 0) return(tmp);
1290 } while (*str2++ != 0);
1291 return 0;
1292}
1293
1294/**
1295 * xmlStrchr:
1296 * @str: the xmlChar * array
1297 * @val: the xmlChar to search
1298 *
1299 * a strchr for xmlChar's
1300 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001301 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001302 */
1303
1304const xmlChar *
1305xmlStrchr(const xmlChar *str, xmlChar val) {
1306 if (str == NULL) return(NULL);
1307 while (*str != 0) { /* non input consuming */
1308 if (*str == val) return((xmlChar *) str);
1309 str++;
1310 }
1311 return(NULL);
1312}
1313
1314/**
1315 * xmlStrstr:
1316 * @str: the xmlChar * array (haystack)
1317 * @val: the xmlChar to search (needle)
1318 *
1319 * a strstr for xmlChar's
1320 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001321 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001322 */
1323
1324const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001325xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001326 int n;
1327
1328 if (str == NULL) return(NULL);
1329 if (val == NULL) return(NULL);
1330 n = xmlStrlen(val);
1331
1332 if (n == 0) return(str);
1333 while (*str != 0) { /* non input consuming */
1334 if (*str == *val) {
1335 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1336 }
1337 str++;
1338 }
1339 return(NULL);
1340}
1341
1342/**
1343 * xmlStrcasestr:
1344 * @str: the xmlChar * array (haystack)
1345 * @val: the xmlChar to search (needle)
1346 *
1347 * a case-ignoring strstr for xmlChar's
1348 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001349 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001350 */
1351
1352const xmlChar *
1353xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1354 int n;
1355
1356 if (str == NULL) return(NULL);
1357 if (val == NULL) return(NULL);
1358 n = xmlStrlen(val);
1359
1360 if (n == 0) return(str);
1361 while (*str != 0) { /* non input consuming */
1362 if (casemap[*str] == casemap[*val])
1363 if (!xmlStrncasecmp(str, val, n)) return(str);
1364 str++;
1365 }
1366 return(NULL);
1367}
1368
1369/**
1370 * xmlStrsub:
1371 * @str: the xmlChar * array (haystack)
1372 * @start: the index of the first char (zero based)
1373 * @len: the length of the substring
1374 *
1375 * Extract a substring of a given string
1376 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001377 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001378 */
1379
1380xmlChar *
1381xmlStrsub(const xmlChar *str, int start, int len) {
1382 int i;
1383
1384 if (str == NULL) return(NULL);
1385 if (start < 0) return(NULL);
1386 if (len < 0) return(NULL);
1387
1388 for (i = 0;i < start;i++) {
1389 if (*str == 0) return(NULL);
1390 str++;
1391 }
1392 if (*str == 0) return(NULL);
1393 return(xmlStrndup(str, len));
1394}
1395
1396/**
1397 * xmlStrlen:
1398 * @str: the xmlChar * array
1399 *
1400 * length of a xmlChar's string
1401 *
1402 * Returns the number of xmlChar contained in the ARRAY.
1403 */
1404
1405int
1406xmlStrlen(const xmlChar *str) {
1407 int len = 0;
1408
1409 if (str == NULL) return(0);
1410 while (*str != 0) { /* non input consuming */
1411 str++;
1412 len++;
1413 }
1414 return(len);
1415}
1416
1417/**
1418 * xmlStrncat:
1419 * @cur: the original xmlChar * array
1420 * @add: the xmlChar * array added
1421 * @len: the length of @add
1422 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001423 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001424 * first bytes of @add.
1425 *
1426 * Returns a new xmlChar *, the original @cur is reallocated if needed
1427 * and should not be freed
1428 */
1429
1430xmlChar *
1431xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1432 int size;
1433 xmlChar *ret;
1434
1435 if ((add == NULL) || (len == 0))
1436 return(cur);
1437 if (cur == NULL)
1438 return(xmlStrndup(add, len));
1439
1440 size = xmlStrlen(cur);
1441 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1442 if (ret == NULL) {
1443 xmlGenericError(xmlGenericErrorContext,
1444 "xmlStrncat: realloc of %ld byte failed\n",
1445 (size + len + 1) * (long)sizeof(xmlChar));
1446 return(cur);
1447 }
1448 memcpy(&ret[size], add, len * sizeof(xmlChar));
1449 ret[size + len] = 0;
1450 return(ret);
1451}
1452
1453/**
1454 * xmlStrcat:
1455 * @cur: the original xmlChar * array
1456 * @add: the xmlChar * array added
1457 *
1458 * a strcat for array of xmlChar's. Since they are supposed to be
1459 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1460 * a termination mark of '0'.
1461 *
1462 * Returns a new xmlChar * containing the concatenated string.
1463 */
1464xmlChar *
1465xmlStrcat(xmlChar *cur, const xmlChar *add) {
1466 const xmlChar *p = add;
1467
1468 if (add == NULL) return(cur);
1469 if (cur == NULL)
1470 return(xmlStrdup(add));
1471
1472 while (*p != 0) p++; /* non input consuming */
1473 return(xmlStrncat(cur, add, p - add));
1474}
1475
1476/************************************************************************
1477 * *
1478 * Commodity functions, cleanup needed ? *
1479 * *
1480 ************************************************************************/
1481
1482/**
1483 * areBlanks:
1484 * @ctxt: an XML parser context
1485 * @str: a xmlChar *
1486 * @len: the size of @str
1487 *
1488 * Is this a sequence of blank chars that one can ignore ?
1489 *
1490 * Returns 1 if ignorable 0 otherwise.
1491 */
1492
1493static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1494 int i, ret;
1495 xmlNodePtr lastChild;
1496
Daniel Veillard05c13a22001-09-09 08:38:09 +00001497 /*
1498 * Don't spend time trying to differentiate them, the same callback is
1499 * used !
1500 */
1501 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001502 return(0);
1503
Owen Taylor3473f882001-02-23 17:55:21 +00001504 /*
1505 * Check for xml:space value.
1506 */
1507 if (*(ctxt->space) == 1)
1508 return(0);
1509
1510 /*
1511 * Check that the string is made of blanks
1512 */
1513 for (i = 0;i < len;i++)
1514 if (!(IS_BLANK(str[i]))) return(0);
1515
1516 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001517 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001518 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001519 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001520 if (ctxt->myDoc != NULL) {
1521 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1522 if (ret == 0) return(1);
1523 if (ret == 1) return(0);
1524 }
1525
1526 /*
1527 * Otherwise, heuristic :-\
1528 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001529 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001530 if ((ctxt->node->children == NULL) &&
1531 (RAW == '<') && (NXT(1) == '/')) return(0);
1532
1533 lastChild = xmlGetLastChild(ctxt->node);
1534 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001535 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1536 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001537 } else if (xmlNodeIsText(lastChild))
1538 return(0);
1539 else if ((ctxt->node->children != NULL) &&
1540 (xmlNodeIsText(ctxt->node->children)))
1541 return(0);
1542 return(1);
1543}
1544
Owen Taylor3473f882001-02-23 17:55:21 +00001545/************************************************************************
1546 * *
1547 * Extra stuff for namespace support *
1548 * Relates to http://www.w3.org/TR/WD-xml-names *
1549 * *
1550 ************************************************************************/
1551
1552/**
1553 * xmlSplitQName:
1554 * @ctxt: an XML parser context
1555 * @name: an XML parser context
1556 * @prefix: a xmlChar **
1557 *
1558 * parse an UTF8 encoded XML qualified name string
1559 *
1560 * [NS 5] QName ::= (Prefix ':')? LocalPart
1561 *
1562 * [NS 6] Prefix ::= NCName
1563 *
1564 * [NS 7] LocalPart ::= NCName
1565 *
1566 * Returns the local part, and prefix is updated
1567 * to get the Prefix if any.
1568 */
1569
1570xmlChar *
1571xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1572 xmlChar buf[XML_MAX_NAMELEN + 5];
1573 xmlChar *buffer = NULL;
1574 int len = 0;
1575 int max = XML_MAX_NAMELEN;
1576 xmlChar *ret = NULL;
1577 const xmlChar *cur = name;
1578 int c;
1579
1580 *prefix = NULL;
1581
1582#ifndef XML_XML_NAMESPACE
1583 /* xml: prefix is not really a namespace */
1584 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1585 (cur[2] == 'l') && (cur[3] == ':'))
1586 return(xmlStrdup(name));
1587#endif
1588
1589 /* nasty but valid */
1590 if (cur[0] == ':')
1591 return(xmlStrdup(name));
1592
1593 c = *cur++;
1594 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1595 buf[len++] = c;
1596 c = *cur++;
1597 }
1598 if (len >= max) {
1599 /*
1600 * Okay someone managed to make a huge name, so he's ready to pay
1601 * for the processing speed.
1602 */
1603 max = len * 2;
1604
1605 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1606 if (buffer == NULL) {
1607 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1608 ctxt->sax->error(ctxt->userData,
1609 "xmlSplitQName: out of memory\n");
1610 return(NULL);
1611 }
1612 memcpy(buffer, buf, len);
1613 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1614 if (len + 10 > max) {
1615 max *= 2;
1616 buffer = (xmlChar *) xmlRealloc(buffer,
1617 max * sizeof(xmlChar));
1618 if (buffer == NULL) {
1619 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1620 ctxt->sax->error(ctxt->userData,
1621 "xmlSplitQName: out of memory\n");
1622 return(NULL);
1623 }
1624 }
1625 buffer[len++] = c;
1626 c = *cur++;
1627 }
1628 buffer[len] = 0;
1629 }
1630
1631 if (buffer == NULL)
1632 ret = xmlStrndup(buf, len);
1633 else {
1634 ret = buffer;
1635 buffer = NULL;
1636 max = XML_MAX_NAMELEN;
1637 }
1638
1639
1640 if (c == ':') {
1641 c = *cur++;
1642 if (c == 0) return(ret);
1643 *prefix = ret;
1644 len = 0;
1645
1646 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1647 buf[len++] = c;
1648 c = *cur++;
1649 }
1650 if (len >= max) {
1651 /*
1652 * Okay someone managed to make a huge name, so he's ready to pay
1653 * for the processing speed.
1654 */
1655 max = len * 2;
1656
1657 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1658 if (buffer == NULL) {
1659 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1660 ctxt->sax->error(ctxt->userData,
1661 "xmlSplitQName: out of memory\n");
1662 return(NULL);
1663 }
1664 memcpy(buffer, buf, len);
1665 while (c != 0) { /* tested bigname2.xml */
1666 if (len + 10 > max) {
1667 max *= 2;
1668 buffer = (xmlChar *) xmlRealloc(buffer,
1669 max * sizeof(xmlChar));
1670 if (buffer == NULL) {
1671 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1672 ctxt->sax->error(ctxt->userData,
1673 "xmlSplitQName: out of memory\n");
1674 return(NULL);
1675 }
1676 }
1677 buffer[len++] = c;
1678 c = *cur++;
1679 }
1680 buffer[len] = 0;
1681 }
1682
1683 if (buffer == NULL)
1684 ret = xmlStrndup(buf, len);
1685 else {
1686 ret = buffer;
1687 }
1688 }
1689
1690 return(ret);
1691}
1692
1693/************************************************************************
1694 * *
1695 * The parser itself *
1696 * Relates to http://www.w3.org/TR/REC-xml *
1697 * *
1698 ************************************************************************/
1699
Daniel Veillard76d66f42001-05-16 21:05:17 +00001700static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001701/**
1702 * xmlParseName:
1703 * @ctxt: an XML parser context
1704 *
1705 * parse an XML name.
1706 *
1707 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1708 * CombiningChar | Extender
1709 *
1710 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1711 *
1712 * [6] Names ::= Name (S Name)*
1713 *
1714 * Returns the Name parsed or NULL
1715 */
1716
1717xmlChar *
1718xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001719 const xmlChar *in;
1720 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001721 int count = 0;
1722
1723 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001724
1725 /*
1726 * Accelerator for simple ASCII names
1727 */
1728 in = ctxt->input->cur;
1729 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1730 ((*in >= 0x41) && (*in <= 0x5A)) ||
1731 (*in == '_') || (*in == ':')) {
1732 in++;
1733 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1734 ((*in >= 0x41) && (*in <= 0x5A)) ||
1735 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001736 (*in == '_') || (*in == '-') ||
1737 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001738 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001739 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001740 count = in - ctxt->input->cur;
1741 ret = xmlStrndup(ctxt->input->cur, count);
1742 ctxt->input->cur = in;
1743 return(ret);
1744 }
1745 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001746 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001747}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001748
Daniel Veillard76d66f42001-05-16 21:05:17 +00001749static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001750xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1751 xmlChar buf[XML_MAX_NAMELEN + 5];
1752 int len = 0, l;
1753 int c;
1754 int count = 0;
1755
1756 /*
1757 * Handler for more complex cases
1758 */
1759 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001760 c = CUR_CHAR(l);
1761 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1762 (!IS_LETTER(c) && (c != '_') &&
1763 (c != ':'))) {
1764 return(NULL);
1765 }
1766
1767 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1768 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1769 (c == '.') || (c == '-') ||
1770 (c == '_') || (c == ':') ||
1771 (IS_COMBINING(c)) ||
1772 (IS_EXTENDER(c)))) {
1773 if (count++ > 100) {
1774 count = 0;
1775 GROW;
1776 }
1777 COPY_BUF(l,buf,len,c);
1778 NEXTL(l);
1779 c = CUR_CHAR(l);
1780 if (len >= XML_MAX_NAMELEN) {
1781 /*
1782 * Okay someone managed to make a huge name, so he's ready to pay
1783 * for the processing speed.
1784 */
1785 xmlChar *buffer;
1786 int max = len * 2;
1787
1788 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1789 if (buffer == NULL) {
1790 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1791 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001792 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001793 return(NULL);
1794 }
1795 memcpy(buffer, buf, len);
1796 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1797 (c == '.') || (c == '-') ||
1798 (c == '_') || (c == ':') ||
1799 (IS_COMBINING(c)) ||
1800 (IS_EXTENDER(c))) {
1801 if (count++ > 100) {
1802 count = 0;
1803 GROW;
1804 }
1805 if (len + 10 > max) {
1806 max *= 2;
1807 buffer = (xmlChar *) xmlRealloc(buffer,
1808 max * sizeof(xmlChar));
1809 if (buffer == NULL) {
1810 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1811 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001812 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001813 return(NULL);
1814 }
1815 }
1816 COPY_BUF(l,buffer,len,c);
1817 NEXTL(l);
1818 c = CUR_CHAR(l);
1819 }
1820 buffer[len] = 0;
1821 return(buffer);
1822 }
1823 }
1824 return(xmlStrndup(buf, len));
1825}
1826
1827/**
1828 * xmlParseStringName:
1829 * @ctxt: an XML parser context
1830 * @str: a pointer to the string pointer (IN/OUT)
1831 *
1832 * parse an XML name.
1833 *
1834 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1835 * CombiningChar | Extender
1836 *
1837 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1838 *
1839 * [6] Names ::= Name (S Name)*
1840 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001841 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00001842 * is updated to the current location in the string.
1843 */
1844
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001845static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001846xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1847 xmlChar buf[XML_MAX_NAMELEN + 5];
1848 const xmlChar *cur = *str;
1849 int len = 0, l;
1850 int c;
1851
1852 c = CUR_SCHAR(cur, l);
1853 if (!IS_LETTER(c) && (c != '_') &&
1854 (c != ':')) {
1855 return(NULL);
1856 }
1857
1858 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1859 (c == '.') || (c == '-') ||
1860 (c == '_') || (c == ':') ||
1861 (IS_COMBINING(c)) ||
1862 (IS_EXTENDER(c))) {
1863 COPY_BUF(l,buf,len,c);
1864 cur += l;
1865 c = CUR_SCHAR(cur, l);
1866 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1867 /*
1868 * Okay someone managed to make a huge name, so he's ready to pay
1869 * for the processing speed.
1870 */
1871 xmlChar *buffer;
1872 int max = len * 2;
1873
1874 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1875 if (buffer == NULL) {
1876 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1877 ctxt->sax->error(ctxt->userData,
1878 "xmlParseStringName: out of memory\n");
1879 return(NULL);
1880 }
1881 memcpy(buffer, buf, len);
1882 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1883 (c == '.') || (c == '-') ||
1884 (c == '_') || (c == ':') ||
1885 (IS_COMBINING(c)) ||
1886 (IS_EXTENDER(c))) {
1887 if (len + 10 > max) {
1888 max *= 2;
1889 buffer = (xmlChar *) xmlRealloc(buffer,
1890 max * sizeof(xmlChar));
1891 if (buffer == NULL) {
1892 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1893 ctxt->sax->error(ctxt->userData,
1894 "xmlParseStringName: out of memory\n");
1895 return(NULL);
1896 }
1897 }
1898 COPY_BUF(l,buffer,len,c);
1899 cur += l;
1900 c = CUR_SCHAR(cur, l);
1901 }
1902 buffer[len] = 0;
1903 *str = cur;
1904 return(buffer);
1905 }
1906 }
1907 *str = cur;
1908 return(xmlStrndup(buf, len));
1909}
1910
1911/**
1912 * xmlParseNmtoken:
1913 * @ctxt: an XML parser context
1914 *
1915 * parse an XML Nmtoken.
1916 *
1917 * [7] Nmtoken ::= (NameChar)+
1918 *
1919 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1920 *
1921 * Returns the Nmtoken parsed or NULL
1922 */
1923
1924xmlChar *
1925xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1926 xmlChar buf[XML_MAX_NAMELEN + 5];
1927 int len = 0, l;
1928 int c;
1929 int count = 0;
1930
1931 GROW;
1932 c = CUR_CHAR(l);
1933
1934 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1935 (c == '.') || (c == '-') ||
1936 (c == '_') || (c == ':') ||
1937 (IS_COMBINING(c)) ||
1938 (IS_EXTENDER(c))) {
1939 if (count++ > 100) {
1940 count = 0;
1941 GROW;
1942 }
1943 COPY_BUF(l,buf,len,c);
1944 NEXTL(l);
1945 c = CUR_CHAR(l);
1946 if (len >= XML_MAX_NAMELEN) {
1947 /*
1948 * Okay someone managed to make a huge token, so he's ready to pay
1949 * for the processing speed.
1950 */
1951 xmlChar *buffer;
1952 int max = len * 2;
1953
1954 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1955 if (buffer == NULL) {
1956 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1957 ctxt->sax->error(ctxt->userData,
1958 "xmlParseNmtoken: out of memory\n");
1959 return(NULL);
1960 }
1961 memcpy(buffer, buf, len);
1962 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1963 (c == '.') || (c == '-') ||
1964 (c == '_') || (c == ':') ||
1965 (IS_COMBINING(c)) ||
1966 (IS_EXTENDER(c))) {
1967 if (count++ > 100) {
1968 count = 0;
1969 GROW;
1970 }
1971 if (len + 10 > max) {
1972 max *= 2;
1973 buffer = (xmlChar *) xmlRealloc(buffer,
1974 max * sizeof(xmlChar));
1975 if (buffer == NULL) {
1976 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1977 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001978 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001979 return(NULL);
1980 }
1981 }
1982 COPY_BUF(l,buffer,len,c);
1983 NEXTL(l);
1984 c = CUR_CHAR(l);
1985 }
1986 buffer[len] = 0;
1987 return(buffer);
1988 }
1989 }
1990 if (len == 0)
1991 return(NULL);
1992 return(xmlStrndup(buf, len));
1993}
1994
1995/**
1996 * xmlParseEntityValue:
1997 * @ctxt: an XML parser context
1998 * @orig: if non-NULL store a copy of the original entity value
1999 *
2000 * parse a value for ENTITY declarations
2001 *
2002 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2003 * "'" ([^%&'] | PEReference | Reference)* "'"
2004 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002005 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002006 */
2007
2008xmlChar *
2009xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2010 xmlChar *buf = NULL;
2011 int len = 0;
2012 int size = XML_PARSER_BUFFER_SIZE;
2013 int c, l;
2014 xmlChar stop;
2015 xmlChar *ret = NULL;
2016 const xmlChar *cur = NULL;
2017 xmlParserInputPtr input;
2018
2019 if (RAW == '"') stop = '"';
2020 else if (RAW == '\'') stop = '\'';
2021 else {
2022 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2023 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2024 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2025 ctxt->wellFormed = 0;
2026 ctxt->disableSAX = 1;
2027 return(NULL);
2028 }
2029 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2030 if (buf == NULL) {
2031 xmlGenericError(xmlGenericErrorContext,
2032 "malloc of %d byte failed\n", size);
2033 return(NULL);
2034 }
2035
2036 /*
2037 * The content of the entity definition is copied in a buffer.
2038 */
2039
2040 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2041 input = ctxt->input;
2042 GROW;
2043 NEXT;
2044 c = CUR_CHAR(l);
2045 /*
2046 * NOTE: 4.4.5 Included in Literal
2047 * When a parameter entity reference appears in a literal entity
2048 * value, ... a single or double quote character in the replacement
2049 * text is always treated as a normal data character and will not
2050 * terminate the literal.
2051 * In practice it means we stop the loop only when back at parsing
2052 * the initial entity and the quote is found
2053 */
2054 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2055 (ctxt->input != input))) {
2056 if (len + 5 >= size) {
2057 size *= 2;
2058 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2059 if (buf == NULL) {
2060 xmlGenericError(xmlGenericErrorContext,
2061 "realloc of %d byte failed\n", size);
2062 return(NULL);
2063 }
2064 }
2065 COPY_BUF(l,buf,len,c);
2066 NEXTL(l);
2067 /*
2068 * Pop-up of finished entities.
2069 */
2070 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2071 xmlPopInput(ctxt);
2072
2073 GROW;
2074 c = CUR_CHAR(l);
2075 if (c == 0) {
2076 GROW;
2077 c = CUR_CHAR(l);
2078 }
2079 }
2080 buf[len] = 0;
2081
2082 /*
2083 * Raise problem w.r.t. '&' and '%' being used in non-entities
2084 * reference constructs. Note Charref will be handled in
2085 * xmlStringDecodeEntities()
2086 */
2087 cur = buf;
2088 while (*cur != 0) { /* non input consuming */
2089 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2090 xmlChar *name;
2091 xmlChar tmp = *cur;
2092
2093 cur++;
2094 name = xmlParseStringName(ctxt, &cur);
2095 if ((name == NULL) || (*cur != ';')) {
2096 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2097 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2098 ctxt->sax->error(ctxt->userData,
2099 "EntityValue: '%c' forbidden except for entities references\n",
2100 tmp);
2101 ctxt->wellFormed = 0;
2102 ctxt->disableSAX = 1;
2103 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002104 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2105 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002106 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2107 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2108 ctxt->sax->error(ctxt->userData,
2109 "EntityValue: PEReferences forbidden in internal subset\n",
2110 tmp);
2111 ctxt->wellFormed = 0;
2112 ctxt->disableSAX = 1;
2113 }
2114 if (name != NULL)
2115 xmlFree(name);
2116 }
2117 cur++;
2118 }
2119
2120 /*
2121 * Then PEReference entities are substituted.
2122 */
2123 if (c != stop) {
2124 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2125 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2126 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2127 ctxt->wellFormed = 0;
2128 ctxt->disableSAX = 1;
2129 xmlFree(buf);
2130 } else {
2131 NEXT;
2132 /*
2133 * NOTE: 4.4.7 Bypassed
2134 * When a general entity reference appears in the EntityValue in
2135 * an entity declaration, it is bypassed and left as is.
2136 * so XML_SUBSTITUTE_REF is not set here.
2137 */
2138 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2139 0, 0, 0);
2140 if (orig != NULL)
2141 *orig = buf;
2142 else
2143 xmlFree(buf);
2144 }
2145
2146 return(ret);
2147}
2148
2149/**
2150 * xmlParseAttValue:
2151 * @ctxt: an XML parser context
2152 *
2153 * parse a value for an attribute
2154 * Note: the parser won't do substitution of entities here, this
2155 * will be handled later in xmlStringGetNodeList
2156 *
2157 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2158 * "'" ([^<&'] | Reference)* "'"
2159 *
2160 * 3.3.3 Attribute-Value Normalization:
2161 * Before the value of an attribute is passed to the application or
2162 * checked for validity, the XML processor must normalize it as follows:
2163 * - a character reference is processed by appending the referenced
2164 * character to the attribute value
2165 * - an entity reference is processed by recursively processing the
2166 * replacement text of the entity
2167 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2168 * appending #x20 to the normalized value, except that only a single
2169 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2170 * parsed entity or the literal entity value of an internal parsed entity
2171 * - other characters are processed by appending them to the normalized value
2172 * If the declared value is not CDATA, then the XML processor must further
2173 * process the normalized attribute value by discarding any leading and
2174 * trailing space (#x20) characters, and by replacing sequences of space
2175 * (#x20) characters by a single space (#x20) character.
2176 * All attributes for which no declaration has been read should be treated
2177 * by a non-validating parser as if declared CDATA.
2178 *
2179 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2180 */
2181
2182xmlChar *
2183xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2184 xmlChar limit = 0;
2185 xmlChar *buf = NULL;
2186 int len = 0;
2187 int buf_size = 0;
2188 int c, l;
2189 xmlChar *current = NULL;
2190 xmlEntityPtr ent;
2191
2192
2193 SHRINK;
2194 if (NXT(0) == '"') {
2195 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2196 limit = '"';
2197 NEXT;
2198 } else if (NXT(0) == '\'') {
2199 limit = '\'';
2200 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2201 NEXT;
2202 } else {
2203 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2204 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2205 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2206 ctxt->wellFormed = 0;
2207 ctxt->disableSAX = 1;
2208 return(NULL);
2209 }
2210
2211 /*
2212 * allocate a translation buffer.
2213 */
2214 buf_size = XML_PARSER_BUFFER_SIZE;
2215 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2216 if (buf == NULL) {
2217 perror("xmlParseAttValue: malloc failed");
2218 return(NULL);
2219 }
2220
2221 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002222 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002223 */
2224 c = CUR_CHAR(l);
2225 while (((NXT(0) != limit) && /* checked */
2226 (c != '<')) || (ctxt->token != 0)) {
2227 if (c == 0) break;
2228 if (ctxt->token == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002229 if (ctxt->replaceEntities) {
2230 if (len > buf_size - 10) {
2231 growBuffer(buf);
2232 }
2233 buf[len++] = '&';
2234 } else {
2235 /*
2236 * The reparsing will be done in xmlStringGetNodeList()
2237 * called by the attribute() function in SAX.c
2238 */
2239 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002240
Daniel Veillard319a7422001-09-11 09:27:09 +00002241 if (len > buf_size - 10) {
2242 growBuffer(buf);
2243 }
2244 current = &buffer[0];
2245 while (*current != 0) { /* non input consuming */
2246 buf[len++] = *current++;
2247 }
2248 ctxt->token = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002249 }
Owen Taylor3473f882001-02-23 17:55:21 +00002250 } else if (c == '&') {
2251 if (NXT(1) == '#') {
2252 int val = xmlParseCharRef(ctxt);
2253 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002254 if (ctxt->replaceEntities) {
2255 if (len > buf_size - 10) {
2256 growBuffer(buf);
2257 }
2258 buf[len++] = '&';
2259 } else {
2260 /*
2261 * The reparsing will be done in xmlStringGetNodeList()
2262 * called by the attribute() function in SAX.c
2263 */
2264 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002265
Daniel Veillard319a7422001-09-11 09:27:09 +00002266 if (len > buf_size - 10) {
2267 growBuffer(buf);
2268 }
2269 current = &buffer[0];
2270 while (*current != 0) { /* non input consuming */
2271 buf[len++] = *current++;
2272 }
Owen Taylor3473f882001-02-23 17:55:21 +00002273 }
2274 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002275 if (len > buf_size - 10) {
2276 growBuffer(buf);
2277 }
Owen Taylor3473f882001-02-23 17:55:21 +00002278 len += xmlCopyChar(0, &buf[len], val);
2279 }
2280 } else {
2281 ent = xmlParseEntityRef(ctxt);
2282 if ((ent != NULL) &&
2283 (ctxt->replaceEntities != 0)) {
2284 xmlChar *rep;
2285
2286 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2287 rep = xmlStringDecodeEntities(ctxt, ent->content,
2288 XML_SUBSTITUTE_REF, 0, 0, 0);
2289 if (rep != NULL) {
2290 current = rep;
2291 while (*current != 0) { /* non input consuming */
2292 buf[len++] = *current++;
2293 if (len > buf_size - 10) {
2294 growBuffer(buf);
2295 }
2296 }
2297 xmlFree(rep);
2298 }
2299 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002300 if (len > buf_size - 10) {
2301 growBuffer(buf);
2302 }
Owen Taylor3473f882001-02-23 17:55:21 +00002303 if (ent->content != NULL)
2304 buf[len++] = ent->content[0];
2305 }
2306 } else if (ent != NULL) {
2307 int i = xmlStrlen(ent->name);
2308 const xmlChar *cur = ent->name;
2309
2310 /*
2311 * This may look absurd but is needed to detect
2312 * entities problems
2313 */
2314 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2315 (ent->content != NULL)) {
2316 xmlChar *rep;
2317 rep = xmlStringDecodeEntities(ctxt, ent->content,
2318 XML_SUBSTITUTE_REF, 0, 0, 0);
2319 if (rep != NULL)
2320 xmlFree(rep);
2321 }
2322
2323 /*
2324 * Just output the reference
2325 */
2326 buf[len++] = '&';
2327 if (len > buf_size - i - 10) {
2328 growBuffer(buf);
2329 }
2330 for (;i > 0;i--)
2331 buf[len++] = *cur++;
2332 buf[len++] = ';';
2333 }
2334 }
2335 } else {
2336 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2337 COPY_BUF(l,buf,len,0x20);
2338 if (len > buf_size - 10) {
2339 growBuffer(buf);
2340 }
2341 } else {
2342 COPY_BUF(l,buf,len,c);
2343 if (len > buf_size - 10) {
2344 growBuffer(buf);
2345 }
2346 }
2347 NEXTL(l);
2348 }
2349 GROW;
2350 c = CUR_CHAR(l);
2351 }
2352 buf[len++] = 0;
2353 if (RAW == '<') {
2354 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2355 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2356 ctxt->sax->error(ctxt->userData,
2357 "Unescaped '<' not allowed in attributes values\n");
2358 ctxt->wellFormed = 0;
2359 ctxt->disableSAX = 1;
2360 } else if (RAW != limit) {
2361 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2362 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2363 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2364 ctxt->wellFormed = 0;
2365 ctxt->disableSAX = 1;
2366 } else
2367 NEXT;
2368 return(buf);
2369}
2370
2371/**
2372 * xmlParseSystemLiteral:
2373 * @ctxt: an XML parser context
2374 *
2375 * parse an XML Literal
2376 *
2377 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2378 *
2379 * Returns the SystemLiteral parsed or NULL
2380 */
2381
2382xmlChar *
2383xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2384 xmlChar *buf = NULL;
2385 int len = 0;
2386 int size = XML_PARSER_BUFFER_SIZE;
2387 int cur, l;
2388 xmlChar stop;
2389 int state = ctxt->instate;
2390 int count = 0;
2391
2392 SHRINK;
2393 if (RAW == '"') {
2394 NEXT;
2395 stop = '"';
2396 } else if (RAW == '\'') {
2397 NEXT;
2398 stop = '\'';
2399 } else {
2400 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2401 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2402 ctxt->sax->error(ctxt->userData,
2403 "SystemLiteral \" or ' expected\n");
2404 ctxt->wellFormed = 0;
2405 ctxt->disableSAX = 1;
2406 return(NULL);
2407 }
2408
2409 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2410 if (buf == NULL) {
2411 xmlGenericError(xmlGenericErrorContext,
2412 "malloc of %d byte failed\n", size);
2413 return(NULL);
2414 }
2415 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2416 cur = CUR_CHAR(l);
2417 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2418 if (len + 5 >= size) {
2419 size *= 2;
2420 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2421 if (buf == NULL) {
2422 xmlGenericError(xmlGenericErrorContext,
2423 "realloc of %d byte failed\n", size);
2424 ctxt->instate = (xmlParserInputState) state;
2425 return(NULL);
2426 }
2427 }
2428 count++;
2429 if (count > 50) {
2430 GROW;
2431 count = 0;
2432 }
2433 COPY_BUF(l,buf,len,cur);
2434 NEXTL(l);
2435 cur = CUR_CHAR(l);
2436 if (cur == 0) {
2437 GROW;
2438 SHRINK;
2439 cur = CUR_CHAR(l);
2440 }
2441 }
2442 buf[len] = 0;
2443 ctxt->instate = (xmlParserInputState) state;
2444 if (!IS_CHAR(cur)) {
2445 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2446 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2447 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2448 ctxt->wellFormed = 0;
2449 ctxt->disableSAX = 1;
2450 } else {
2451 NEXT;
2452 }
2453 return(buf);
2454}
2455
2456/**
2457 * xmlParsePubidLiteral:
2458 * @ctxt: an XML parser context
2459 *
2460 * parse an XML public literal
2461 *
2462 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2463 *
2464 * Returns the PubidLiteral parsed or NULL.
2465 */
2466
2467xmlChar *
2468xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2469 xmlChar *buf = NULL;
2470 int len = 0;
2471 int size = XML_PARSER_BUFFER_SIZE;
2472 xmlChar cur;
2473 xmlChar stop;
2474 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002475 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00002476
2477 SHRINK;
2478 if (RAW == '"') {
2479 NEXT;
2480 stop = '"';
2481 } else if (RAW == '\'') {
2482 NEXT;
2483 stop = '\'';
2484 } else {
2485 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2486 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2487 ctxt->sax->error(ctxt->userData,
2488 "SystemLiteral \" or ' expected\n");
2489 ctxt->wellFormed = 0;
2490 ctxt->disableSAX = 1;
2491 return(NULL);
2492 }
2493 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2494 if (buf == NULL) {
2495 xmlGenericError(xmlGenericErrorContext,
2496 "malloc of %d byte failed\n", size);
2497 return(NULL);
2498 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002499 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00002500 cur = CUR;
2501 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2502 if (len + 1 >= size) {
2503 size *= 2;
2504 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2505 if (buf == NULL) {
2506 xmlGenericError(xmlGenericErrorContext,
2507 "realloc of %d byte failed\n", size);
2508 return(NULL);
2509 }
2510 }
2511 buf[len++] = cur;
2512 count++;
2513 if (count > 50) {
2514 GROW;
2515 count = 0;
2516 }
2517 NEXT;
2518 cur = CUR;
2519 if (cur == 0) {
2520 GROW;
2521 SHRINK;
2522 cur = CUR;
2523 }
2524 }
2525 buf[len] = 0;
2526 if (cur != stop) {
2527 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2528 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2529 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2530 ctxt->wellFormed = 0;
2531 ctxt->disableSAX = 1;
2532 } else {
2533 NEXT;
2534 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002535 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00002536 return(buf);
2537}
2538
Daniel Veillard48b2f892001-02-25 16:11:03 +00002539void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002540/**
2541 * xmlParseCharData:
2542 * @ctxt: an XML parser context
2543 * @cdata: int indicating whether we are within a CDATA section
2544 *
2545 * parse a CharData section.
2546 * if we are within a CDATA section ']]>' marks an end of section.
2547 *
2548 * The right angle bracket (>) may be represented using the string "&gt;",
2549 * and must, for compatibility, be escaped using "&gt;" or a character
2550 * reference when it appears in the string "]]>" in content, when that
2551 * string is not marking the end of a CDATA section.
2552 *
2553 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2554 */
2555
2556void
2557xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00002558 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002559 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002560 int line = ctxt->input->line;
2561 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002562
2563 SHRINK;
2564 GROW;
2565 /*
2566 * Accelerated common case where input don't need to be
2567 * modified before passing it to the handler.
2568 */
2569 if ((ctxt->token == 0) && (!cdata)) {
2570 in = ctxt->input->cur;
2571 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002572get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00002573 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
2574 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002575 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002576 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002577 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002578 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002579 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002580 ctxt->input->line++;
2581 in++;
2582 }
2583 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002584 }
2585 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002586 if ((in[1] == ']') && (in[2] == '>')) {
2587 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2588 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2589 ctxt->sax->error(ctxt->userData,
2590 "Sequence ']]>' not allowed in content\n");
2591 ctxt->input->cur = in;
2592 ctxt->wellFormed = 0;
2593 ctxt->disableSAX = 1;
2594 return;
2595 }
2596 in++;
2597 goto get_more;
2598 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002599 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002600 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002601 if (IS_BLANK(*ctxt->input->cur)) {
2602 const xmlChar *tmp = ctxt->input->cur;
2603 ctxt->input->cur = in;
2604 if (areBlanks(ctxt, tmp, nbchar)) {
2605 if (ctxt->sax->ignorableWhitespace != NULL)
2606 ctxt->sax->ignorableWhitespace(ctxt->userData,
2607 tmp, nbchar);
2608 } else {
2609 if (ctxt->sax->characters != NULL)
2610 ctxt->sax->characters(ctxt->userData,
2611 tmp, nbchar);
2612 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002613 line = ctxt->input->line;
2614 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002615 } else {
2616 if (ctxt->sax->characters != NULL)
2617 ctxt->sax->characters(ctxt->userData,
2618 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002619 line = ctxt->input->line;
2620 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002621 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002622 }
2623 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002624 if (*in == 0xD) {
2625 in++;
2626 if (*in == 0xA) {
2627 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002628 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002629 ctxt->input->line++;
2630 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002631 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002632 in--;
2633 }
2634 if (*in == '<') {
2635 return;
2636 }
2637 if (*in == '&') {
2638 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002639 }
2640 SHRINK;
2641 GROW;
2642 in = ctxt->input->cur;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002643 } while ((*in >= 0x20) && (*in <= 0x7F));
Daniel Veillard48b2f892001-02-25 16:11:03 +00002644 nbchar = 0;
2645 }
Daniel Veillard50582112001-03-26 22:52:16 +00002646 ctxt->input->line = line;
2647 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002648 xmlParseCharDataComplex(ctxt, cdata);
2649}
2650
2651void
2652xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002653 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2654 int nbchar = 0;
2655 int cur, l;
2656 int count = 0;
2657
2658 SHRINK;
2659 GROW;
2660 cur = CUR_CHAR(l);
2661 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2662 ((cur != '&') || (ctxt->token == '&')) &&
2663 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2664 if ((cur == ']') && (NXT(1) == ']') &&
2665 (NXT(2) == '>')) {
2666 if (cdata) break;
2667 else {
2668 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2669 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2670 ctxt->sax->error(ctxt->userData,
2671 "Sequence ']]>' not allowed in content\n");
2672 /* Should this be relaxed ??? I see a "must here */
2673 ctxt->wellFormed = 0;
2674 ctxt->disableSAX = 1;
2675 }
2676 }
2677 COPY_BUF(l,buf,nbchar,cur);
2678 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2679 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002680 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002681 */
2682 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2683 if (areBlanks(ctxt, buf, nbchar)) {
2684 if (ctxt->sax->ignorableWhitespace != NULL)
2685 ctxt->sax->ignorableWhitespace(ctxt->userData,
2686 buf, nbchar);
2687 } else {
2688 if (ctxt->sax->characters != NULL)
2689 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2690 }
2691 }
2692 nbchar = 0;
2693 }
2694 count++;
2695 if (count > 50) {
2696 GROW;
2697 count = 0;
2698 }
2699 NEXTL(l);
2700 cur = CUR_CHAR(l);
2701 }
2702 if (nbchar != 0) {
2703 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002704 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002705 */
2706 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2707 if (areBlanks(ctxt, buf, nbchar)) {
2708 if (ctxt->sax->ignorableWhitespace != NULL)
2709 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2710 } else {
2711 if (ctxt->sax->characters != NULL)
2712 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2713 }
2714 }
2715 }
2716}
2717
2718/**
2719 * xmlParseExternalID:
2720 * @ctxt: an XML parser context
2721 * @publicID: a xmlChar** receiving PubidLiteral
2722 * @strict: indicate whether we should restrict parsing to only
2723 * production [75], see NOTE below
2724 *
2725 * Parse an External ID or a Public ID
2726 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002727 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002728 * 'PUBLIC' S PubidLiteral S SystemLiteral
2729 *
2730 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2731 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2732 *
2733 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2734 *
2735 * Returns the function returns SystemLiteral and in the second
2736 * case publicID receives PubidLiteral, is strict is off
2737 * it is possible to return NULL and have publicID set.
2738 */
2739
2740xmlChar *
2741xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2742 xmlChar *URI = NULL;
2743
2744 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002745
2746 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002747 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2748 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2749 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2750 SKIP(6);
2751 if (!IS_BLANK(CUR)) {
2752 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2753 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2754 ctxt->sax->error(ctxt->userData,
2755 "Space required after 'SYSTEM'\n");
2756 ctxt->wellFormed = 0;
2757 ctxt->disableSAX = 1;
2758 }
2759 SKIP_BLANKS;
2760 URI = xmlParseSystemLiteral(ctxt);
2761 if (URI == NULL) {
2762 ctxt->errNo = XML_ERR_URI_REQUIRED;
2763 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2764 ctxt->sax->error(ctxt->userData,
2765 "xmlParseExternalID: SYSTEM, no URI\n");
2766 ctxt->wellFormed = 0;
2767 ctxt->disableSAX = 1;
2768 }
2769 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2770 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2771 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2772 SKIP(6);
2773 if (!IS_BLANK(CUR)) {
2774 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2775 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2776 ctxt->sax->error(ctxt->userData,
2777 "Space required after 'PUBLIC'\n");
2778 ctxt->wellFormed = 0;
2779 ctxt->disableSAX = 1;
2780 }
2781 SKIP_BLANKS;
2782 *publicID = xmlParsePubidLiteral(ctxt);
2783 if (*publicID == NULL) {
2784 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2785 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2786 ctxt->sax->error(ctxt->userData,
2787 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2788 ctxt->wellFormed = 0;
2789 ctxt->disableSAX = 1;
2790 }
2791 if (strict) {
2792 /*
2793 * We don't handle [83] so "S SystemLiteral" is required.
2794 */
2795 if (!IS_BLANK(CUR)) {
2796 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2797 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2798 ctxt->sax->error(ctxt->userData,
2799 "Space required after the Public Identifier\n");
2800 ctxt->wellFormed = 0;
2801 ctxt->disableSAX = 1;
2802 }
2803 } else {
2804 /*
2805 * We handle [83] so we return immediately, if
2806 * "S SystemLiteral" is not detected. From a purely parsing
2807 * point of view that's a nice mess.
2808 */
2809 const xmlChar *ptr;
2810 GROW;
2811
2812 ptr = CUR_PTR;
2813 if (!IS_BLANK(*ptr)) return(NULL);
2814
2815 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2816 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2817 }
2818 SKIP_BLANKS;
2819 URI = xmlParseSystemLiteral(ctxt);
2820 if (URI == NULL) {
2821 ctxt->errNo = XML_ERR_URI_REQUIRED;
2822 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2823 ctxt->sax->error(ctxt->userData,
2824 "xmlParseExternalID: PUBLIC, no URI\n");
2825 ctxt->wellFormed = 0;
2826 ctxt->disableSAX = 1;
2827 }
2828 }
2829 return(URI);
2830}
2831
2832/**
2833 * xmlParseComment:
2834 * @ctxt: an XML parser context
2835 *
2836 * Skip an XML (SGML) comment <!-- .... -->
2837 * The spec says that "For compatibility, the string "--" (double-hyphen)
2838 * must not occur within comments. "
2839 *
2840 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2841 */
2842void
2843xmlParseComment(xmlParserCtxtPtr ctxt) {
2844 xmlChar *buf = NULL;
2845 int len;
2846 int size = XML_PARSER_BUFFER_SIZE;
2847 int q, ql;
2848 int r, rl;
2849 int cur, l;
2850 xmlParserInputState state;
2851 xmlParserInputPtr input = ctxt->input;
2852 int count = 0;
2853
2854 /*
2855 * Check that there is a comment right here.
2856 */
2857 if ((RAW != '<') || (NXT(1) != '!') ||
2858 (NXT(2) != '-') || (NXT(3) != '-')) return;
2859
2860 state = ctxt->instate;
2861 ctxt->instate = XML_PARSER_COMMENT;
2862 SHRINK;
2863 SKIP(4);
2864 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2865 if (buf == NULL) {
2866 xmlGenericError(xmlGenericErrorContext,
2867 "malloc of %d byte failed\n", size);
2868 ctxt->instate = state;
2869 return;
2870 }
2871 q = CUR_CHAR(ql);
2872 NEXTL(ql);
2873 r = CUR_CHAR(rl);
2874 NEXTL(rl);
2875 cur = CUR_CHAR(l);
2876 len = 0;
2877 while (IS_CHAR(cur) && /* checked */
2878 ((cur != '>') ||
2879 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002880 if ((r == '-') && (q == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002881 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2882 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2883 ctxt->sax->error(ctxt->userData,
2884 "Comment must not contain '--' (double-hyphen)`\n");
2885 ctxt->wellFormed = 0;
2886 ctxt->disableSAX = 1;
2887 }
2888 if (len + 5 >= size) {
2889 size *= 2;
2890 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2891 if (buf == NULL) {
2892 xmlGenericError(xmlGenericErrorContext,
2893 "realloc of %d byte failed\n", size);
2894 ctxt->instate = state;
2895 return;
2896 }
2897 }
2898 COPY_BUF(ql,buf,len,q);
2899 q = r;
2900 ql = rl;
2901 r = cur;
2902 rl = l;
2903
2904 count++;
2905 if (count > 50) {
2906 GROW;
2907 count = 0;
2908 }
2909 NEXTL(l);
2910 cur = CUR_CHAR(l);
2911 if (cur == 0) {
2912 SHRINK;
2913 GROW;
2914 cur = CUR_CHAR(l);
2915 }
2916 }
2917 buf[len] = 0;
2918 if (!IS_CHAR(cur)) {
2919 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2920 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2921 ctxt->sax->error(ctxt->userData,
2922 "Comment not terminated \n<!--%.50s\n", buf);
2923 ctxt->wellFormed = 0;
2924 ctxt->disableSAX = 1;
2925 xmlFree(buf);
2926 } else {
2927 if (input != ctxt->input) {
2928 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2929 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2930 ctxt->sax->error(ctxt->userData,
2931"Comment doesn't start and stop in the same entity\n");
2932 ctxt->wellFormed = 0;
2933 ctxt->disableSAX = 1;
2934 }
2935 NEXT;
2936 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2937 (!ctxt->disableSAX))
2938 ctxt->sax->comment(ctxt->userData, buf);
2939 xmlFree(buf);
2940 }
2941 ctxt->instate = state;
2942}
2943
2944/**
2945 * xmlParsePITarget:
2946 * @ctxt: an XML parser context
2947 *
2948 * parse the name of a PI
2949 *
2950 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2951 *
2952 * Returns the PITarget name or NULL
2953 */
2954
2955xmlChar *
2956xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2957 xmlChar *name;
2958
2959 name = xmlParseName(ctxt);
2960 if ((name != NULL) &&
2961 ((name[0] == 'x') || (name[0] == 'X')) &&
2962 ((name[1] == 'm') || (name[1] == 'M')) &&
2963 ((name[2] == 'l') || (name[2] == 'L'))) {
2964 int i;
2965 if ((name[0] == 'x') && (name[1] == 'm') &&
2966 (name[2] == 'l') && (name[3] == 0)) {
2967 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2968 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2969 ctxt->sax->error(ctxt->userData,
2970 "XML declaration allowed only at the start of the document\n");
2971 ctxt->wellFormed = 0;
2972 ctxt->disableSAX = 1;
2973 return(name);
2974 } else if (name[3] == 0) {
2975 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2976 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2977 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2978 ctxt->wellFormed = 0;
2979 ctxt->disableSAX = 1;
2980 return(name);
2981 }
2982 for (i = 0;;i++) {
2983 if (xmlW3CPIs[i] == NULL) break;
2984 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2985 return(name);
2986 }
2987 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2988 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2989 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002990 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002991 }
2992 }
2993 return(name);
2994}
2995
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype informations
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must be of the form  catalog = "URL"  (single or double
 * quotes) with nothing but blanks around it.  A missing '=' after the
 * keyword makes the PI silently ignored; every other syntax problem is
 * reported through the SAX warning callback.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *location = NULL;
    xmlChar quote;

    /* the pseudo-attribute name */
    for (; IS_BLANK(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto syntax_error;
    p += 7;

    /* the '=' sign, no complaint if it is not there */
    for (; IS_BLANK(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;

    /* the quoted value */
    for (; IS_BLANK(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto syntax_error;
    p++;
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto syntax_error;
    location = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK(*p); p++)
        ;
    if (*p != 0)
        goto syntax_error;

    if (location != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, location);
        xmlFree(location);
    }
    return;

syntax_error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (location != NULL)
        xmlFree(location);
}
#endif
3058
Owen Taylor3473f882001-02-23 17:55:21 +00003059/**
3060 * xmlParsePI:
3061 * @ctxt: an XML parser context
3062 *
3063 * parse an XML Processing Instruction.
3064 *
3065 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3066 *
3067 * The processing is transfered to SAX once parsed.
3068 */
3069
3070void
3071xmlParsePI(xmlParserCtxtPtr ctxt) {
3072 xmlChar *buf = NULL;
3073 int len = 0;
3074 int size = XML_PARSER_BUFFER_SIZE;
3075 int cur, l;
3076 xmlChar *target;
3077 xmlParserInputState state;
3078 int count = 0;
3079
3080 if ((RAW == '<') && (NXT(1) == '?')) {
3081 xmlParserInputPtr input = ctxt->input;
3082 state = ctxt->instate;
3083 ctxt->instate = XML_PARSER_PI;
3084 /*
3085 * this is a Processing Instruction.
3086 */
3087 SKIP(2);
3088 SHRINK;
3089
3090 /*
3091 * Parse the target name and check for special support like
3092 * namespace.
3093 */
3094 target = xmlParsePITarget(ctxt);
3095 if (target != NULL) {
3096 if ((RAW == '?') && (NXT(1) == '>')) {
3097 if (input != ctxt->input) {
3098 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3099 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3100 ctxt->sax->error(ctxt->userData,
3101 "PI declaration doesn't start and stop in the same entity\n");
3102 ctxt->wellFormed = 0;
3103 ctxt->disableSAX = 1;
3104 }
3105 SKIP(2);
3106
3107 /*
3108 * SAX: PI detected.
3109 */
3110 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3111 (ctxt->sax->processingInstruction != NULL))
3112 ctxt->sax->processingInstruction(ctxt->userData,
3113 target, NULL);
3114 ctxt->instate = state;
3115 xmlFree(target);
3116 return;
3117 }
3118 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3119 if (buf == NULL) {
3120 xmlGenericError(xmlGenericErrorContext,
3121 "malloc of %d byte failed\n", size);
3122 ctxt->instate = state;
3123 return;
3124 }
3125 cur = CUR;
3126 if (!IS_BLANK(cur)) {
3127 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3128 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3129 ctxt->sax->error(ctxt->userData,
3130 "xmlParsePI: PI %s space expected\n", target);
3131 ctxt->wellFormed = 0;
3132 ctxt->disableSAX = 1;
3133 }
3134 SKIP_BLANKS;
3135 cur = CUR_CHAR(l);
3136 while (IS_CHAR(cur) && /* checked */
3137 ((cur != '?') || (NXT(1) != '>'))) {
3138 if (len + 5 >= size) {
3139 size *= 2;
3140 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3141 if (buf == NULL) {
3142 xmlGenericError(xmlGenericErrorContext,
3143 "realloc of %d byte failed\n", size);
3144 ctxt->instate = state;
3145 return;
3146 }
3147 }
3148 count++;
3149 if (count > 50) {
3150 GROW;
3151 count = 0;
3152 }
3153 COPY_BUF(l,buf,len,cur);
3154 NEXTL(l);
3155 cur = CUR_CHAR(l);
3156 if (cur == 0) {
3157 SHRINK;
3158 GROW;
3159 cur = CUR_CHAR(l);
3160 }
3161 }
3162 buf[len] = 0;
3163 if (cur != '?') {
3164 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3165 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3166 ctxt->sax->error(ctxt->userData,
3167 "xmlParsePI: PI %s never end ...\n", target);
3168 ctxt->wellFormed = 0;
3169 ctxt->disableSAX = 1;
3170 } else {
3171 if (input != ctxt->input) {
3172 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3173 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3174 ctxt->sax->error(ctxt->userData,
3175 "PI declaration doesn't start and stop in the same entity\n");
3176 ctxt->wellFormed = 0;
3177 ctxt->disableSAX = 1;
3178 }
3179 SKIP(2);
3180
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003181#ifdef LIBXML_CATALOG_ENABLED
3182 if (((state == XML_PARSER_MISC) ||
3183 (state == XML_PARSER_START)) &&
3184 (xmlStrEqual(target, XML_CATALOG_PI))) {
3185 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3186 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3187 (allow == XML_CATA_ALLOW_ALL))
3188 xmlParseCatalogPI(ctxt, buf);
3189 }
3190#endif
3191
3192
Owen Taylor3473f882001-02-23 17:55:21 +00003193 /*
3194 * SAX: PI detected.
3195 */
3196 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3197 (ctxt->sax->processingInstruction != NULL))
3198 ctxt->sax->processingInstruction(ctxt->userData,
3199 target, buf);
3200 }
3201 xmlFree(buf);
3202 xmlFree(target);
3203 } else {
3204 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3205 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3206 ctxt->sax->error(ctxt->userData,
3207 "xmlParsePI : no target name\n");
3208 ctxt->wellFormed = 0;
3209 ctxt->disableSAX = 1;
3210 }
3211 ctxt->instate = state;
3212 }
3213}
3214
3215/**
3216 * xmlParseNotationDecl:
3217 * @ctxt: an XML parser context
3218 *
3219 * parse a notation declaration
3220 *
3221 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3222 *
3223 * Hence there is actually 3 choices:
3224 * 'PUBLIC' S PubidLiteral
3225 * 'PUBLIC' S PubidLiteral S SystemLiteral
3226 * and 'SYSTEM' S SystemLiteral
3227 *
3228 * See the NOTE on xmlParseExternalID().
3229 */
3230
3231void
3232xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3233 xmlChar *name;
3234 xmlChar *Pubid;
3235 xmlChar *Systemid;
3236
3237 if ((RAW == '<') && (NXT(1) == '!') &&
3238 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3239 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3240 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3241 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3242 xmlParserInputPtr input = ctxt->input;
3243 SHRINK;
3244 SKIP(10);
3245 if (!IS_BLANK(CUR)) {
3246 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3247 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3248 ctxt->sax->error(ctxt->userData,
3249 "Space required after '<!NOTATION'\n");
3250 ctxt->wellFormed = 0;
3251 ctxt->disableSAX = 1;
3252 return;
3253 }
3254 SKIP_BLANKS;
3255
Daniel Veillard76d66f42001-05-16 21:05:17 +00003256 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003257 if (name == NULL) {
3258 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3259 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3260 ctxt->sax->error(ctxt->userData,
3261 "NOTATION: Name expected here\n");
3262 ctxt->wellFormed = 0;
3263 ctxt->disableSAX = 1;
3264 return;
3265 }
3266 if (!IS_BLANK(CUR)) {
3267 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3268 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3269 ctxt->sax->error(ctxt->userData,
3270 "Space required after the NOTATION name'\n");
3271 ctxt->wellFormed = 0;
3272 ctxt->disableSAX = 1;
3273 return;
3274 }
3275 SKIP_BLANKS;
3276
3277 /*
3278 * Parse the IDs.
3279 */
3280 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3281 SKIP_BLANKS;
3282
3283 if (RAW == '>') {
3284 if (input != ctxt->input) {
3285 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3286 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3287 ctxt->sax->error(ctxt->userData,
3288"Notation declaration doesn't start and stop in the same entity\n");
3289 ctxt->wellFormed = 0;
3290 ctxt->disableSAX = 1;
3291 }
3292 NEXT;
3293 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3294 (ctxt->sax->notationDecl != NULL))
3295 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3296 } else {
3297 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3298 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3299 ctxt->sax->error(ctxt->userData,
3300 "'>' required to close NOTATION declaration\n");
3301 ctxt->wellFormed = 0;
3302 ctxt->disableSAX = 1;
3303 }
3304 xmlFree(name);
3305 if (Systemid != NULL) xmlFree(Systemid);
3306 if (Pubid != NULL) xmlFree(Pubid);
3307 }
3308}
3309
3310/**
3311 * xmlParseEntityDecl:
3312 * @ctxt: an XML parser context
3313 *
3314 * parse <!ENTITY declarations
3315 *
3316 * [70] EntityDecl ::= GEDecl | PEDecl
3317 *
3318 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3319 *
3320 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3321 *
3322 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3323 *
3324 * [74] PEDef ::= EntityValue | ExternalID
3325 *
3326 * [76] NDataDecl ::= S 'NDATA' S Name
3327 *
3328 * [ VC: Notation Declared ]
3329 * The Name must match the declared name of a notation.
3330 */
3331
3332void
3333xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3334 xmlChar *name = NULL;
3335 xmlChar *value = NULL;
3336 xmlChar *URI = NULL, *literal = NULL;
3337 xmlChar *ndata = NULL;
3338 int isParameter = 0;
3339 xmlChar *orig = NULL;
3340
3341 GROW;
3342 if ((RAW == '<') && (NXT(1) == '!') &&
3343 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3344 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3345 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3346 xmlParserInputPtr input = ctxt->input;
3347 ctxt->instate = XML_PARSER_ENTITY_DECL;
3348 SHRINK;
3349 SKIP(8);
3350 if (!IS_BLANK(CUR)) {
3351 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3352 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3353 ctxt->sax->error(ctxt->userData,
3354 "Space required after '<!ENTITY'\n");
3355 ctxt->wellFormed = 0;
3356 ctxt->disableSAX = 1;
3357 }
3358 SKIP_BLANKS;
3359
3360 if (RAW == '%') {
3361 NEXT;
3362 if (!IS_BLANK(CUR)) {
3363 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3364 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3365 ctxt->sax->error(ctxt->userData,
3366 "Space required after '%'\n");
3367 ctxt->wellFormed = 0;
3368 ctxt->disableSAX = 1;
3369 }
3370 SKIP_BLANKS;
3371 isParameter = 1;
3372 }
3373
Daniel Veillard76d66f42001-05-16 21:05:17 +00003374 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003375 if (name == NULL) {
3376 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3377 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3378 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3379 ctxt->wellFormed = 0;
3380 ctxt->disableSAX = 1;
3381 return;
3382 }
3383 if (!IS_BLANK(CUR)) {
3384 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3385 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3386 ctxt->sax->error(ctxt->userData,
3387 "Space required after the entity name\n");
3388 ctxt->wellFormed = 0;
3389 ctxt->disableSAX = 1;
3390 }
3391 SKIP_BLANKS;
3392
3393 /*
3394 * handle the various case of definitions...
3395 */
3396 if (isParameter) {
3397 if ((RAW == '"') || (RAW == '\'')) {
3398 value = xmlParseEntityValue(ctxt, &orig);
3399 if (value) {
3400 if ((ctxt->sax != NULL) &&
3401 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3402 ctxt->sax->entityDecl(ctxt->userData, name,
3403 XML_INTERNAL_PARAMETER_ENTITY,
3404 NULL, NULL, value);
3405 }
3406 } else {
3407 URI = xmlParseExternalID(ctxt, &literal, 1);
3408 if ((URI == NULL) && (literal == NULL)) {
3409 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3410 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3411 ctxt->sax->error(ctxt->userData,
3412 "Entity value required\n");
3413 ctxt->wellFormed = 0;
3414 ctxt->disableSAX = 1;
3415 }
3416 if (URI) {
3417 xmlURIPtr uri;
3418
3419 uri = xmlParseURI((const char *) URI);
3420 if (uri == NULL) {
3421 ctxt->errNo = XML_ERR_INVALID_URI;
3422 if ((ctxt->sax != NULL) &&
3423 (!ctxt->disableSAX) &&
3424 (ctxt->sax->error != NULL))
3425 ctxt->sax->error(ctxt->userData,
3426 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003427 /*
3428 * This really ought to be a well formedness error
3429 * but the XML Core WG decided otherwise c.f. issue
3430 * E26 of the XML erratas.
3431 */
Owen Taylor3473f882001-02-23 17:55:21 +00003432 } else {
3433 if (uri->fragment != NULL) {
3434 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3435 if ((ctxt->sax != NULL) &&
3436 (!ctxt->disableSAX) &&
3437 (ctxt->sax->error != NULL))
3438 ctxt->sax->error(ctxt->userData,
3439 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003440 /*
3441 * Okay this is foolish to block those but not
3442 * invalid URIs.
3443 */
Owen Taylor3473f882001-02-23 17:55:21 +00003444 ctxt->wellFormed = 0;
3445 } else {
3446 if ((ctxt->sax != NULL) &&
3447 (!ctxt->disableSAX) &&
3448 (ctxt->sax->entityDecl != NULL))
3449 ctxt->sax->entityDecl(ctxt->userData, name,
3450 XML_EXTERNAL_PARAMETER_ENTITY,
3451 literal, URI, NULL);
3452 }
3453 xmlFreeURI(uri);
3454 }
3455 }
3456 }
3457 } else {
3458 if ((RAW == '"') || (RAW == '\'')) {
3459 value = xmlParseEntityValue(ctxt, &orig);
3460 if ((ctxt->sax != NULL) &&
3461 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3462 ctxt->sax->entityDecl(ctxt->userData, name,
3463 XML_INTERNAL_GENERAL_ENTITY,
3464 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003465 /*
3466 * For expat compatibility in SAX mode.
3467 */
3468 if ((ctxt->myDoc == NULL) ||
3469 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3470 if (ctxt->myDoc == NULL) {
3471 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3472 }
3473 if (ctxt->myDoc->intSubset == NULL)
3474 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3475 BAD_CAST "fake", NULL, NULL);
3476
3477 entityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3478 NULL, NULL, value);
3479 }
Owen Taylor3473f882001-02-23 17:55:21 +00003480 } else {
3481 URI = xmlParseExternalID(ctxt, &literal, 1);
3482 if ((URI == NULL) && (literal == NULL)) {
3483 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3484 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3485 ctxt->sax->error(ctxt->userData,
3486 "Entity value required\n");
3487 ctxt->wellFormed = 0;
3488 ctxt->disableSAX = 1;
3489 }
3490 if (URI) {
3491 xmlURIPtr uri;
3492
3493 uri = xmlParseURI((const char *)URI);
3494 if (uri == NULL) {
3495 ctxt->errNo = XML_ERR_INVALID_URI;
3496 if ((ctxt->sax != NULL) &&
3497 (!ctxt->disableSAX) &&
3498 (ctxt->sax->error != NULL))
3499 ctxt->sax->error(ctxt->userData,
3500 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003501 /*
3502 * This really ought to be a well formedness error
3503 * but the XML Core WG decided otherwise c.f. issue
3504 * E26 of the XML erratas.
3505 */
Owen Taylor3473f882001-02-23 17:55:21 +00003506 } else {
3507 if (uri->fragment != NULL) {
3508 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3509 if ((ctxt->sax != NULL) &&
3510 (!ctxt->disableSAX) &&
3511 (ctxt->sax->error != NULL))
3512 ctxt->sax->error(ctxt->userData,
3513 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003514 /*
3515 * Okay this is foolish to block those but not
3516 * invalid URIs.
3517 */
Owen Taylor3473f882001-02-23 17:55:21 +00003518 ctxt->wellFormed = 0;
3519 }
3520 xmlFreeURI(uri);
3521 }
3522 }
3523 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3524 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3525 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3526 ctxt->sax->error(ctxt->userData,
3527 "Space required before 'NDATA'\n");
3528 ctxt->wellFormed = 0;
3529 ctxt->disableSAX = 1;
3530 }
3531 SKIP_BLANKS;
3532 if ((RAW == 'N') && (NXT(1) == 'D') &&
3533 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3534 (NXT(4) == 'A')) {
3535 SKIP(5);
3536 if (!IS_BLANK(CUR)) {
3537 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3538 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3539 ctxt->sax->error(ctxt->userData,
3540 "Space required after 'NDATA'\n");
3541 ctxt->wellFormed = 0;
3542 ctxt->disableSAX = 1;
3543 }
3544 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003545 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003546 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3547 (ctxt->sax->unparsedEntityDecl != NULL))
3548 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3549 literal, URI, ndata);
3550 } else {
3551 if ((ctxt->sax != NULL) &&
3552 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3553 ctxt->sax->entityDecl(ctxt->userData, name,
3554 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3555 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003556 /*
3557 * For expat compatibility in SAX mode.
3558 * assuming the entity repalcement was asked for
3559 */
3560 if ((ctxt->replaceEntities != 0) &&
3561 ((ctxt->myDoc == NULL) ||
3562 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
3563 if (ctxt->myDoc == NULL) {
3564 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3565 }
3566
3567 if (ctxt->myDoc->intSubset == NULL)
3568 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3569 BAD_CAST "fake", NULL, NULL);
3570 entityDecl(ctxt, name,
3571 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3572 literal, URI, NULL);
3573 }
Owen Taylor3473f882001-02-23 17:55:21 +00003574 }
3575 }
3576 }
3577 SKIP_BLANKS;
3578 if (RAW != '>') {
3579 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3580 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3581 ctxt->sax->error(ctxt->userData,
3582 "xmlParseEntityDecl: entity %s not terminated\n", name);
3583 ctxt->wellFormed = 0;
3584 ctxt->disableSAX = 1;
3585 } else {
3586 if (input != ctxt->input) {
3587 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3588 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3589 ctxt->sax->error(ctxt->userData,
3590"Entity declaration doesn't start and stop in the same entity\n");
3591 ctxt->wellFormed = 0;
3592 ctxt->disableSAX = 1;
3593 }
3594 NEXT;
3595 }
3596 if (orig != NULL) {
3597 /*
3598 * Ugly mechanism to save the raw entity value.
3599 */
3600 xmlEntityPtr cur = NULL;
3601
3602 if (isParameter) {
3603 if ((ctxt->sax != NULL) &&
3604 (ctxt->sax->getParameterEntity != NULL))
3605 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3606 } else {
3607 if ((ctxt->sax != NULL) &&
3608 (ctxt->sax->getEntity != NULL))
3609 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003610 if ((cur == NULL) && (ctxt->userData==ctxt)) {
3611 cur = getEntity(ctxt, name);
3612 }
Owen Taylor3473f882001-02-23 17:55:21 +00003613 }
3614 if (cur != NULL) {
3615 if (cur->orig != NULL)
3616 xmlFree(orig);
3617 else
3618 cur->orig = orig;
3619 } else
3620 xmlFree(orig);
3621 }
3622 if (name != NULL) xmlFree(name);
3623 if (value != NULL) xmlFree(value);
3624 if (URI != NULL) xmlFree(URI);
3625 if (literal != NULL) xmlFree(literal);
3626 if (ndata != NULL) xmlFree(ndata);
3627 }
3628}
3629
3630/**
3631 * xmlParseDefaultDecl:
3632 * @ctxt: an XML parser context
3633 * @value: Receive a possible fixed default value for the attribute
3634 *
3635 * Parse an attribute default declaration
3636 *
3637 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3638 *
3639 * [ VC: Required Attribute ]
3640 * if the default declaration is the keyword #REQUIRED, then the
3641 * attribute must be specified for all elements of the type in the
3642 * attribute-list declaration.
3643 *
3644 * [ VC: Attribute Default Legal ]
3645 * The declared default value must meet the lexical constraints of
3646 * the declared attribute type c.f. xmlValidateAttributeDecl()
3647 *
3648 * [ VC: Fixed Attribute Default ]
3649 * if an attribute has a default value declared with the #FIXED
3650 * keyword, instances of that attribute must match the default value.
3651 *
3652 * [ WFC: No < in Attribute Values ]
3653 * handled in xmlParseAttValue()
3654 *
3655 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3656 * or XML_ATTRIBUTE_FIXED.
3657 */
3658
3659int
3660xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3661 int val;
3662 xmlChar *ret;
3663
3664 *value = NULL;
3665 if ((RAW == '#') && (NXT(1) == 'R') &&
3666 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3667 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3668 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3669 (NXT(8) == 'D')) {
3670 SKIP(9);
3671 return(XML_ATTRIBUTE_REQUIRED);
3672 }
3673 if ((RAW == '#') && (NXT(1) == 'I') &&
3674 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3675 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3676 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3677 SKIP(8);
3678 return(XML_ATTRIBUTE_IMPLIED);
3679 }
3680 val = XML_ATTRIBUTE_NONE;
3681 if ((RAW == '#') && (NXT(1) == 'F') &&
3682 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3683 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3684 SKIP(6);
3685 val = XML_ATTRIBUTE_FIXED;
3686 if (!IS_BLANK(CUR)) {
3687 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3688 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3689 ctxt->sax->error(ctxt->userData,
3690 "Space required after '#FIXED'\n");
3691 ctxt->wellFormed = 0;
3692 ctxt->disableSAX = 1;
3693 }
3694 SKIP_BLANKS;
3695 }
3696 ret = xmlParseAttValue(ctxt);
3697 ctxt->instate = XML_PARSER_DTD;
3698 if (ret == NULL) {
3699 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3700 ctxt->sax->error(ctxt->userData,
3701 "Attribute default value declaration error\n");
3702 ctxt->wellFormed = 0;
3703 ctxt->disableSAX = 1;
3704 } else
3705 *value = ret;
3706 return(val);
3707}
3708
3709/**
3710 * xmlParseNotationType:
3711 * @ctxt: an XML parser context
3712 *
3713 * parse an Notation attribute type.
3714 *
3715 * Note: the leading 'NOTATION' S part has already being parsed...
3716 *
3717 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3718 *
3719 * [ VC: Notation Attributes ]
3720 * Values of this type must match one of the notation names included
3721 * in the declaration; all notation names in the declaration must be declared.
3722 *
3723 * Returns: the notation attribute tree built while parsing
3724 */
3725
3726xmlEnumerationPtr
3727xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3728 xmlChar *name;
3729 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3730
3731 if (RAW != '(') {
3732 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3733 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3734 ctxt->sax->error(ctxt->userData,
3735 "'(' required to start 'NOTATION'\n");
3736 ctxt->wellFormed = 0;
3737 ctxt->disableSAX = 1;
3738 return(NULL);
3739 }
3740 SHRINK;
3741 do {
3742 NEXT;
3743 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003744 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003745 if (name == NULL) {
3746 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3747 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3748 ctxt->sax->error(ctxt->userData,
3749 "Name expected in NOTATION declaration\n");
3750 ctxt->wellFormed = 0;
3751 ctxt->disableSAX = 1;
3752 return(ret);
3753 }
3754 cur = xmlCreateEnumeration(name);
3755 xmlFree(name);
3756 if (cur == NULL) return(ret);
3757 if (last == NULL) ret = last = cur;
3758 else {
3759 last->next = cur;
3760 last = cur;
3761 }
3762 SKIP_BLANKS;
3763 } while (RAW == '|');
3764 if (RAW != ')') {
3765 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3766 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3767 ctxt->sax->error(ctxt->userData,
3768 "')' required to finish NOTATION declaration\n");
3769 ctxt->wellFormed = 0;
3770 ctxt->disableSAX = 1;
3771 if ((last != NULL) && (last != ret))
3772 xmlFreeEnumeration(last);
3773 return(ret);
3774 }
3775 NEXT;
3776 return(ret);
3777}
3778
3779/**
3780 * xmlParseEnumerationType:
3781 * @ctxt: an XML parser context
3782 *
3783 * parse an Enumeration attribute type.
3784 *
3785 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3786 *
3787 * [ VC: Enumeration ]
3788 * Values of this type must match one of the Nmtoken tokens in
3789 * the declaration
3790 *
3791 * Returns: the enumeration attribute tree built while parsing
3792 */
3793
3794xmlEnumerationPtr
3795xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3796 xmlChar *name;
3797 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3798
3799 if (RAW != '(') {
3800 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3801 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3802 ctxt->sax->error(ctxt->userData,
3803 "'(' required to start ATTLIST enumeration\n");
3804 ctxt->wellFormed = 0;
3805 ctxt->disableSAX = 1;
3806 return(NULL);
3807 }
3808 SHRINK;
3809 do {
3810 NEXT;
3811 SKIP_BLANKS;
3812 name = xmlParseNmtoken(ctxt);
3813 if (name == NULL) {
3814 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3815 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3816 ctxt->sax->error(ctxt->userData,
3817 "NmToken expected in ATTLIST enumeration\n");
3818 ctxt->wellFormed = 0;
3819 ctxt->disableSAX = 1;
3820 return(ret);
3821 }
3822 cur = xmlCreateEnumeration(name);
3823 xmlFree(name);
3824 if (cur == NULL) return(ret);
3825 if (last == NULL) ret = last = cur;
3826 else {
3827 last->next = cur;
3828 last = cur;
3829 }
3830 SKIP_BLANKS;
3831 } while (RAW == '|');
3832 if (RAW != ')') {
3833 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3834 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3835 ctxt->sax->error(ctxt->userData,
3836 "')' required to finish ATTLIST enumeration\n");
3837 ctxt->wellFormed = 0;
3838 ctxt->disableSAX = 1;
3839 return(ret);
3840 }
3841 NEXT;
3842 return(ret);
3843}
3844
3845/**
3846 * xmlParseEnumeratedType:
3847 * @ctxt: an XML parser context
3848 * @tree: the enumeration tree built while parsing
3849 *
3850 * parse an Enumerated attribute type.
3851 *
3852 * [57] EnumeratedType ::= NotationType | Enumeration
3853 *
3854 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3855 *
3856 *
3857 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3858 */
3859
3860int
3861xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3862 if ((RAW == 'N') && (NXT(1) == 'O') &&
3863 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3864 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3865 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3866 SKIP(8);
3867 if (!IS_BLANK(CUR)) {
3868 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3869 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3870 ctxt->sax->error(ctxt->userData,
3871 "Space required after 'NOTATION'\n");
3872 ctxt->wellFormed = 0;
3873 ctxt->disableSAX = 1;
3874 return(0);
3875 }
3876 SKIP_BLANKS;
3877 *tree = xmlParseNotationType(ctxt);
3878 if (*tree == NULL) return(0);
3879 return(XML_ATTRIBUTE_NOTATION);
3880 }
3881 *tree = xmlParseEnumerationType(ctxt);
3882 if (*tree == NULL) return(0);
3883 return(XML_ATTRIBUTE_ENUMERATION);
3884}
3885
3886/**
3887 * xmlParseAttributeType:
3888 * @ctxt: an XML parser context
3889 * @tree: the enumeration tree built while parsing
3890 *
3891 * parse the Attribute list def for an element
3892 *
3893 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3894 *
3895 * [55] StringType ::= 'CDATA'
3896 *
3897 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3898 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3899 *
3900 * Validity constraints for attribute values syntax are checked in
3901 * xmlValidateAttributeValue()
3902 *
3903 * [ VC: ID ]
3904 * Values of type ID must match the Name production. A name must not
3905 * appear more than once in an XML document as a value of this type;
3906 * i.e., ID values must uniquely identify the elements which bear them.
3907 *
3908 * [ VC: One ID per Element Type ]
3909 * No element type may have more than one ID attribute specified.
3910 *
3911 * [ VC: ID Attribute Default ]
3912 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3913 *
3914 * [ VC: IDREF ]
3915 * Values of type IDREF must match the Name production, and values
3916 * of type IDREFS must match Names; each IDREF Name must match the value
3917 * of an ID attribute on some element in the XML document; i.e. IDREF
3918 * values must match the value of some ID attribute.
3919 *
3920 * [ VC: Entity Name ]
3921 * Values of type ENTITY must match the Name production, values
3922 * of type ENTITIES must match Names; each Entity Name must match the
3923 * name of an unparsed entity declared in the DTD.
3924 *
3925 * [ VC: Name Token ]
3926 * Values of type NMTOKEN must match the Nmtoken production; values
3927 * of type NMTOKENS must match Nmtokens.
3928 *
3929 * Returns the attribute type
3930 */
3931int
3932xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3933 SHRINK;
3934 if ((RAW == 'C') && (NXT(1) == 'D') &&
3935 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3936 (NXT(4) == 'A')) {
3937 SKIP(5);
3938 return(XML_ATTRIBUTE_CDATA);
3939 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3940 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3941 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3942 SKIP(6);
3943 return(XML_ATTRIBUTE_IDREFS);
3944 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3945 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3946 (NXT(4) == 'F')) {
3947 SKIP(5);
3948 return(XML_ATTRIBUTE_IDREF);
3949 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3950 SKIP(2);
3951 return(XML_ATTRIBUTE_ID);
3952 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3953 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3954 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3955 SKIP(6);
3956 return(XML_ATTRIBUTE_ENTITY);
3957 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3958 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3959 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3960 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3961 SKIP(8);
3962 return(XML_ATTRIBUTE_ENTITIES);
3963 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3964 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3965 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3966 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3967 SKIP(8);
3968 return(XML_ATTRIBUTE_NMTOKENS);
3969 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3970 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3971 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3972 (NXT(6) == 'N')) {
3973 SKIP(7);
3974 return(XML_ATTRIBUTE_NMTOKEN);
3975 }
3976 return(xmlParseEnumeratedType(ctxt, tree));
3977}
3978
3979/**
3980 * xmlParseAttributeListDecl:
3981 * @ctxt: an XML parser context
3982 *
 * Parse the attribute list declaration for an element.
3984 *
3985 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3986 *
3987 * [53] AttDef ::= S Name S AttType S DefaultDecl
3988 *
3989 */
3990void
3991xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3992 xmlChar *elemName;
3993 xmlChar *attrName;
3994 xmlEnumerationPtr tree;
3995
3996 if ((RAW == '<') && (NXT(1) == '!') &&
3997 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3998 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3999 (NXT(6) == 'I') && (NXT(7) == 'S') &&
4000 (NXT(8) == 'T')) {
4001 xmlParserInputPtr input = ctxt->input;
4002
4003 SKIP(9);
4004 if (!IS_BLANK(CUR)) {
4005 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4007 ctxt->sax->error(ctxt->userData,
4008 "Space required after '<!ATTLIST'\n");
4009 ctxt->wellFormed = 0;
4010 ctxt->disableSAX = 1;
4011 }
4012 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004013 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004014 if (elemName == NULL) {
4015 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4016 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4017 ctxt->sax->error(ctxt->userData,
4018 "ATTLIST: no name for Element\n");
4019 ctxt->wellFormed = 0;
4020 ctxt->disableSAX = 1;
4021 return;
4022 }
4023 SKIP_BLANKS;
4024 GROW;
4025 while (RAW != '>') {
4026 const xmlChar *check = CUR_PTR;
4027 int type;
4028 int def;
4029 xmlChar *defaultValue = NULL;
4030
4031 GROW;
4032 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004033 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004034 if (attrName == NULL) {
4035 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4036 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4037 ctxt->sax->error(ctxt->userData,
4038 "ATTLIST: no name for Attribute\n");
4039 ctxt->wellFormed = 0;
4040 ctxt->disableSAX = 1;
4041 break;
4042 }
4043 GROW;
4044 if (!IS_BLANK(CUR)) {
4045 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4046 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4047 ctxt->sax->error(ctxt->userData,
4048 "Space required after the attribute name\n");
4049 ctxt->wellFormed = 0;
4050 ctxt->disableSAX = 1;
4051 if (attrName != NULL)
4052 xmlFree(attrName);
4053 if (defaultValue != NULL)
4054 xmlFree(defaultValue);
4055 break;
4056 }
4057 SKIP_BLANKS;
4058
4059 type = xmlParseAttributeType(ctxt, &tree);
4060 if (type <= 0) {
4061 if (attrName != NULL)
4062 xmlFree(attrName);
4063 if (defaultValue != NULL)
4064 xmlFree(defaultValue);
4065 break;
4066 }
4067
4068 GROW;
4069 if (!IS_BLANK(CUR)) {
4070 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4071 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4072 ctxt->sax->error(ctxt->userData,
4073 "Space required after the attribute type\n");
4074 ctxt->wellFormed = 0;
4075 ctxt->disableSAX = 1;
4076 if (attrName != NULL)
4077 xmlFree(attrName);
4078 if (defaultValue != NULL)
4079 xmlFree(defaultValue);
4080 if (tree != NULL)
4081 xmlFreeEnumeration(tree);
4082 break;
4083 }
4084 SKIP_BLANKS;
4085
4086 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4087 if (def <= 0) {
4088 if (attrName != NULL)
4089 xmlFree(attrName);
4090 if (defaultValue != NULL)
4091 xmlFree(defaultValue);
4092 if (tree != NULL)
4093 xmlFreeEnumeration(tree);
4094 break;
4095 }
4096
4097 GROW;
4098 if (RAW != '>') {
4099 if (!IS_BLANK(CUR)) {
4100 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4101 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4102 ctxt->sax->error(ctxt->userData,
4103 "Space required after the attribute default value\n");
4104 ctxt->wellFormed = 0;
4105 ctxt->disableSAX = 1;
4106 if (attrName != NULL)
4107 xmlFree(attrName);
4108 if (defaultValue != NULL)
4109 xmlFree(defaultValue);
4110 if (tree != NULL)
4111 xmlFreeEnumeration(tree);
4112 break;
4113 }
4114 SKIP_BLANKS;
4115 }
4116 if (check == CUR_PTR) {
4117 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4118 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4119 ctxt->sax->error(ctxt->userData,
4120 "xmlParseAttributeListDecl: detected internal error\n");
4121 if (attrName != NULL)
4122 xmlFree(attrName);
4123 if (defaultValue != NULL)
4124 xmlFree(defaultValue);
4125 if (tree != NULL)
4126 xmlFreeEnumeration(tree);
4127 break;
4128 }
4129 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4130 (ctxt->sax->attributeDecl != NULL))
4131 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4132 type, def, defaultValue, tree);
4133 if (attrName != NULL)
4134 xmlFree(attrName);
4135 if (defaultValue != NULL)
4136 xmlFree(defaultValue);
4137 GROW;
4138 }
4139 if (RAW == '>') {
4140 if (input != ctxt->input) {
4141 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4142 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4143 ctxt->sax->error(ctxt->userData,
4144"Attribute list declaration doesn't start and stop in the same entity\n");
4145 ctxt->wellFormed = 0;
4146 ctxt->disableSAX = 1;
4147 }
4148 NEXT;
4149 }
4150
4151 xmlFree(elemName);
4152 }
4153}
4154
4155/**
4156 * xmlParseElementMixedContentDecl:
4157 * @ctxt: an XML parser context
4158 *
4159 * parse the declaration for a Mixed Element content
4160 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4161 *
4162 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4163 * '(' S? '#PCDATA' S? ')'
4164 *
4165 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4166 *
4167 * [ VC: No Duplicate Types ]
4168 * The same name must not appear more than once in a single
4169 * mixed-content declaration.
4170 *
4171 * returns: the list of the xmlElementContentPtr describing the element choices
4172 */
4173xmlElementContentPtr
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004174xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004175 xmlElementContentPtr ret = NULL, cur = NULL, n;
4176 xmlChar *elem = NULL;
4177
4178 GROW;
4179 if ((RAW == '#') && (NXT(1) == 'P') &&
4180 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4181 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4182 (NXT(6) == 'A')) {
4183 SKIP(7);
4184 SKIP_BLANKS;
4185 SHRINK;
4186 if (RAW == ')') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004187 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4188 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4189 if (ctxt->vctxt.error != NULL)
4190 ctxt->vctxt.error(ctxt->vctxt.userData,
4191"Element content declaration doesn't start and stop in the same entity\n");
4192 ctxt->valid = 0;
4193 }
Owen Taylor3473f882001-02-23 17:55:21 +00004194 NEXT;
4195 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4196 if (RAW == '*') {
4197 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4198 NEXT;
4199 }
4200 return(ret);
4201 }
4202 if ((RAW == '(') || (RAW == '|')) {
4203 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4204 if (ret == NULL) return(NULL);
4205 }
4206 while (RAW == '|') {
4207 NEXT;
4208 if (elem == NULL) {
4209 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4210 if (ret == NULL) return(NULL);
4211 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004212 if (cur != NULL)
4213 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004214 cur = ret;
4215 } else {
4216 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4217 if (n == NULL) return(NULL);
4218 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004219 if (n->c1 != NULL)
4220 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004221 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004222 if (n != NULL)
4223 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004224 cur = n;
4225 xmlFree(elem);
4226 }
4227 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004228 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004229 if (elem == NULL) {
4230 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4231 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4232 ctxt->sax->error(ctxt->userData,
4233 "xmlParseElementMixedContentDecl : Name expected\n");
4234 ctxt->wellFormed = 0;
4235 ctxt->disableSAX = 1;
4236 xmlFreeElementContent(cur);
4237 return(NULL);
4238 }
4239 SKIP_BLANKS;
4240 GROW;
4241 }
4242 if ((RAW == ')') && (NXT(1) == '*')) {
4243 if (elem != NULL) {
4244 cur->c2 = xmlNewElementContent(elem,
4245 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004246 if (cur->c2 != NULL)
4247 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004248 xmlFree(elem);
4249 }
4250 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004251 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4252 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4253 if (ctxt->vctxt.error != NULL)
4254 ctxt->vctxt.error(ctxt->vctxt.userData,
4255"Element content declaration doesn't start and stop in the same entity\n");
4256 ctxt->valid = 0;
4257 }
Owen Taylor3473f882001-02-23 17:55:21 +00004258 SKIP(2);
4259 } else {
4260 if (elem != NULL) xmlFree(elem);
4261 xmlFreeElementContent(ret);
4262 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4263 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4264 ctxt->sax->error(ctxt->userData,
4265 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4266 ctxt->wellFormed = 0;
4267 ctxt->disableSAX = 1;
4268 return(NULL);
4269 }
4270
4271 } else {
4272 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4273 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4274 ctxt->sax->error(ctxt->userData,
4275 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4276 ctxt->wellFormed = 0;
4277 ctxt->disableSAX = 1;
4278 }
4279 return(ret);
4280}
4281
4282/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004283 * xmlParseElementChildrenContentD:
4284 * @ctxt: an XML parser context
4285 *
4286 * VMS version of xmlParseElementChildrenContentDecl()
4287 *
4288 * Returns the tree of xmlElementContentPtr describing the element
4289 * hierarchy.
4290 */
4291/**
Owen Taylor3473f882001-02-23 17:55:21 +00004292 * xmlParseElementChildrenContentDecl:
4293 * @ctxt: an XML parser context
4294 *
4295 * parse the declaration for a Mixed Element content
4296 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4297 *
4298 *
4299 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4300 *
4301 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4302 *
4303 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4304 *
4305 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4306 *
4307 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4308 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004309 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004310 * opening or closing parentheses in a choice, seq, or Mixed
4311 * construct is contained in the replacement text for a parameter
4312 * entity, both must be contained in the same replacement text. For
4313 * interoperability, if a parameter-entity reference appears in a
4314 * choice, seq, or Mixed construct, its replacement text should not
4315 * be empty, and neither the first nor last non-blank character of
4316 * the replacement text should be a connector (| or ,).
4317 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004318 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004319 * hierarchy.
4320 */
4321xmlElementContentPtr
4322#ifdef VMS
4323xmlParseElementChildrenContentD
4324#else
4325xmlParseElementChildrenContentDecl
4326#endif
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004327(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004328 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4329 xmlChar *elem;
4330 xmlChar type = 0;
4331
4332 SKIP_BLANKS;
4333 GROW;
4334 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004335 xmlParserInputPtr input = ctxt->input;
4336
Owen Taylor3473f882001-02-23 17:55:21 +00004337 /* Recurse on first child */
4338 NEXT;
4339 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004340 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004341 SKIP_BLANKS;
4342 GROW;
4343 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004344 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004345 if (elem == NULL) {
4346 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4347 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4348 ctxt->sax->error(ctxt->userData,
4349 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4350 ctxt->wellFormed = 0;
4351 ctxt->disableSAX = 1;
4352 return(NULL);
4353 }
4354 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4355 GROW;
4356 if (RAW == '?') {
4357 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4358 NEXT;
4359 } else if (RAW == '*') {
4360 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4361 NEXT;
4362 } else if (RAW == '+') {
4363 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4364 NEXT;
4365 } else {
4366 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4367 }
4368 xmlFree(elem);
4369 GROW;
4370 }
4371 SKIP_BLANKS;
4372 SHRINK;
4373 while (RAW != ')') {
4374 /*
4375 * Each loop we parse one separator and one element.
4376 */
4377 if (RAW == ',') {
4378 if (type == 0) type = CUR;
4379
4380 /*
4381 * Detect "Name | Name , Name" error
4382 */
4383 else if (type != CUR) {
4384 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4385 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4386 ctxt->sax->error(ctxt->userData,
4387 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4388 type);
4389 ctxt->wellFormed = 0;
4390 ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004391 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004392 xmlFreeElementContent(last);
4393 if (ret != NULL)
4394 xmlFreeElementContent(ret);
4395 return(NULL);
4396 }
4397 NEXT;
4398
4399 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4400 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004401 if ((last != NULL) && (last != ret))
4402 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004403 xmlFreeElementContent(ret);
4404 return(NULL);
4405 }
4406 if (last == NULL) {
4407 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004408 if (ret != NULL)
4409 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004410 ret = cur = op;
4411 } else {
4412 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004413 if (op != NULL)
4414 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004415 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004416 if (last != NULL)
4417 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004418 cur =op;
4419 last = NULL;
4420 }
4421 } else if (RAW == '|') {
4422 if (type == 0) type = CUR;
4423
4424 /*
4425 * Detect "Name , Name | Name" error
4426 */
4427 else if (type != CUR) {
4428 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4429 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4430 ctxt->sax->error(ctxt->userData,
4431 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4432 type);
4433 ctxt->wellFormed = 0;
4434 ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004435 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004436 xmlFreeElementContent(last);
4437 if (ret != NULL)
4438 xmlFreeElementContent(ret);
4439 return(NULL);
4440 }
4441 NEXT;
4442
4443 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4444 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004445 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004446 xmlFreeElementContent(last);
4447 if (ret != NULL)
4448 xmlFreeElementContent(ret);
4449 return(NULL);
4450 }
4451 if (last == NULL) {
4452 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004453 if (ret != NULL)
4454 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004455 ret = cur = op;
4456 } else {
4457 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004458 if (op != NULL)
4459 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004460 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004461 if (last != NULL)
4462 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004463 cur =op;
4464 last = NULL;
4465 }
4466 } else {
4467 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4468 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4469 ctxt->sax->error(ctxt->userData,
4470 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4471 ctxt->wellFormed = 0;
4472 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004473 if (ret != NULL)
4474 xmlFreeElementContent(ret);
4475 return(NULL);
4476 }
4477 GROW;
4478 SKIP_BLANKS;
4479 GROW;
4480 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004481 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004482 /* Recurse on second child */
4483 NEXT;
4484 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004485 last = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004486 SKIP_BLANKS;
4487 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004488 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004489 if (elem == NULL) {
4490 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4491 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4492 ctxt->sax->error(ctxt->userData,
4493 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4494 ctxt->wellFormed = 0;
4495 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004496 if (ret != NULL)
4497 xmlFreeElementContent(ret);
4498 return(NULL);
4499 }
4500 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4501 xmlFree(elem);
4502 if (RAW == '?') {
4503 last->ocur = XML_ELEMENT_CONTENT_OPT;
4504 NEXT;
4505 } else if (RAW == '*') {
4506 last->ocur = XML_ELEMENT_CONTENT_MULT;
4507 NEXT;
4508 } else if (RAW == '+') {
4509 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4510 NEXT;
4511 } else {
4512 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4513 }
4514 }
4515 SKIP_BLANKS;
4516 GROW;
4517 }
4518 if ((cur != NULL) && (last != NULL)) {
4519 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004520 if (last != NULL)
4521 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004522 }
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004523 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4524 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4525 if (ctxt->vctxt.error != NULL)
4526 ctxt->vctxt.error(ctxt->vctxt.userData,
4527"Element content declaration doesn't start and stop in the same entity\n");
4528 ctxt->valid = 0;
4529 }
Owen Taylor3473f882001-02-23 17:55:21 +00004530 NEXT;
4531 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004532 if (ret != NULL)
4533 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004534 NEXT;
4535 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004536 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004537 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004538 cur = ret;
4539 /*
4540 * Some normalization:
4541 * (a | b* | c?)* == (a | b | c)*
4542 */
4543 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4544 if ((cur->c1 != NULL) &&
4545 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4546 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4547 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4548 if ((cur->c2 != NULL) &&
4549 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4550 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4551 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4552 cur = cur->c2;
4553 }
4554 }
Owen Taylor3473f882001-02-23 17:55:21 +00004555 NEXT;
4556 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004557 if (ret != NULL) {
4558 int found = 0;
4559
Daniel Veillarde470df72001-04-18 21:41:07 +00004560 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004561 /*
4562 * Some normalization:
4563 * (a | b*)+ == (a | b)*
4564 * (a | b?)+ == (a | b)*
4565 */
4566 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4567 if ((cur->c1 != NULL) &&
4568 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4569 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4570 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4571 found = 1;
4572 }
4573 if ((cur->c2 != NULL) &&
4574 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4575 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4576 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4577 found = 1;
4578 }
4579 cur = cur->c2;
4580 }
4581 if (found)
4582 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4583 }
Owen Taylor3473f882001-02-23 17:55:21 +00004584 NEXT;
4585 }
4586 return(ret);
4587}
4588
4589/**
4590 * xmlParseElementContentDecl:
4591 * @ctxt: an XML parser context
4592 * @name: the name of the element being defined.
4593 * @result: the Element Content pointer will be stored here if any
4594 *
4595 * parse the declaration for an Element content either Mixed or Children,
4596 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4597 *
4598 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4599 *
4600 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4601 */
4602
4603int
4604xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4605 xmlElementContentPtr *result) {
4606
4607 xmlElementContentPtr tree = NULL;
4608 xmlParserInputPtr input = ctxt->input;
4609 int res;
4610
4611 *result = NULL;
4612
4613 if (RAW != '(') {
4614 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4615 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4616 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004617 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004618 ctxt->wellFormed = 0;
4619 ctxt->disableSAX = 1;
4620 return(-1);
4621 }
4622 NEXT;
4623 GROW;
4624 SKIP_BLANKS;
4625 if ((RAW == '#') && (NXT(1) == 'P') &&
4626 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4627 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4628 (NXT(6) == 'A')) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004629 tree = xmlParseElementMixedContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004630 res = XML_ELEMENT_TYPE_MIXED;
4631 } else {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004632 tree = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004633 res = XML_ELEMENT_TYPE_ELEMENT;
4634 }
Owen Taylor3473f882001-02-23 17:55:21 +00004635 SKIP_BLANKS;
4636 *result = tree;
4637 return(res);
4638}
4639
4640/**
4641 * xmlParseElementDecl:
4642 * @ctxt: an XML parser context
4643 *
4644 * parse an Element declaration.
4645 *
4646 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4647 *
4648 * [ VC: Unique Element Type Declaration ]
4649 * No element type may be declared more than once
4650 *
4651 * Returns the type of the element, or -1 in case of error
4652 */
4653int
4654xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4655 xmlChar *name;
4656 int ret = -1;
4657 xmlElementContentPtr content = NULL;
4658
4659 GROW;
4660 if ((RAW == '<') && (NXT(1) == '!') &&
4661 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4662 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4663 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4664 (NXT(8) == 'T')) {
4665 xmlParserInputPtr input = ctxt->input;
4666
4667 SKIP(9);
4668 if (!IS_BLANK(CUR)) {
4669 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4670 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4671 ctxt->sax->error(ctxt->userData,
4672 "Space required after 'ELEMENT'\n");
4673 ctxt->wellFormed = 0;
4674 ctxt->disableSAX = 1;
4675 }
4676 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004677 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004678 if (name == NULL) {
4679 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4680 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4681 ctxt->sax->error(ctxt->userData,
4682 "xmlParseElementDecl: no name for Element\n");
4683 ctxt->wellFormed = 0;
4684 ctxt->disableSAX = 1;
4685 return(-1);
4686 }
4687 while ((RAW == 0) && (ctxt->inputNr > 1))
4688 xmlPopInput(ctxt);
4689 if (!IS_BLANK(CUR)) {
4690 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4691 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4692 ctxt->sax->error(ctxt->userData,
4693 "Space required after the element name\n");
4694 ctxt->wellFormed = 0;
4695 ctxt->disableSAX = 1;
4696 }
4697 SKIP_BLANKS;
4698 if ((RAW == 'E') && (NXT(1) == 'M') &&
4699 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4700 (NXT(4) == 'Y')) {
4701 SKIP(5);
4702 /*
4703 * Element must always be empty.
4704 */
4705 ret = XML_ELEMENT_TYPE_EMPTY;
4706 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4707 (NXT(2) == 'Y')) {
4708 SKIP(3);
4709 /*
4710 * Element is a generic container.
4711 */
4712 ret = XML_ELEMENT_TYPE_ANY;
4713 } else if (RAW == '(') {
4714 ret = xmlParseElementContentDecl(ctxt, name, &content);
4715 } else {
4716 /*
4717 * [ WFC: PEs in Internal Subset ] error handling.
4718 */
4719 if ((RAW == '%') && (ctxt->external == 0) &&
4720 (ctxt->inputNr == 1)) {
4721 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4722 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4723 ctxt->sax->error(ctxt->userData,
4724 "PEReference: forbidden within markup decl in internal subset\n");
4725 } else {
4726 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4727 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4728 ctxt->sax->error(ctxt->userData,
4729 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4730 }
4731 ctxt->wellFormed = 0;
4732 ctxt->disableSAX = 1;
4733 if (name != NULL) xmlFree(name);
4734 return(-1);
4735 }
4736
4737 SKIP_BLANKS;
4738 /*
4739 * Pop-up of finished entities.
4740 */
4741 while ((RAW == 0) && (ctxt->inputNr > 1))
4742 xmlPopInput(ctxt);
4743 SKIP_BLANKS;
4744
4745 if (RAW != '>') {
4746 ctxt->errNo = XML_ERR_GT_REQUIRED;
4747 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4748 ctxt->sax->error(ctxt->userData,
4749 "xmlParseElementDecl: expected '>' at the end\n");
4750 ctxt->wellFormed = 0;
4751 ctxt->disableSAX = 1;
4752 } else {
4753 if (input != ctxt->input) {
4754 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4755 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4756 ctxt->sax->error(ctxt->userData,
4757"Element declaration doesn't start and stop in the same entity\n");
4758 ctxt->wellFormed = 0;
4759 ctxt->disableSAX = 1;
4760 }
4761
4762 NEXT;
4763 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4764 (ctxt->sax->elementDecl != NULL))
4765 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4766 content);
4767 }
4768 if (content != NULL) {
4769 xmlFreeElementContent(content);
4770 }
4771 if (name != NULL) {
4772 xmlFree(name);
4773 }
4774 }
4775 return(ret);
4776}
4777
4778/**
Owen Taylor3473f882001-02-23 17:55:21 +00004779 * xmlParseConditionalSections
4780 * @ctxt: an XML parser context
4781 *
4782 * [61] conditionalSect ::= includeSect | ignoreSect
4783 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4784 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4785 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4786 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4787 */
4788
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004789static void
Owen Taylor3473f882001-02-23 17:55:21 +00004790xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4791 SKIP(3);
4792 SKIP_BLANKS;
4793 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4794 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4795 (NXT(6) == 'E')) {
4796 SKIP(7);
4797 SKIP_BLANKS;
4798 if (RAW != '[') {
4799 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4800 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4801 ctxt->sax->error(ctxt->userData,
4802 "XML conditional section '[' expected\n");
4803 ctxt->wellFormed = 0;
4804 ctxt->disableSAX = 1;
4805 } else {
4806 NEXT;
4807 }
4808 if (xmlParserDebugEntities) {
4809 if ((ctxt->input != NULL) && (ctxt->input->filename))
4810 xmlGenericError(xmlGenericErrorContext,
4811 "%s(%d): ", ctxt->input->filename,
4812 ctxt->input->line);
4813 xmlGenericError(xmlGenericErrorContext,
4814 "Entering INCLUDE Conditional Section\n");
4815 }
4816
4817 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4818 (NXT(2) != '>'))) {
4819 const xmlChar *check = CUR_PTR;
4820 int cons = ctxt->input->consumed;
4821 int tok = ctxt->token;
4822
4823 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4824 xmlParseConditionalSections(ctxt);
4825 } else if (IS_BLANK(CUR)) {
4826 NEXT;
4827 } else if (RAW == '%') {
4828 xmlParsePEReference(ctxt);
4829 } else
4830 xmlParseMarkupDecl(ctxt);
4831
4832 /*
4833 * Pop-up of finished entities.
4834 */
4835 while ((RAW == 0) && (ctxt->inputNr > 1))
4836 xmlPopInput(ctxt);
4837
4838 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4839 (tok == ctxt->token)) {
4840 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4841 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4842 ctxt->sax->error(ctxt->userData,
4843 "Content error in the external subset\n");
4844 ctxt->wellFormed = 0;
4845 ctxt->disableSAX = 1;
4846 break;
4847 }
4848 }
4849 if (xmlParserDebugEntities) {
4850 if ((ctxt->input != NULL) && (ctxt->input->filename))
4851 xmlGenericError(xmlGenericErrorContext,
4852 "%s(%d): ", ctxt->input->filename,
4853 ctxt->input->line);
4854 xmlGenericError(xmlGenericErrorContext,
4855 "Leaving INCLUDE Conditional Section\n");
4856 }
4857
4858 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4859 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4860 int state;
4861 int instate;
4862 int depth = 0;
4863
4864 SKIP(6);
4865 SKIP_BLANKS;
4866 if (RAW != '[') {
4867 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4868 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4869 ctxt->sax->error(ctxt->userData,
4870 "XML conditional section '[' expected\n");
4871 ctxt->wellFormed = 0;
4872 ctxt->disableSAX = 1;
4873 } else {
4874 NEXT;
4875 }
4876 if (xmlParserDebugEntities) {
4877 if ((ctxt->input != NULL) && (ctxt->input->filename))
4878 xmlGenericError(xmlGenericErrorContext,
4879 "%s(%d): ", ctxt->input->filename,
4880 ctxt->input->line);
4881 xmlGenericError(xmlGenericErrorContext,
4882 "Entering IGNORE Conditional Section\n");
4883 }
4884
4885 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004886 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00004887 * But disable SAX event generating DTD building in the meantime
4888 */
4889 state = ctxt->disableSAX;
4890 instate = ctxt->instate;
4891 ctxt->disableSAX = 1;
4892 ctxt->instate = XML_PARSER_IGNORE;
4893
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004894 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004895 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4896 depth++;
4897 SKIP(3);
4898 continue;
4899 }
4900 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4901 if (--depth >= 0) SKIP(3);
4902 continue;
4903 }
4904 NEXT;
4905 continue;
4906 }
4907
4908 ctxt->disableSAX = state;
4909 ctxt->instate = instate;
4910
4911 if (xmlParserDebugEntities) {
4912 if ((ctxt->input != NULL) && (ctxt->input->filename))
4913 xmlGenericError(xmlGenericErrorContext,
4914 "%s(%d): ", ctxt->input->filename,
4915 ctxt->input->line);
4916 xmlGenericError(xmlGenericErrorContext,
4917 "Leaving IGNORE Conditional Section\n");
4918 }
4919
4920 } else {
4921 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4922 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4923 ctxt->sax->error(ctxt->userData,
4924 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4925 ctxt->wellFormed = 0;
4926 ctxt->disableSAX = 1;
4927 }
4928
4929 if (RAW == 0)
4930 SHRINK;
4931
4932 if (RAW == 0) {
4933 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4934 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4935 ctxt->sax->error(ctxt->userData,
4936 "XML conditional section not closed\n");
4937 ctxt->wellFormed = 0;
4938 ctxt->disableSAX = 1;
4939 } else {
4940 SKIP(3);
4941 }
4942}
4943
4944/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00004945 * xmlParseMarkupDecl:
4946 * @ctxt: an XML parser context
4947 *
4948 * parse Markup declarations
4949 *
4950 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4951 * NotationDecl | PI | Comment
4952 *
4953 * [ VC: Proper Declaration/PE Nesting ]
4954 * Parameter-entity replacement text must be properly nested with
4955 * markup declarations. That is to say, if either the first character
4956 * or the last character of a markup declaration (markupdecl above) is
4957 * contained in the replacement text for a parameter-entity reference,
4958 * both must be contained in the same replacement text.
4959 *
4960 * [ WFC: PEs in Internal Subset ]
4961 * In the internal DTD subset, parameter-entity references can occur
4962 * only where markup declarations can occur, not within markup declarations.
4963 * (This does not apply to references that occur in external parameter
4964 * entities or to the external subset.)
4965 */
4966void
4967xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4968 GROW;
4969 xmlParseElementDecl(ctxt);
4970 xmlParseAttributeListDecl(ctxt);
4971 xmlParseEntityDecl(ctxt);
4972 xmlParseNotationDecl(ctxt);
4973 xmlParsePI(ctxt);
4974 xmlParseComment(ctxt);
4975 /*
4976 * This is only for internal subset. On external entities,
4977 * the replacement is done before parsing stage
4978 */
4979 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4980 xmlParsePEReference(ctxt);
4981
4982 /*
4983 * Conditional sections are allowed from entities included
4984 * by PE References in the internal subset.
4985 */
4986 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
4987 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4988 xmlParseConditionalSections(ctxt);
4989 }
4990 }
4991
4992 ctxt->instate = XML_PARSER_DTD;
4993}
4994
4995/**
4996 * xmlParseTextDecl:
4997 * @ctxt: an XML parser context
4998 *
4999 * parse an XML declaration header for external entities
5000 *
5001 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5002 *
5003 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5004 */
5005
5006void
5007xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5008 xmlChar *version;
5009
5010 /*
5011 * We know that '<?xml' is here.
5012 */
5013 if ((RAW == '<') && (NXT(1) == '?') &&
5014 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5015 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5016 SKIP(5);
5017 } else {
5018 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
5019 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5020 ctxt->sax->error(ctxt->userData,
5021 "Text declaration '<?xml' required\n");
5022 ctxt->wellFormed = 0;
5023 ctxt->disableSAX = 1;
5024
5025 return;
5026 }
5027
5028 if (!IS_BLANK(CUR)) {
5029 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5030 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5031 ctxt->sax->error(ctxt->userData,
5032 "Space needed after '<?xml'\n");
5033 ctxt->wellFormed = 0;
5034 ctxt->disableSAX = 1;
5035 }
5036 SKIP_BLANKS;
5037
5038 /*
5039 * We may have the VersionInfo here.
5040 */
5041 version = xmlParseVersionInfo(ctxt);
5042 if (version == NULL)
5043 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005044 else {
5045 if (!IS_BLANK(CUR)) {
5046 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5047 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5048 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5049 ctxt->wellFormed = 0;
5050 ctxt->disableSAX = 1;
5051 }
5052 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005053 ctxt->input->version = version;
5054
5055 /*
5056 * We must have the encoding declaration
5057 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005058 xmlParseEncodingDecl(ctxt);
5059 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5060 /*
5061 * The XML REC instructs us to stop parsing right here
5062 */
5063 return;
5064 }
5065
5066 SKIP_BLANKS;
5067 if ((RAW == '?') && (NXT(1) == '>')) {
5068 SKIP(2);
5069 } else if (RAW == '>') {
5070 /* Deprecated old WD ... */
5071 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5072 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5073 ctxt->sax->error(ctxt->userData,
5074 "XML declaration must end-up with '?>'\n");
5075 ctxt->wellFormed = 0;
5076 ctxt->disableSAX = 1;
5077 NEXT;
5078 } else {
5079 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5080 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5081 ctxt->sax->error(ctxt->userData,
5082 "parsing XML declaration: '?>' expected\n");
5083 ctxt->wellFormed = 0;
5084 ctxt->disableSAX = 1;
5085 MOVETO_ENDTAG(CUR_PTR);
5086 NEXT;
5087 }
5088}
5089
5090/**
Owen Taylor3473f882001-02-23 17:55:21 +00005091 * xmlParseExternalSubset:
5092 * @ctxt: an XML parser context
5093 * @ExternalID: the external identifier
5094 * @SystemID: the system identifier (or URL)
5095 *
5096 * parse Markup declarations from an external subset
5097 *
5098 * [30] extSubset ::= textDecl? extSubsetDecl
5099 *
5100 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5101 */
5102void
5103xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5104 const xmlChar *SystemID) {
5105 GROW;
5106 if ((RAW == '<') && (NXT(1) == '?') &&
5107 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5108 (NXT(4) == 'l')) {
5109 xmlParseTextDecl(ctxt);
5110 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5111 /*
5112 * The XML REC instructs us to stop parsing right here
5113 */
5114 ctxt->instate = XML_PARSER_EOF;
5115 return;
5116 }
5117 }
5118 if (ctxt->myDoc == NULL) {
5119 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5120 }
5121 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5122 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5123
5124 ctxt->instate = XML_PARSER_DTD;
5125 ctxt->external = 1;
5126 while (((RAW == '<') && (NXT(1) == '?')) ||
5127 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005128 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005129 const xmlChar *check = CUR_PTR;
5130 int cons = ctxt->input->consumed;
5131 int tok = ctxt->token;
5132
5133 GROW;
5134 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5135 xmlParseConditionalSections(ctxt);
5136 } else if (IS_BLANK(CUR)) {
5137 NEXT;
5138 } else if (RAW == '%') {
5139 xmlParsePEReference(ctxt);
5140 } else
5141 xmlParseMarkupDecl(ctxt);
5142
5143 /*
5144 * Pop-up of finished entities.
5145 */
5146 while ((RAW == 0) && (ctxt->inputNr > 1))
5147 xmlPopInput(ctxt);
5148
5149 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
5150 (tok == ctxt->token)) {
5151 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5152 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5153 ctxt->sax->error(ctxt->userData,
5154 "Content error in the external subset\n");
5155 ctxt->wellFormed = 0;
5156 ctxt->disableSAX = 1;
5157 break;
5158 }
5159 }
5160
5161 if (RAW != 0) {
5162 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5163 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5164 ctxt->sax->error(ctxt->userData,
5165 "Extra content at the end of the document\n");
5166 ctxt->wellFormed = 0;
5167 ctxt->disableSAX = 1;
5168 }
5169
5170}
5171
5172/**
5173 * xmlParseReference:
5174 * @ctxt: an XML parser context
5175 *
5176 * parse and handle entity references in content, depending on the SAX
5177 * interface, this may end-up in a call to character() if this is a
5178 * CharRef, a predefined entity, if there is no reference() callback.
5179 * or if the parser was asked to switch to that mode.
5180 *
5181 * [67] Reference ::= EntityRef | CharRef
5182 */
5183void
5184xmlParseReference(xmlParserCtxtPtr ctxt) {
5185 xmlEntityPtr ent;
5186 xmlChar *val;
5187 if (RAW != '&') return;
5188
5189 if (NXT(1) == '#') {
5190 int i = 0;
5191 xmlChar out[10];
5192 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005193 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005194
5195 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5196 /*
5197 * So we are using non-UTF-8 buffers
5198 * Check that the char fit on 8bits, if not
5199 * generate a CharRef.
5200 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005201 if (value <= 0xFF) {
5202 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005203 out[1] = 0;
5204 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5205 (!ctxt->disableSAX))
5206 ctxt->sax->characters(ctxt->userData, out, 1);
5207 } else {
5208 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005209 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005210 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005211 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005212 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5213 (!ctxt->disableSAX))
5214 ctxt->sax->reference(ctxt->userData, out);
5215 }
5216 } else {
5217 /*
5218 * Just encode the value in UTF-8
5219 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005220 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005221 out[i] = 0;
5222 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5223 (!ctxt->disableSAX))
5224 ctxt->sax->characters(ctxt->userData, out, i);
5225 }
5226 } else {
5227 ent = xmlParseEntityRef(ctxt);
5228 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005229 if (!ctxt->wellFormed)
5230 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005231 if ((ent->name != NULL) &&
5232 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5233 xmlNodePtr list = NULL;
5234 int ret;
5235
5236
5237 /*
5238 * The first reference to the entity trigger a parsing phase
5239 * where the ent->children is filled with the result from
5240 * the parsing.
5241 */
5242 if (ent->children == NULL) {
5243 xmlChar *value;
5244 value = ent->content;
5245
5246 /*
5247 * Check that this entity is well formed
5248 */
5249 if ((value != NULL) &&
5250 (value[1] == 0) && (value[0] == '<') &&
5251 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5252 /*
5253 * DONE: get definite answer on this !!!
5254 * Lots of entity decls are used to declare a single
5255 * char
5256 * <!ENTITY lt "<">
5257 * Which seems to be valid since
5258 * 2.4: The ampersand character (&) and the left angle
5259 * bracket (<) may appear in their literal form only
5260 * when used ... They are also legal within the literal
5261 * entity value of an internal entity declaration;i
5262 * see "4.3.2 Well-Formed Parsed Entities".
5263 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5264 * Looking at the OASIS test suite and James Clark
5265 * tests, this is broken. However the XML REC uses
5266 * it. Is the XML REC not well-formed ????
5267 * This is a hack to avoid this problem
5268 *
5269 * ANSWER: since lt gt amp .. are already defined,
5270 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005271 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005272 * is lousy but acceptable.
5273 */
5274 list = xmlNewDocText(ctxt->myDoc, value);
5275 if (list != NULL) {
5276 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5277 (ent->children == NULL)) {
5278 ent->children = list;
5279 ent->last = list;
5280 list->parent = (xmlNodePtr) ent;
5281 } else {
5282 xmlFreeNodeList(list);
5283 }
5284 } else if (list != NULL) {
5285 xmlFreeNodeList(list);
5286 }
5287 } else {
5288 /*
5289 * 4.3.2: An internal general parsed entity is well-formed
5290 * if its replacement text matches the production labeled
5291 * content.
5292 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005293
5294 void *user_data;
5295 /*
5296 * This is a bit hackish but this seems the best
5297 * way to make sure both SAX and DOM entity support
5298 * behaves okay.
5299 */
5300 if (ctxt->userData == ctxt)
5301 user_data = NULL;
5302 else
5303 user_data = ctxt->userData;
5304
Owen Taylor3473f882001-02-23 17:55:21 +00005305 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5306 ctxt->depth++;
5307 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005308 ctxt->sax, user_data, ctxt->depth,
Owen Taylor3473f882001-02-23 17:55:21 +00005309 value, &list);
5310 ctxt->depth--;
5311 } else if (ent->etype ==
5312 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5313 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005314 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005315 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005316 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005317 ctxt->depth--;
5318 } else {
5319 ret = -1;
5320 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5321 ctxt->sax->error(ctxt->userData,
5322 "Internal: invalid entity type\n");
5323 }
5324 if (ret == XML_ERR_ENTITY_LOOP) {
5325 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5326 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5327 ctxt->sax->error(ctxt->userData,
5328 "Detected entity reference loop\n");
5329 ctxt->wellFormed = 0;
5330 ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005331 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005332 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005333 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5334 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005335 (ent->children == NULL)) {
5336 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005337 if (ctxt->replaceEntities) {
5338 /*
5339 * Prune it directly in the generated document
5340 * except for single text nodes.
5341 */
5342 if ((list->type == XML_TEXT_NODE) &&
5343 (list->next == NULL)) {
5344 list->parent = (xmlNodePtr) ent;
5345 list = NULL;
5346 } else {
5347 while (list != NULL) {
5348 list->parent = (xmlNodePtr) ctxt->node;
5349 if (list->next == NULL)
5350 ent->last = list;
5351 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005352 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005353 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005354 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5355 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005356 }
5357 } else {
5358 while (list != NULL) {
5359 list->parent = (xmlNodePtr) ent;
5360 if (list->next == NULL)
5361 ent->last = list;
5362 list = list->next;
5363 }
Owen Taylor3473f882001-02-23 17:55:21 +00005364 }
5365 } else {
5366 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005367 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005368 }
5369 } else if (ret > 0) {
5370 ctxt->errNo = ret;
5371 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5372 ctxt->sax->error(ctxt->userData,
5373 "Entity value required\n");
5374 ctxt->wellFormed = 0;
5375 ctxt->disableSAX = 1;
5376 } else if (list != NULL) {
5377 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005378 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005379 }
5380 }
5381 }
5382 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5383 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5384 /*
5385 * Create a node.
5386 */
5387 ctxt->sax->reference(ctxt->userData, ent->name);
5388 return;
5389 } else if (ctxt->replaceEntities) {
5390 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5391 /*
5392 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005393 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005394 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005395 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005396 if (list == NULL) {
Daniel Veillard8107a222002-01-13 14:10:10 +00005397 xmlNodePtr new = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005398 cur = ent->children;
5399 while (cur != NULL) {
5400 new = xmlCopyNode(cur, 1);
Daniel Veillard8107a222002-01-13 14:10:10 +00005401 if (firstChild == NULL){
5402 firstChild = new;
5403 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005404 xmlAddChild(ctxt->node, new);
5405 if (cur == ent->last)
5406 break;
5407 cur = cur->next;
5408 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005409 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5410 xmlAddEntityReference(ent, firstChild, new);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005411 } else {
5412 /*
5413 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005414 * node with a possible previous text one which
5415 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005416 */
5417 if (ent->children->type == XML_TEXT_NODE)
5418 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5419 if ((ent->last != ent->children) &&
5420 (ent->last->type == XML_TEXT_NODE))
5421 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5422 xmlAddChildList(ctxt->node, ent->children);
5423 }
5424
Owen Taylor3473f882001-02-23 17:55:21 +00005425 /*
5426 * This is to avoid a nasty side effect, see
5427 * characters() in SAX.c
5428 */
5429 ctxt->nodemem = 0;
5430 ctxt->nodelen = 0;
5431 return;
5432 } else {
5433 /*
5434 * Probably running in SAX mode
5435 */
5436 xmlParserInputPtr input;
5437
5438 input = xmlNewEntityInputStream(ctxt, ent);
5439 xmlPushInput(ctxt, input);
5440 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5441 (RAW == '<') && (NXT(1) == '?') &&
5442 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5443 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5444 xmlParseTextDecl(ctxt);
5445 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5446 /*
5447 * The XML REC instructs us to stop parsing right here
5448 */
5449 ctxt->instate = XML_PARSER_EOF;
5450 return;
5451 }
5452 if (input->standalone == 1) {
5453 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5454 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5455 ctxt->sax->error(ctxt->userData,
5456 "external parsed entities cannot be standalone\n");
5457 ctxt->wellFormed = 0;
5458 ctxt->disableSAX = 1;
5459 }
5460 }
5461 return;
5462 }
5463 }
5464 } else {
5465 val = ent->content;
5466 if (val == NULL) return;
5467 /*
5468 * inline the entity.
5469 */
5470 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5471 (!ctxt->disableSAX))
5472 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5473 }
5474 }
5475}
5476
5477/**
5478 * xmlParseEntityRef:
5479 * @ctxt: an XML parser context
5480 *
5481 * parse ENTITY references declarations
5482 *
5483 * [68] EntityRef ::= '&' Name ';'
5484 *
5485 * [ WFC: Entity Declared ]
5486 * In a document without any DTD, a document with only an internal DTD
5487 * subset which contains no parameter entity references, or a document
5488 * with "standalone='yes'", the Name given in the entity reference
5489 * must match that in an entity declaration, except that well-formed
5490 * documents need not declare any of the following entities: amp, lt,
5491 * gt, apos, quot. The declaration of a parameter entity must precede
5492 * any reference to it. Similarly, the declaration of a general entity
5493 * must precede any reference to it which appears in a default value in an
5494 * attribute-list declaration. Note that if entities are declared in the
5495 * external subset or in external parameter entities, a non-validating
5496 * processor is not obligated to read and process their declarations;
5497 * for such documents, the rule that an entity must be declared is a
5498 * well-formedness constraint only if standalone='yes'.
5499 *
5500 * [ WFC: Parsed Entity ]
5501 * An entity reference must not contain the name of an unparsed entity
5502 *
5503 * Returns the xmlEntityPtr if found, or NULL otherwise.
5504 */
5505xmlEntityPtr
5506xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5507 xmlChar *name;
5508 xmlEntityPtr ent = NULL;
5509
5510 GROW;
5511
5512 if (RAW == '&') {
5513 NEXT;
5514 name = xmlParseName(ctxt);
5515 if (name == NULL) {
5516 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5517 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5518 ctxt->sax->error(ctxt->userData,
5519 "xmlParseEntityRef: no name\n");
5520 ctxt->wellFormed = 0;
5521 ctxt->disableSAX = 1;
5522 } else {
5523 if (RAW == ';') {
5524 NEXT;
5525 /*
5526 * Ask first SAX for entity resolution, otherwise try the
5527 * predefined set.
5528 */
5529 if (ctxt->sax != NULL) {
5530 if (ctxt->sax->getEntity != NULL)
5531 ent = ctxt->sax->getEntity(ctxt->userData, name);
5532 if (ent == NULL)
5533 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005534 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5535 ent = getEntity(ctxt, name);
5536 }
Owen Taylor3473f882001-02-23 17:55:21 +00005537 }
5538 /*
5539 * [ WFC: Entity Declared ]
5540 * In a document without any DTD, a document with only an
5541 * internal DTD subset which contains no parameter entity
5542 * references, or a document with "standalone='yes'", the
5543 * Name given in the entity reference must match that in an
5544 * entity declaration, except that well-formed documents
5545 * need not declare any of the following entities: amp, lt,
5546 * gt, apos, quot.
5547 * The declaration of a parameter entity must precede any
5548 * reference to it.
5549 * Similarly, the declaration of a general entity must
5550 * precede any reference to it which appears in a default
5551 * value in an attribute-list declaration. Note that if
5552 * entities are declared in the external subset or in
5553 * external parameter entities, a non-validating processor
5554 * is not obligated to read and process their declarations;
5555 * for such documents, the rule that an entity must be
5556 * declared is a well-formedness constraint only if
5557 * standalone='yes'.
5558 */
5559 if (ent == NULL) {
5560 if ((ctxt->standalone == 1) ||
5561 ((ctxt->hasExternalSubset == 0) &&
5562 (ctxt->hasPErefs == 0))) {
5563 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5564 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5565 ctxt->sax->error(ctxt->userData,
5566 "Entity '%s' not defined\n", name);
5567 ctxt->wellFormed = 0;
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005568 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005569 ctxt->disableSAX = 1;
5570 } else {
5571 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005572 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005573 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005574 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005575 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005576 }
5577 }
5578
5579 /*
5580 * [ WFC: Parsed Entity ]
5581 * An entity reference must not contain the name of an
5582 * unparsed entity
5583 */
5584 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5585 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5586 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5587 ctxt->sax->error(ctxt->userData,
5588 "Entity reference to unparsed entity %s\n", name);
5589 ctxt->wellFormed = 0;
5590 ctxt->disableSAX = 1;
5591 }
5592
5593 /*
5594 * [ WFC: No External Entity References ]
5595 * Attribute values cannot contain direct or indirect
5596 * entity references to external entities.
5597 */
5598 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5599 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5600 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5602 ctxt->sax->error(ctxt->userData,
5603 "Attribute references external entity '%s'\n", name);
5604 ctxt->wellFormed = 0;
5605 ctxt->disableSAX = 1;
5606 }
5607 /*
5608 * [ WFC: No < in Attribute Values ]
5609 * The replacement text of any entity referred to directly or
5610 * indirectly in an attribute value (other than "&lt;") must
5611 * not contain a <.
5612 */
5613 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5614 (ent != NULL) &&
5615 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5616 (ent->content != NULL) &&
5617 (xmlStrchr(ent->content, '<'))) {
5618 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5619 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5620 ctxt->sax->error(ctxt->userData,
5621 "'<' in entity '%s' is not allowed in attributes values\n", name);
5622 ctxt->wellFormed = 0;
5623 ctxt->disableSAX = 1;
5624 }
5625
5626 /*
5627 * Internal check, no parameter entities here ...
5628 */
5629 else {
5630 switch (ent->etype) {
5631 case XML_INTERNAL_PARAMETER_ENTITY:
5632 case XML_EXTERNAL_PARAMETER_ENTITY:
5633 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5634 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5635 ctxt->sax->error(ctxt->userData,
5636 "Attempt to reference the parameter entity '%s'\n", name);
5637 ctxt->wellFormed = 0;
5638 ctxt->disableSAX = 1;
5639 break;
5640 default:
5641 break;
5642 }
5643 }
5644
5645 /*
5646 * [ WFC: No Recursion ]
5647 * A parsed entity must not contain a recursive reference
5648 * to itself, either directly or indirectly.
5649 * Done somewhere else
5650 */
5651
5652 } else {
5653 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5654 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5655 ctxt->sax->error(ctxt->userData,
5656 "xmlParseEntityRef: expecting ';'\n");
5657 ctxt->wellFormed = 0;
5658 ctxt->disableSAX = 1;
5659 }
5660 xmlFree(name);
5661 }
5662 }
5663 return(ent);
5664}
5665
5666/**
5667 * xmlParseStringEntityRef:
5668 * @ctxt: an XML parser context
5669 * @str: a pointer to an index in the string
5670 *
5671 * parse ENTITY references declarations, but this version parses it from
5672 * a string value.
5673 *
5674 * [68] EntityRef ::= '&' Name ';'
5675 *
5676 * [ WFC: Entity Declared ]
5677 * In a document without any DTD, a document with only an internal DTD
5678 * subset which contains no parameter entity references, or a document
5679 * with "standalone='yes'", the Name given in the entity reference
5680 * must match that in an entity declaration, except that well-formed
5681 * documents need not declare any of the following entities: amp, lt,
5682 * gt, apos, quot. The declaration of a parameter entity must precede
5683 * any reference to it. Similarly, the declaration of a general entity
5684 * must precede any reference to it which appears in a default value in an
5685 * attribute-list declaration. Note that if entities are declared in the
5686 * external subset or in external parameter entities, a non-validating
5687 * processor is not obligated to read and process their declarations;
5688 * for such documents, the rule that an entity must be declared is a
5689 * well-formedness constraint only if standalone='yes'.
5690 *
5691 * [ WFC: Parsed Entity ]
5692 * An entity reference must not contain the name of an unparsed entity
5693 *
5694 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5695 * is updated to the current location in the string.
5696 */
5697xmlEntityPtr
5698xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5699 xmlChar *name;
5700 const xmlChar *ptr;
5701 xmlChar cur;
5702 xmlEntityPtr ent = NULL;
5703
5704 if ((str == NULL) || (*str == NULL))
5705 return(NULL);
5706 ptr = *str;
5707 cur = *ptr;
5708 if (cur == '&') {
5709 ptr++;
5710 cur = *ptr;
5711 name = xmlParseStringName(ctxt, &ptr);
5712 if (name == NULL) {
5713 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5715 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005716 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005717 ctxt->wellFormed = 0;
5718 ctxt->disableSAX = 1;
5719 } else {
5720 if (*ptr == ';') {
5721 ptr++;
5722 /*
5723 * Ask first SAX for entity resolution, otherwise try the
5724 * predefined set.
5725 */
5726 if (ctxt->sax != NULL) {
5727 if (ctxt->sax->getEntity != NULL)
5728 ent = ctxt->sax->getEntity(ctxt->userData, name);
5729 if (ent == NULL)
5730 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005731 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5732 ent = getEntity(ctxt, name);
5733 }
Owen Taylor3473f882001-02-23 17:55:21 +00005734 }
5735 /*
5736 * [ WFC: Entity Declared ]
5737 * In a document without any DTD, a document with only an
5738 * internal DTD subset which contains no parameter entity
5739 * references, or a document with "standalone='yes'", the
5740 * Name given in the entity reference must match that in an
5741 * entity declaration, except that well-formed documents
5742 * need not declare any of the following entities: amp, lt,
5743 * gt, apos, quot.
5744 * The declaration of a parameter entity must precede any
5745 * reference to it.
5746 * Similarly, the declaration of a general entity must
5747 * precede any reference to it which appears in a default
5748 * value in an attribute-list declaration. Note that if
5749 * entities are declared in the external subset or in
5750 * external parameter entities, a non-validating processor
5751 * is not obligated to read and process their declarations;
5752 * for such documents, the rule that an entity must be
5753 * declared is a well-formedness constraint only if
5754 * standalone='yes'.
5755 */
5756 if (ent == NULL) {
5757 if ((ctxt->standalone == 1) ||
5758 ((ctxt->hasExternalSubset == 0) &&
5759 (ctxt->hasPErefs == 0))) {
5760 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5761 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5762 ctxt->sax->error(ctxt->userData,
5763 "Entity '%s' not defined\n", name);
5764 ctxt->wellFormed = 0;
5765 ctxt->disableSAX = 1;
5766 } else {
5767 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5768 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5769 ctxt->sax->warning(ctxt->userData,
5770 "Entity '%s' not defined\n", name);
5771 }
5772 }
5773
5774 /*
5775 * [ WFC: Parsed Entity ]
5776 * An entity reference must not contain the name of an
5777 * unparsed entity
5778 */
5779 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5780 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5781 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5782 ctxt->sax->error(ctxt->userData,
5783 "Entity reference to unparsed entity %s\n", name);
5784 ctxt->wellFormed = 0;
5785 ctxt->disableSAX = 1;
5786 }
5787
5788 /*
5789 * [ WFC: No External Entity References ]
5790 * Attribute values cannot contain direct or indirect
5791 * entity references to external entities.
5792 */
5793 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5794 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5795 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5796 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5797 ctxt->sax->error(ctxt->userData,
5798 "Attribute references external entity '%s'\n", name);
5799 ctxt->wellFormed = 0;
5800 ctxt->disableSAX = 1;
5801 }
5802 /*
5803 * [ WFC: No < in Attribute Values ]
5804 * The replacement text of any entity referred to directly or
5805 * indirectly in an attribute value (other than "&lt;") must
5806 * not contain a <.
5807 */
5808 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5809 (ent != NULL) &&
5810 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5811 (ent->content != NULL) &&
5812 (xmlStrchr(ent->content, '<'))) {
5813 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5814 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5815 ctxt->sax->error(ctxt->userData,
5816 "'<' in entity '%s' is not allowed in attributes values\n", name);
5817 ctxt->wellFormed = 0;
5818 ctxt->disableSAX = 1;
5819 }
5820
5821 /*
5822 * Internal check, no parameter entities here ...
5823 */
5824 else {
5825 switch (ent->etype) {
5826 case XML_INTERNAL_PARAMETER_ENTITY:
5827 case XML_EXTERNAL_PARAMETER_ENTITY:
5828 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5829 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5830 ctxt->sax->error(ctxt->userData,
5831 "Attempt to reference the parameter entity '%s'\n", name);
5832 ctxt->wellFormed = 0;
5833 ctxt->disableSAX = 1;
5834 break;
5835 default:
5836 break;
5837 }
5838 }
5839
5840 /*
5841 * [ WFC: No Recursion ]
5842 * A parsed entity must not contain a recursive reference
5843 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005844 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00005845 */
5846
5847 } else {
5848 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5849 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5850 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005851 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005852 ctxt->wellFormed = 0;
5853 ctxt->disableSAX = 1;
5854 }
5855 xmlFree(name);
5856 }
5857 }
5858 *str = ptr;
5859 return(ent);
5860}
5861
5862/**
5863 * xmlParsePEReference:
5864 * @ctxt: an XML parser context
5865 *
5866 * parse PEReference declarations
 * The entity content is handled directly by pushing its content as
5868 * a new input stream.
5869 *
5870 * [69] PEReference ::= '%' Name ';'
5871 *
5872 * [ WFC: No Recursion ]
5873 * A parsed entity must not contain a recursive
5874 * reference to itself, either directly or indirectly.
5875 *
5876 * [ WFC: Entity Declared ]
5877 * In a document without any DTD, a document with only an internal DTD
5878 * subset which contains no parameter entity references, or a document
5879 * with "standalone='yes'", ... ... The declaration of a parameter
5880 * entity must precede any reference to it...
5881 *
5882 * [ VC: Entity Declared ]
5883 * In a document with an external subset or external parameter entities
5884 * with "standalone='no'", ... ... The declaration of a parameter entity
5885 * must precede any reference to it...
5886 *
5887 * [ WFC: In DTD ]
5888 * Parameter-entity references may only appear in the DTD.
5889 * NOTE: misleading but this is handled.
5890 */
5891void
5892xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5893 xmlChar *name;
5894 xmlEntityPtr entity = NULL;
5895 xmlParserInputPtr input;
5896
5897 if (RAW == '%') {
5898 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005899 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005900 if (name == NULL) {
5901 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5902 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5903 ctxt->sax->error(ctxt->userData,
5904 "xmlParsePEReference: no name\n");
5905 ctxt->wellFormed = 0;
5906 ctxt->disableSAX = 1;
5907 } else {
5908 if (RAW == ';') {
5909 NEXT;
5910 if ((ctxt->sax != NULL) &&
5911 (ctxt->sax->getParameterEntity != NULL))
5912 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5913 name);
5914 if (entity == NULL) {
5915 /*
5916 * [ WFC: Entity Declared ]
5917 * In a document without any DTD, a document with only an
5918 * internal DTD subset which contains no parameter entity
5919 * references, or a document with "standalone='yes'", ...
5920 * ... The declaration of a parameter entity must precede
5921 * any reference to it...
5922 */
5923 if ((ctxt->standalone == 1) ||
5924 ((ctxt->hasExternalSubset == 0) &&
5925 (ctxt->hasPErefs == 0))) {
5926 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5927 if ((!ctxt->disableSAX) &&
5928 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5929 ctxt->sax->error(ctxt->userData,
5930 "PEReference: %%%s; not found\n", name);
5931 ctxt->wellFormed = 0;
5932 ctxt->disableSAX = 1;
5933 } else {
5934 /*
5935 * [ VC: Entity Declared ]
5936 * In a document with an external subset or external
5937 * parameter entities with "standalone='no'", ...
5938 * ... The declaration of a parameter entity must precede
5939 * any reference to it...
5940 */
5941 if ((!ctxt->disableSAX) &&
5942 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5943 ctxt->sax->warning(ctxt->userData,
5944 "PEReference: %%%s; not found\n", name);
5945 ctxt->valid = 0;
5946 }
5947 } else {
5948 /*
5949 * Internal checking in case the entity quest barfed
5950 */
5951 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5952 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5953 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5954 ctxt->sax->warning(ctxt->userData,
5955 "Internal: %%%s; is not a parameter entity\n", name);
5956 } else {
5957 /*
5958 * TODO !!!
5959 * handle the extra spaces added before and after
5960 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5961 */
5962 input = xmlNewEntityInputStream(ctxt, entity);
5963 xmlPushInput(ctxt, input);
5964 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5965 (RAW == '<') && (NXT(1) == '?') &&
5966 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5967 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5968 xmlParseTextDecl(ctxt);
5969 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5970 /*
5971 * The XML REC instructs us to stop parsing
5972 * right here
5973 */
5974 ctxt->instate = XML_PARSER_EOF;
5975 xmlFree(name);
5976 return;
5977 }
5978 }
5979 if (ctxt->token == 0)
5980 ctxt->token = ' ';
5981 }
5982 }
5983 ctxt->hasPErefs = 1;
5984 } else {
5985 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5987 ctxt->sax->error(ctxt->userData,
5988 "xmlParsePEReference: expecting ';'\n");
5989 ctxt->wellFormed = 0;
5990 ctxt->disableSAX = 1;
5991 }
5992 xmlFree(name);
5993 }
5994 }
5995}
5996
5997/**
5998 * xmlParseStringPEReference:
5999 * @ctxt: an XML parser context
6000 * @str: a pointer to an index in the string
6001 *
6002 * parse PEReference declarations
6003 *
6004 * [69] PEReference ::= '%' Name ';'
6005 *
6006 * [ WFC: No Recursion ]
6007 * A parsed entity must not contain a recursive
6008 * reference to itself, either directly or indirectly.
6009 *
6010 * [ WFC: Entity Declared ]
6011 * In a document without any DTD, a document with only an internal DTD
6012 * subset which contains no parameter entity references, or a document
6013 * with "standalone='yes'", ... ... The declaration of a parameter
6014 * entity must precede any reference to it...
6015 *
6016 * [ VC: Entity Declared ]
6017 * In a document with an external subset or external parameter entities
6018 * with "standalone='no'", ... ... The declaration of a parameter entity
6019 * must precede any reference to it...
6020 *
6021 * [ WFC: In DTD ]
6022 * Parameter-entity references may only appear in the DTD.
6023 * NOTE: misleading but this is handled.
6024 *
6025 * Returns the string of the entity content.
6026 * str is updated to the current value of the index
6027 */
6028xmlEntityPtr
6029xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6030 const xmlChar *ptr;
6031 xmlChar cur;
6032 xmlChar *name;
6033 xmlEntityPtr entity = NULL;
6034
6035 if ((str == NULL) || (*str == NULL)) return(NULL);
6036 ptr = *str;
6037 cur = *ptr;
6038 if (cur == '%') {
6039 ptr++;
6040 cur = *ptr;
6041 name = xmlParseStringName(ctxt, &ptr);
6042 if (name == NULL) {
6043 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6044 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6045 ctxt->sax->error(ctxt->userData,
6046 "xmlParseStringPEReference: no name\n");
6047 ctxt->wellFormed = 0;
6048 ctxt->disableSAX = 1;
6049 } else {
6050 cur = *ptr;
6051 if (cur == ';') {
6052 ptr++;
6053 cur = *ptr;
6054 if ((ctxt->sax != NULL) &&
6055 (ctxt->sax->getParameterEntity != NULL))
6056 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6057 name);
6058 if (entity == NULL) {
6059 /*
6060 * [ WFC: Entity Declared ]
6061 * In a document without any DTD, a document with only an
6062 * internal DTD subset which contains no parameter entity
6063 * references, or a document with "standalone='yes'", ...
6064 * ... The declaration of a parameter entity must precede
6065 * any reference to it...
6066 */
6067 if ((ctxt->standalone == 1) ||
6068 ((ctxt->hasExternalSubset == 0) &&
6069 (ctxt->hasPErefs == 0))) {
6070 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6071 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6072 ctxt->sax->error(ctxt->userData,
6073 "PEReference: %%%s; not found\n", name);
6074 ctxt->wellFormed = 0;
6075 ctxt->disableSAX = 1;
6076 } else {
6077 /*
6078 * [ VC: Entity Declared ]
6079 * In a document with an external subset or external
6080 * parameter entities with "standalone='no'", ...
6081 * ... The declaration of a parameter entity must
6082 * precede any reference to it...
6083 */
6084 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6085 ctxt->sax->warning(ctxt->userData,
6086 "PEReference: %%%s; not found\n", name);
6087 ctxt->valid = 0;
6088 }
6089 } else {
6090 /*
6091 * Internal checking in case the entity quest barfed
6092 */
6093 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6094 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6095 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6096 ctxt->sax->warning(ctxt->userData,
6097 "Internal: %%%s; is not a parameter entity\n", name);
6098 }
6099 }
6100 ctxt->hasPErefs = 1;
6101 } else {
6102 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6103 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6104 ctxt->sax->error(ctxt->userData,
6105 "xmlParseStringPEReference: expecting ';'\n");
6106 ctxt->wellFormed = 0;
6107 ctxt->disableSAX = 1;
6108 }
6109 xmlFree(name);
6110 }
6111 }
6112 *str = ptr;
6113 return(entity);
6114}
6115
6116/**
6117 * xmlParseDocTypeDecl:
6118 * @ctxt: an XML parser context
6119 *
6120 * parse a DOCTYPE declaration
6121 *
6122 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6123 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6124 *
6125 * [ VC: Root Element Type ]
6126 * The Name in the document type declaration must match the element
6127 * type of the root element.
6128 */
6129
6130void
6131xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6132 xmlChar *name = NULL;
6133 xmlChar *ExternalID = NULL;
6134 xmlChar *URI = NULL;
6135
6136 /*
6137 * We know that '<!DOCTYPE' has been detected.
6138 */
6139 SKIP(9);
6140
6141 SKIP_BLANKS;
6142
6143 /*
6144 * Parse the DOCTYPE name.
6145 */
6146 name = xmlParseName(ctxt);
6147 if (name == NULL) {
6148 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6149 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6150 ctxt->sax->error(ctxt->userData,
6151 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6152 ctxt->wellFormed = 0;
6153 ctxt->disableSAX = 1;
6154 }
6155 ctxt->intSubName = name;
6156
6157 SKIP_BLANKS;
6158
6159 /*
6160 * Check for SystemID and ExternalID
6161 */
6162 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6163
6164 if ((URI != NULL) || (ExternalID != NULL)) {
6165 ctxt->hasExternalSubset = 1;
6166 }
6167 ctxt->extSubURI = URI;
6168 ctxt->extSubSystem = ExternalID;
6169
6170 SKIP_BLANKS;
6171
6172 /*
6173 * Create and update the internal subset.
6174 */
6175 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6176 (!ctxt->disableSAX))
6177 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6178
6179 /*
6180 * Is there any internal subset declarations ?
6181 * they are handled separately in xmlParseInternalSubset()
6182 */
6183 if (RAW == '[')
6184 return;
6185
6186 /*
6187 * We should be at the end of the DOCTYPE declaration.
6188 */
6189 if (RAW != '>') {
6190 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6191 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006192 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006193 ctxt->wellFormed = 0;
6194 ctxt->disableSAX = 1;
6195 }
6196 NEXT;
6197}
6198
6199/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006200 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006201 * @ctxt: an XML parser context
6202 *
6203 * parse the internal subset declaration
6204 *
6205 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6206 */
6207
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006208static void
Owen Taylor3473f882001-02-23 17:55:21 +00006209xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6210 /*
6211 * Is there any DTD definition ?
6212 */
6213 if (RAW == '[') {
6214 ctxt->instate = XML_PARSER_DTD;
6215 NEXT;
6216 /*
6217 * Parse the succession of Markup declarations and
6218 * PEReferences.
6219 * Subsequence (markupdecl | PEReference | S)*
6220 */
6221 while (RAW != ']') {
6222 const xmlChar *check = CUR_PTR;
6223 int cons = ctxt->input->consumed;
6224
6225 SKIP_BLANKS;
6226 xmlParseMarkupDecl(ctxt);
6227 xmlParsePEReference(ctxt);
6228
6229 /*
6230 * Pop-up of finished entities.
6231 */
6232 while ((RAW == 0) && (ctxt->inputNr > 1))
6233 xmlPopInput(ctxt);
6234
6235 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6236 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6237 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6238 ctxt->sax->error(ctxt->userData,
6239 "xmlParseInternalSubset: error detected in Markup declaration\n");
6240 ctxt->wellFormed = 0;
6241 ctxt->disableSAX = 1;
6242 break;
6243 }
6244 }
6245 if (RAW == ']') {
6246 NEXT;
6247 SKIP_BLANKS;
6248 }
6249 }
6250
6251 /*
6252 * We should be at the end of the DOCTYPE declaration.
6253 */
6254 if (RAW != '>') {
6255 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6256 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006257 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006258 ctxt->wellFormed = 0;
6259 ctxt->disableSAX = 1;
6260 }
6261 NEXT;
6262}
6263
6264/**
6265 * xmlParseAttribute:
6266 * @ctxt: an XML parser context
6267 * @value: a xmlChar ** used to store the value of the attribute
6268 *
6269 * parse an attribute
6270 *
6271 * [41] Attribute ::= Name Eq AttValue
6272 *
6273 * [ WFC: No External Entity References ]
6274 * Attribute values cannot contain direct or indirect entity references
6275 * to external entities.
6276 *
6277 * [ WFC: No < in Attribute Values ]
6278 * The replacement text of any entity referred to directly or indirectly in
6279 * an attribute value (other than "&lt;") must not contain a <.
6280 *
6281 * [ VC: Attribute Value Type ]
6282 * The attribute must have been declared; the value must be of the type
6283 * declared for it.
6284 *
6285 * [25] Eq ::= S? '=' S?
6286 *
6287 * With namespace:
6288 *
6289 * [NS 11] Attribute ::= QName Eq AttValue
6290 *
6291 * Also the case QName == xmlns:??? is handled independently as a namespace
6292 * definition.
6293 *
6294 * Returns the attribute name, and the value in *value.
6295 */
6296
6297xmlChar *
6298xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6299 xmlChar *name, *val;
6300
6301 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006302 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006303 name = xmlParseName(ctxt);
6304 if (name == NULL) {
6305 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6306 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6307 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6308 ctxt->wellFormed = 0;
6309 ctxt->disableSAX = 1;
6310 return(NULL);
6311 }
6312
6313 /*
6314 * read the value
6315 */
6316 SKIP_BLANKS;
6317 if (RAW == '=') {
6318 NEXT;
6319 SKIP_BLANKS;
6320 val = xmlParseAttValue(ctxt);
6321 ctxt->instate = XML_PARSER_CONTENT;
6322 } else {
6323 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6324 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6325 ctxt->sax->error(ctxt->userData,
6326 "Specification mandate value for attribute %s\n", name);
6327 ctxt->wellFormed = 0;
6328 ctxt->disableSAX = 1;
6329 xmlFree(name);
6330 return(NULL);
6331 }
6332
6333 /*
6334 * Check that xml:lang conforms to the specification
6335 * No more registered as an error, just generate a warning now
6336 * since this was deprecated in XML second edition
6337 */
6338 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6339 if (!xmlCheckLanguageID(val)) {
6340 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6341 ctxt->sax->warning(ctxt->userData,
6342 "Malformed value for xml:lang : %s\n", val);
6343 }
6344 }
6345
6346 /*
6347 * Check that xml:space conforms to the specification
6348 */
6349 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6350 if (xmlStrEqual(val, BAD_CAST "default"))
6351 *(ctxt->space) = 0;
6352 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6353 *(ctxt->space) = 1;
6354 else {
6355 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6356 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6357 ctxt->sax->error(ctxt->userData,
6358"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6359 val);
6360 ctxt->wellFormed = 0;
6361 ctxt->disableSAX = 1;
6362 }
6363 }
6364
6365 *value = val;
6366 return(name);
6367}
6368
6369/**
6370 * xmlParseStartTag:
6371 * @ctxt: an XML parser context
6372 *
6373 * parse a start of tag either for rule element or
6374 * EmptyElement. In both case we don't parse the tag closing chars.
6375 *
6376 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6377 *
6378 * [ WFC: Unique Att Spec ]
6379 * No attribute name may appear more than once in the same start-tag or
6380 * empty-element tag.
6381 *
6382 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6383 *
6384 * [ WFC: Unique Att Spec ]
6385 * No attribute name may appear more than once in the same start-tag or
6386 * empty-element tag.
6387 *
6388 * With namespace:
6389 *
6390 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6391 *
6392 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6393 *
6394 * Returns the element name parsed
6395 */
6396
6397xmlChar *
6398xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6399 xmlChar *name;
6400 xmlChar *attname;
6401 xmlChar *attvalue;
6402 const xmlChar **atts = NULL;
6403 int nbatts = 0;
6404 int maxatts = 0;
6405 int i;
6406
6407 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006408 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006409
6410 name = xmlParseName(ctxt);
6411 if (name == NULL) {
6412 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6413 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6414 ctxt->sax->error(ctxt->userData,
6415 "xmlParseStartTag: invalid element name\n");
6416 ctxt->wellFormed = 0;
6417 ctxt->disableSAX = 1;
6418 return(NULL);
6419 }
6420
6421 /*
6422 * Now parse the attributes, it ends up with the ending
6423 *
6424 * (S Attribute)* S?
6425 */
6426 SKIP_BLANKS;
6427 GROW;
6428
Daniel Veillard21a0f912001-02-25 19:54:14 +00006429 while ((RAW != '>') &&
6430 ((RAW != '/') || (NXT(1) != '>')) &&
6431 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006432 const xmlChar *q = CUR_PTR;
6433 int cons = ctxt->input->consumed;
6434
6435 attname = xmlParseAttribute(ctxt, &attvalue);
6436 if ((attname != NULL) && (attvalue != NULL)) {
6437 /*
6438 * [ WFC: Unique Att Spec ]
6439 * No attribute name may appear more than once in the same
6440 * start-tag or empty-element tag.
6441 */
6442 for (i = 0; i < nbatts;i += 2) {
6443 if (xmlStrEqual(atts[i], attname)) {
6444 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6445 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6446 ctxt->sax->error(ctxt->userData,
6447 "Attribute %s redefined\n",
6448 attname);
6449 ctxt->wellFormed = 0;
6450 ctxt->disableSAX = 1;
6451 xmlFree(attname);
6452 xmlFree(attvalue);
6453 goto failed;
6454 }
6455 }
6456
6457 /*
6458 * Add the pair to atts
6459 */
6460 if (atts == NULL) {
6461 maxatts = 10;
6462 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6463 if (atts == NULL) {
6464 xmlGenericError(xmlGenericErrorContext,
6465 "malloc of %ld byte failed\n",
6466 maxatts * (long)sizeof(xmlChar *));
6467 return(NULL);
6468 }
6469 } else if (nbatts + 4 > maxatts) {
6470 maxatts *= 2;
6471 atts = (const xmlChar **) xmlRealloc((void *) atts,
6472 maxatts * sizeof(xmlChar *));
6473 if (atts == NULL) {
6474 xmlGenericError(xmlGenericErrorContext,
6475 "realloc of %ld byte failed\n",
6476 maxatts * (long)sizeof(xmlChar *));
6477 return(NULL);
6478 }
6479 }
6480 atts[nbatts++] = attname;
6481 atts[nbatts++] = attvalue;
6482 atts[nbatts] = NULL;
6483 atts[nbatts + 1] = NULL;
6484 } else {
6485 if (attname != NULL)
6486 xmlFree(attname);
6487 if (attvalue != NULL)
6488 xmlFree(attvalue);
6489 }
6490
6491failed:
6492
6493 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6494 break;
6495 if (!IS_BLANK(RAW)) {
6496 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6497 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6498 ctxt->sax->error(ctxt->userData,
6499 "attributes construct error\n");
6500 ctxt->wellFormed = 0;
6501 ctxt->disableSAX = 1;
6502 }
6503 SKIP_BLANKS;
6504 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6505 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6506 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6507 ctxt->sax->error(ctxt->userData,
6508 "xmlParseStartTag: problem parsing attributes\n");
6509 ctxt->wellFormed = 0;
6510 ctxt->disableSAX = 1;
6511 break;
6512 }
6513 GROW;
6514 }
6515
6516 /*
6517 * SAX: Start of Element !
6518 */
6519 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6520 (!ctxt->disableSAX))
6521 ctxt->sax->startElement(ctxt->userData, name, atts);
6522
6523 if (atts != NULL) {
6524 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6525 xmlFree((void *) atts);
6526 }
6527 return(name);
6528}
6529
6530/**
6531 * xmlParseEndTag:
6532 * @ctxt: an XML parser context
6533 *
6534 * parse an end of tag
6535 *
6536 * [42] ETag ::= '</' Name S? '>'
6537 *
6538 * With namespace
6539 *
6540 * [NS 9] ETag ::= '</' QName S? '>'
6541 */
6542
6543void
6544xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6545 xmlChar *name;
6546 xmlChar *oldname;
6547
6548 GROW;
6549 if ((RAW != '<') || (NXT(1) != '/')) {
6550 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6551 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6552 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6553 ctxt->wellFormed = 0;
6554 ctxt->disableSAX = 1;
6555 return;
6556 }
6557 SKIP(2);
6558
6559 name = xmlParseName(ctxt);
6560
6561 /*
6562 * We should definitely be at the ending "S? '>'" part
6563 */
6564 GROW;
6565 SKIP_BLANKS;
6566 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6567 ctxt->errNo = XML_ERR_GT_REQUIRED;
6568 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6569 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6570 ctxt->wellFormed = 0;
6571 ctxt->disableSAX = 1;
6572 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006573 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006574
6575 /*
6576 * [ WFC: Element Type Match ]
6577 * The Name in an element's end-tag must match the element type in the
6578 * start-tag.
6579 *
6580 */
6581 if ((name == NULL) || (ctxt->name == NULL) ||
6582 (!xmlStrEqual(name, ctxt->name))) {
6583 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6584 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6585 if ((name != NULL) && (ctxt->name != NULL)) {
6586 ctxt->sax->error(ctxt->userData,
6587 "Opening and ending tag mismatch: %s and %s\n",
6588 ctxt->name, name);
6589 } else if (ctxt->name != NULL) {
6590 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006591 "Ending tag error for: %s\n", ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006592 } else {
6593 ctxt->sax->error(ctxt->userData,
6594 "Ending tag error: internal error ???\n");
6595 }
6596
6597 }
6598 ctxt->wellFormed = 0;
6599 ctxt->disableSAX = 1;
6600 }
6601
6602 /*
6603 * SAX: End of Tag
6604 */
6605 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6606 (!ctxt->disableSAX))
6607 ctxt->sax->endElement(ctxt->userData, name);
6608
6609 if (name != NULL)
6610 xmlFree(name);
6611 oldname = namePop(ctxt);
6612 spacePop(ctxt);
6613 if (oldname != NULL) {
6614#ifdef DEBUG_STACK
6615 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6616#endif
6617 xmlFree(oldname);
6618 }
6619 return;
6620}
6621
6622/**
6623 * xmlParseCDSect:
6624 * @ctxt: an XML parser context
6625 *
6626 * Parse escaped pure raw content.
6627 *
6628 * [18] CDSect ::= CDStart CData CDEnd
6629 *
6630 * [19] CDStart ::= '<![CDATA['
6631 *
6632 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6633 *
6634 * [21] CDEnd ::= ']]>'
6635 */
6636void
6637xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6638 xmlChar *buf = NULL;
6639 int len = 0;
6640 int size = XML_PARSER_BUFFER_SIZE;
6641 int r, rl;
6642 int s, sl;
6643 int cur, l;
6644 int count = 0;
6645
6646 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6647 (NXT(2) == '[') && (NXT(3) == 'C') &&
6648 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6649 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6650 (NXT(8) == '[')) {
6651 SKIP(9);
6652 } else
6653 return;
6654
6655 ctxt->instate = XML_PARSER_CDATA_SECTION;
6656 r = CUR_CHAR(rl);
6657 if (!IS_CHAR(r)) {
6658 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6659 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6660 ctxt->sax->error(ctxt->userData,
6661 "CData section not finished\n");
6662 ctxt->wellFormed = 0;
6663 ctxt->disableSAX = 1;
6664 ctxt->instate = XML_PARSER_CONTENT;
6665 return;
6666 }
6667 NEXTL(rl);
6668 s = CUR_CHAR(sl);
6669 if (!IS_CHAR(s)) {
6670 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6671 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6672 ctxt->sax->error(ctxt->userData,
6673 "CData section not finished\n");
6674 ctxt->wellFormed = 0;
6675 ctxt->disableSAX = 1;
6676 ctxt->instate = XML_PARSER_CONTENT;
6677 return;
6678 }
6679 NEXTL(sl);
6680 cur = CUR_CHAR(l);
6681 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6682 if (buf == NULL) {
6683 xmlGenericError(xmlGenericErrorContext,
6684 "malloc of %d byte failed\n", size);
6685 return;
6686 }
6687 while (IS_CHAR(cur) &&
6688 ((r != ']') || (s != ']') || (cur != '>'))) {
6689 if (len + 5 >= size) {
6690 size *= 2;
6691 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6692 if (buf == NULL) {
6693 xmlGenericError(xmlGenericErrorContext,
6694 "realloc of %d byte failed\n", size);
6695 return;
6696 }
6697 }
6698 COPY_BUF(rl,buf,len,r);
6699 r = s;
6700 rl = sl;
6701 s = cur;
6702 sl = l;
6703 count++;
6704 if (count > 50) {
6705 GROW;
6706 count = 0;
6707 }
6708 NEXTL(l);
6709 cur = CUR_CHAR(l);
6710 }
6711 buf[len] = 0;
6712 ctxt->instate = XML_PARSER_CONTENT;
6713 if (cur != '>') {
6714 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6715 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6716 ctxt->sax->error(ctxt->userData,
6717 "CData section not finished\n%.50s\n", buf);
6718 ctxt->wellFormed = 0;
6719 ctxt->disableSAX = 1;
6720 xmlFree(buf);
6721 return;
6722 }
6723 NEXTL(l);
6724
6725 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006726 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00006727 */
6728 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6729 if (ctxt->sax->cdataBlock != NULL)
6730 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006731 else if (ctxt->sax->characters != NULL)
6732 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006733 }
6734 xmlFree(buf);
6735}
6736
6737/**
6738 * xmlParseContent:
6739 * @ctxt: an XML parser context
6740 *
6741 * Parse a content:
6742 *
6743 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6744 */
6745
6746void
6747xmlParseContent(xmlParserCtxtPtr ctxt) {
6748 GROW;
Daniel Veillard561b7f82002-03-20 21:55:57 +00006749 while (((RAW != 0) || (ctxt->token != 0)) &&
Owen Taylor3473f882001-02-23 17:55:21 +00006750 ((RAW != '<') || (NXT(1) != '/'))) {
6751 const xmlChar *test = CUR_PTR;
6752 int cons = ctxt->input->consumed;
Daniel Veillard04be4f52001-03-26 21:23:53 +00006753 int tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006754 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006755
6756 /*
6757 * Handle possible processed charrefs.
6758 */
6759 if (ctxt->token != 0) {
6760 xmlParseCharData(ctxt, 0);
6761 }
6762 /*
6763 * First case : a Processing Instruction.
6764 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006765 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006766 xmlParsePI(ctxt);
6767 }
6768
6769 /*
6770 * Second case : a CDSection
6771 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006772 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006773 (NXT(2) == '[') && (NXT(3) == 'C') &&
6774 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6775 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6776 (NXT(8) == '[')) {
6777 xmlParseCDSect(ctxt);
6778 }
6779
6780 /*
6781 * Third case : a comment
6782 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006783 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006784 (NXT(2) == '-') && (NXT(3) == '-')) {
6785 xmlParseComment(ctxt);
6786 ctxt->instate = XML_PARSER_CONTENT;
6787 }
6788
6789 /*
6790 * Fourth case : a sub-element.
6791 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006792 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006793 xmlParseElement(ctxt);
6794 }
6795
6796 /*
6797 * Fifth case : a reference. If if has not been resolved,
6798 * parsing returns it's Name, create the node
6799 */
6800
Daniel Veillard21a0f912001-02-25 19:54:14 +00006801 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006802 xmlParseReference(ctxt);
6803 }
6804
6805 /*
6806 * Last case, text. Note that References are handled directly.
6807 */
6808 else {
6809 xmlParseCharData(ctxt, 0);
6810 }
6811
6812 GROW;
6813 /*
6814 * Pop-up of finished entities.
6815 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00006816 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00006817 xmlPopInput(ctxt);
6818 SHRINK;
6819
6820 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6821 (tok == ctxt->token)) {
6822 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6823 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6824 ctxt->sax->error(ctxt->userData,
6825 "detected an error in element content\n");
6826 ctxt->wellFormed = 0;
6827 ctxt->disableSAX = 1;
6828 ctxt->instate = XML_PARSER_EOF;
6829 break;
6830 }
6831 }
6832}
6833
6834/**
6835 * xmlParseElement:
6836 * @ctxt: an XML parser context
6837 *
6838 * parse an XML element, this is highly recursive
6839 *
6840 * [39] element ::= EmptyElemTag | STag content ETag
6841 *
6842 * [ WFC: Element Type Match ]
6843 * The Name in an element's end-tag must match the element type in the
6844 * start-tag.
6845 *
6846 * [ VC: Element Valid ]
6847 * An element is valid if there is a declaration matching elementdecl
6848 * where the Name matches the element type and one of the following holds:
6849 * - The declaration matches EMPTY and the element has no content.
6850 * - The declaration matches children and the sequence of child elements
6851 * belongs to the language generated by the regular expression in the
6852 * content model, with optional white space (characters matching the
6853 * nonterminal S) between each pair of child elements.
6854 * - The declaration matches Mixed and the content consists of character
6855 * data and child elements whose types match names in the content model.
6856 * - The declaration matches ANY, and the types of any child elements have
6857 * been declared.
6858 */
6859
6860void
6861xmlParseElement(xmlParserCtxtPtr ctxt) {
6862 const xmlChar *openTag = CUR_PTR;
6863 xmlChar *name;
6864 xmlChar *oldname;
6865 xmlParserNodeInfo node_info;
6866 xmlNodePtr ret;
6867
6868 /* Capture start position */
6869 if (ctxt->record_info) {
6870 node_info.begin_pos = ctxt->input->consumed +
6871 (CUR_PTR - ctxt->input->base);
6872 node_info.begin_line = ctxt->input->line;
6873 }
6874
6875 if (ctxt->spaceNr == 0)
6876 spacePush(ctxt, -1);
6877 else
6878 spacePush(ctxt, *ctxt->space);
6879
6880 name = xmlParseStartTag(ctxt);
6881 if (name == NULL) {
6882 spacePop(ctxt);
6883 return;
6884 }
6885 namePush(ctxt, name);
6886 ret = ctxt->node;
6887
6888 /*
6889 * [ VC: Root Element Type ]
6890 * The Name in the document type declaration must match the element
6891 * type of the root element.
6892 */
6893 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6894 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6895 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6896
6897 /*
6898 * Check for an Empty Element.
6899 */
6900 if ((RAW == '/') && (NXT(1) == '>')) {
6901 SKIP(2);
6902 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6903 (!ctxt->disableSAX))
6904 ctxt->sax->endElement(ctxt->userData, name);
6905 oldname = namePop(ctxt);
6906 spacePop(ctxt);
6907 if (oldname != NULL) {
6908#ifdef DEBUG_STACK
6909 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6910#endif
6911 xmlFree(oldname);
6912 }
6913 if ( ret != NULL && ctxt->record_info ) {
6914 node_info.end_pos = ctxt->input->consumed +
6915 (CUR_PTR - ctxt->input->base);
6916 node_info.end_line = ctxt->input->line;
6917 node_info.node = ret;
6918 xmlParserAddNodeInfo(ctxt, &node_info);
6919 }
6920 return;
6921 }
6922 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006923 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006924 } else {
6925 ctxt->errNo = XML_ERR_GT_REQUIRED;
6926 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6927 ctxt->sax->error(ctxt->userData,
6928 "Couldn't find end of Start Tag\n%.30s\n",
6929 openTag);
6930 ctxt->wellFormed = 0;
6931 ctxt->disableSAX = 1;
6932
6933 /*
6934 * end of parsing of this node.
6935 */
6936 nodePop(ctxt);
6937 oldname = namePop(ctxt);
6938 spacePop(ctxt);
6939 if (oldname != NULL) {
6940#ifdef DEBUG_STACK
6941 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6942#endif
6943 xmlFree(oldname);
6944 }
6945
6946 /*
6947 * Capture end position and add node
6948 */
6949 if ( ret != NULL && ctxt->record_info ) {
6950 node_info.end_pos = ctxt->input->consumed +
6951 (CUR_PTR - ctxt->input->base);
6952 node_info.end_line = ctxt->input->line;
6953 node_info.node = ret;
6954 xmlParserAddNodeInfo(ctxt, &node_info);
6955 }
6956 return;
6957 }
6958
6959 /*
6960 * Parse the content of the element:
6961 */
6962 xmlParseContent(ctxt);
6963 if (!IS_CHAR(RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00006964 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00006965 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6966 ctxt->sax->error(ctxt->userData,
6967 "Premature end of data in tag %.30s\n", openTag);
6968 ctxt->wellFormed = 0;
6969 ctxt->disableSAX = 1;
6970
6971 /*
6972 * end of parsing of this node.
6973 */
6974 nodePop(ctxt);
6975 oldname = namePop(ctxt);
6976 spacePop(ctxt);
6977 if (oldname != NULL) {
6978#ifdef DEBUG_STACK
6979 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6980#endif
6981 xmlFree(oldname);
6982 }
6983 return;
6984 }
6985
6986 /*
6987 * parse the end of tag: '</' should be here.
6988 */
6989 xmlParseEndTag(ctxt);
6990
6991 /*
6992 * Capture end position and add node
6993 */
6994 if ( ret != NULL && ctxt->record_info ) {
6995 node_info.end_pos = ctxt->input->consumed +
6996 (CUR_PTR - ctxt->input->base);
6997 node_info.end_line = ctxt->input->line;
6998 node_info.node = ret;
6999 xmlParserAddNodeInfo(ctxt, &node_info);
7000 }
7001}
7002
7003/**
7004 * xmlParseVersionNum:
7005 * @ctxt: an XML parser context
7006 *
7007 * parse the XML version value.
7008 *
7009 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7010 *
7011 * Returns the string giving the XML version number, or NULL
7012 */
7013xmlChar *
7014xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7015 xmlChar *buf = NULL;
7016 int len = 0;
7017 int size = 10;
7018 xmlChar cur;
7019
7020 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7021 if (buf == NULL) {
7022 xmlGenericError(xmlGenericErrorContext,
7023 "malloc of %d byte failed\n", size);
7024 return(NULL);
7025 }
7026 cur = CUR;
7027 while (((cur >= 'a') && (cur <= 'z')) ||
7028 ((cur >= 'A') && (cur <= 'Z')) ||
7029 ((cur >= '0') && (cur <= '9')) ||
7030 (cur == '_') || (cur == '.') ||
7031 (cur == ':') || (cur == '-')) {
7032 if (len + 1 >= size) {
7033 size *= 2;
7034 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7035 if (buf == NULL) {
7036 xmlGenericError(xmlGenericErrorContext,
7037 "realloc of %d byte failed\n", size);
7038 return(NULL);
7039 }
7040 }
7041 buf[len++] = cur;
7042 NEXT;
7043 cur=CUR;
7044 }
7045 buf[len] = 0;
7046 return(buf);
7047}
7048
7049/**
7050 * xmlParseVersionInfo:
7051 * @ctxt: an XML parser context
7052 *
7053 * parse the XML version.
7054 *
7055 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7056 *
7057 * [25] Eq ::= S? '=' S?
7058 *
7059 * Returns the version string, e.g. "1.0"
7060 */
7061
7062xmlChar *
7063xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7064 xmlChar *version = NULL;
7065 const xmlChar *q;
7066
7067 if ((RAW == 'v') && (NXT(1) == 'e') &&
7068 (NXT(2) == 'r') && (NXT(3) == 's') &&
7069 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7070 (NXT(6) == 'n')) {
7071 SKIP(7);
7072 SKIP_BLANKS;
7073 if (RAW != '=') {
7074 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7075 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7076 ctxt->sax->error(ctxt->userData,
7077 "xmlParseVersionInfo : expected '='\n");
7078 ctxt->wellFormed = 0;
7079 ctxt->disableSAX = 1;
7080 return(NULL);
7081 }
7082 NEXT;
7083 SKIP_BLANKS;
7084 if (RAW == '"') {
7085 NEXT;
7086 q = CUR_PTR;
7087 version = xmlParseVersionNum(ctxt);
7088 if (RAW != '"') {
7089 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7090 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7091 ctxt->sax->error(ctxt->userData,
7092 "String not closed\n%.50s\n", q);
7093 ctxt->wellFormed = 0;
7094 ctxt->disableSAX = 1;
7095 } else
7096 NEXT;
7097 } else if (RAW == '\''){
7098 NEXT;
7099 q = CUR_PTR;
7100 version = xmlParseVersionNum(ctxt);
7101 if (RAW != '\'') {
7102 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7103 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7104 ctxt->sax->error(ctxt->userData,
7105 "String not closed\n%.50s\n", q);
7106 ctxt->wellFormed = 0;
7107 ctxt->disableSAX = 1;
7108 } else
7109 NEXT;
7110 } else {
7111 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7112 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7113 ctxt->sax->error(ctxt->userData,
7114 "xmlParseVersionInfo : expected ' or \"\n");
7115 ctxt->wellFormed = 0;
7116 ctxt->disableSAX = 1;
7117 }
7118 }
7119 return(version);
7120}
7121
7122/**
7123 * xmlParseEncName:
7124 * @ctxt: an XML parser context
7125 *
7126 * parse the XML encoding name
7127 *
7128 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7129 *
7130 * Returns the encoding name value or NULL
7131 */
7132xmlChar *
7133xmlParseEncName(xmlParserCtxtPtr ctxt) {
7134 xmlChar *buf = NULL;
7135 int len = 0;
7136 int size = 10;
7137 xmlChar cur;
7138
7139 cur = CUR;
7140 if (((cur >= 'a') && (cur <= 'z')) ||
7141 ((cur >= 'A') && (cur <= 'Z'))) {
7142 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7143 if (buf == NULL) {
7144 xmlGenericError(xmlGenericErrorContext,
7145 "malloc of %d byte failed\n", size);
7146 return(NULL);
7147 }
7148
7149 buf[len++] = cur;
7150 NEXT;
7151 cur = CUR;
7152 while (((cur >= 'a') && (cur <= 'z')) ||
7153 ((cur >= 'A') && (cur <= 'Z')) ||
7154 ((cur >= '0') && (cur <= '9')) ||
7155 (cur == '.') || (cur == '_') ||
7156 (cur == '-')) {
7157 if (len + 1 >= size) {
7158 size *= 2;
7159 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7160 if (buf == NULL) {
7161 xmlGenericError(xmlGenericErrorContext,
7162 "realloc of %d byte failed\n", size);
7163 return(NULL);
7164 }
7165 }
7166 buf[len++] = cur;
7167 NEXT;
7168 cur = CUR;
7169 if (cur == 0) {
7170 SHRINK;
7171 GROW;
7172 cur = CUR;
7173 }
7174 }
7175 buf[len] = 0;
7176 } else {
7177 ctxt->errNo = XML_ERR_ENCODING_NAME;
7178 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7179 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7180 ctxt->wellFormed = 0;
7181 ctxt->disableSAX = 1;
7182 }
7183 return(buf);
7184}
7185
7186/**
7187 * xmlParseEncodingDecl:
7188 * @ctxt: an XML parser context
7189 *
7190 * parse the XML encoding declaration
7191 *
7192 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7193 *
7194 * this setups the conversion filters.
7195 *
7196 * Returns the encoding value or NULL
7197 */
7198
7199xmlChar *
7200xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7201 xmlChar *encoding = NULL;
7202 const xmlChar *q;
7203
7204 SKIP_BLANKS;
7205 if ((RAW == 'e') && (NXT(1) == 'n') &&
7206 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7207 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7208 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7209 SKIP(8);
7210 SKIP_BLANKS;
7211 if (RAW != '=') {
7212 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7213 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7214 ctxt->sax->error(ctxt->userData,
7215 "xmlParseEncodingDecl : expected '='\n");
7216 ctxt->wellFormed = 0;
7217 ctxt->disableSAX = 1;
7218 return(NULL);
7219 }
7220 NEXT;
7221 SKIP_BLANKS;
7222 if (RAW == '"') {
7223 NEXT;
7224 q = CUR_PTR;
7225 encoding = xmlParseEncName(ctxt);
7226 if (RAW != '"') {
7227 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7228 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7229 ctxt->sax->error(ctxt->userData,
7230 "String not closed\n%.50s\n", q);
7231 ctxt->wellFormed = 0;
7232 ctxt->disableSAX = 1;
7233 } else
7234 NEXT;
7235 } else if (RAW == '\''){
7236 NEXT;
7237 q = CUR_PTR;
7238 encoding = xmlParseEncName(ctxt);
7239 if (RAW != '\'') {
7240 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7241 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7242 ctxt->sax->error(ctxt->userData,
7243 "String not closed\n%.50s\n", q);
7244 ctxt->wellFormed = 0;
7245 ctxt->disableSAX = 1;
7246 } else
7247 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00007248 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007249 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7250 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7251 ctxt->sax->error(ctxt->userData,
7252 "xmlParseEncodingDecl : expected ' or \"\n");
7253 ctxt->wellFormed = 0;
7254 ctxt->disableSAX = 1;
7255 }
7256 if (encoding != NULL) {
7257 xmlCharEncoding enc;
7258 xmlCharEncodingHandlerPtr handler;
7259
7260 if (ctxt->input->encoding != NULL)
7261 xmlFree((xmlChar *) ctxt->input->encoding);
7262 ctxt->input->encoding = encoding;
7263
7264 enc = xmlParseCharEncoding((const char *) encoding);
7265 /*
7266 * registered set of known encodings
7267 */
7268 if (enc != XML_CHAR_ENCODING_ERROR) {
7269 xmlSwitchEncoding(ctxt, enc);
7270 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
Daniel Veillard46d6c442002-04-09 16:10:39 +00007271 ctxt->input->encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007272 xmlFree(encoding);
7273 return(NULL);
7274 }
7275 } else {
7276 /*
7277 * fallback for unknown encodings
7278 */
7279 handler = xmlFindCharEncodingHandler((const char *) encoding);
7280 if (handler != NULL) {
7281 xmlSwitchToEncoding(ctxt, handler);
7282 } else {
7283 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7284 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7285 ctxt->sax->error(ctxt->userData,
7286 "Unsupported encoding %s\n", encoding);
7287 return(NULL);
7288 }
7289 }
7290 }
7291 }
7292 return(encoding);
7293}
7294
7295/**
7296 * xmlParseSDDecl:
7297 * @ctxt: an XML parser context
7298 *
7299 * parse the XML standalone declaration
7300 *
7301 * [32] SDDecl ::= S 'standalone' Eq
7302 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7303 *
7304 * [ VC: Standalone Document Declaration ]
7305 * TODO The standalone document declaration must have the value "no"
7306 * if any external markup declarations contain declarations of:
7307 * - attributes with default values, if elements to which these
7308 * attributes apply appear in the document without specifications
7309 * of values for these attributes, or
7310 * - entities (other than amp, lt, gt, apos, quot), if references
7311 * to those entities appear in the document, or
7312 * - attributes with values subject to normalization, where the
7313 * attribute appears in the document with a value which will change
7314 * as a result of normalization, or
7315 * - element types with element content, if white space occurs directly
7316 * within any instance of those types.
7317 *
7318 * Returns 1 if standalone, 0 otherwise
7319 */
7320
7321int
7322xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7323 int standalone = -1;
7324
7325 SKIP_BLANKS;
7326 if ((RAW == 's') && (NXT(1) == 't') &&
7327 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7328 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7329 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7330 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7331 SKIP(10);
7332 SKIP_BLANKS;
7333 if (RAW != '=') {
7334 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7335 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7336 ctxt->sax->error(ctxt->userData,
7337 "XML standalone declaration : expected '='\n");
7338 ctxt->wellFormed = 0;
7339 ctxt->disableSAX = 1;
7340 return(standalone);
7341 }
7342 NEXT;
7343 SKIP_BLANKS;
7344 if (RAW == '\''){
7345 NEXT;
7346 if ((RAW == 'n') && (NXT(1) == 'o')) {
7347 standalone = 0;
7348 SKIP(2);
7349 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7350 (NXT(2) == 's')) {
7351 standalone = 1;
7352 SKIP(3);
7353 } else {
7354 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7355 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7356 ctxt->sax->error(ctxt->userData,
7357 "standalone accepts only 'yes' or 'no'\n");
7358 ctxt->wellFormed = 0;
7359 ctxt->disableSAX = 1;
7360 }
7361 if (RAW != '\'') {
7362 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7363 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7364 ctxt->sax->error(ctxt->userData, "String not closed\n");
7365 ctxt->wellFormed = 0;
7366 ctxt->disableSAX = 1;
7367 } else
7368 NEXT;
7369 } else if (RAW == '"'){
7370 NEXT;
7371 if ((RAW == 'n') && (NXT(1) == 'o')) {
7372 standalone = 0;
7373 SKIP(2);
7374 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7375 (NXT(2) == 's')) {
7376 standalone = 1;
7377 SKIP(3);
7378 } else {
7379 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7380 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7381 ctxt->sax->error(ctxt->userData,
7382 "standalone accepts only 'yes' or 'no'\n");
7383 ctxt->wellFormed = 0;
7384 ctxt->disableSAX = 1;
7385 }
7386 if (RAW != '"') {
7387 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7388 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7389 ctxt->sax->error(ctxt->userData, "String not closed\n");
7390 ctxt->wellFormed = 0;
7391 ctxt->disableSAX = 1;
7392 } else
7393 NEXT;
7394 } else {
7395 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7396 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7397 ctxt->sax->error(ctxt->userData,
7398 "Standalone value not found\n");
7399 ctxt->wellFormed = 0;
7400 ctxt->disableSAX = 1;
7401 }
7402 }
7403 return(standalone);
7404}
7405
7406/**
7407 * xmlParseXMLDecl:
7408 * @ctxt: an XML parser context
7409 *
7410 * parse an XML declaration header
7411 *
7412 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7413 */
7414
7415void
7416xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7417 xmlChar *version;
7418
7419 /*
7420 * We know that '<?xml' is here.
7421 */
7422 SKIP(5);
7423
7424 if (!IS_BLANK(RAW)) {
7425 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7426 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7427 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7428 ctxt->wellFormed = 0;
7429 ctxt->disableSAX = 1;
7430 }
7431 SKIP_BLANKS;
7432
7433 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007434 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007435 */
7436 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007437 if (version == NULL) {
7438 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7439 ctxt->sax->error(ctxt->userData,
7440 "Malformed declaration expecting version\n");
7441 ctxt->wellFormed = 0;
7442 ctxt->disableSAX = 1;
7443 } else {
7444 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7445 /*
7446 * TODO: Blueberry should be detected here
7447 */
7448 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7449 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7450 version);
7451 }
7452 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007453 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007454 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007455 }
Owen Taylor3473f882001-02-23 17:55:21 +00007456
7457 /*
7458 * We may have the encoding declaration
7459 */
7460 if (!IS_BLANK(RAW)) {
7461 if ((RAW == '?') && (NXT(1) == '>')) {
7462 SKIP(2);
7463 return;
7464 }
7465 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7467 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7468 ctxt->wellFormed = 0;
7469 ctxt->disableSAX = 1;
7470 }
7471 xmlParseEncodingDecl(ctxt);
7472 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7473 /*
7474 * The XML REC instructs us to stop parsing right here
7475 */
7476 return;
7477 }
7478
7479 /*
7480 * We may have the standalone status.
7481 */
7482 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7483 if ((RAW == '?') && (NXT(1) == '>')) {
7484 SKIP(2);
7485 return;
7486 }
7487 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7488 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7489 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7490 ctxt->wellFormed = 0;
7491 ctxt->disableSAX = 1;
7492 }
7493 SKIP_BLANKS;
7494 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7495
7496 SKIP_BLANKS;
7497 if ((RAW == '?') && (NXT(1) == '>')) {
7498 SKIP(2);
7499 } else if (RAW == '>') {
7500 /* Deprecated old WD ... */
7501 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7502 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7503 ctxt->sax->error(ctxt->userData,
7504 "XML declaration must end-up with '?>'\n");
7505 ctxt->wellFormed = 0;
7506 ctxt->disableSAX = 1;
7507 NEXT;
7508 } else {
7509 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7510 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7511 ctxt->sax->error(ctxt->userData,
7512 "parsing XML declaration: '?>' expected\n");
7513 ctxt->wellFormed = 0;
7514 ctxt->disableSAX = 1;
7515 MOVETO_ENDTAG(CUR_PTR);
7516 NEXT;
7517 }
7518}
7519
7520/**
7521 * xmlParseMisc:
7522 * @ctxt: an XML parser context
7523 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007524 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007525 *
7526 * [27] Misc ::= Comment | PI | S
7527 */
7528
7529void
7530xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00007531 while (((RAW == '<') && (NXT(1) == '?')) ||
7532 ((RAW == '<') && (NXT(1) == '!') &&
7533 (NXT(2) == '-') && (NXT(3) == '-')) ||
7534 IS_BLANK(CUR)) {
7535 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007536 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00007537 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007538 NEXT;
7539 } else
7540 xmlParseComment(ctxt);
7541 }
7542}
7543
7544/**
7545 * xmlParseDocument:
7546 * @ctxt: an XML parser context
7547 *
7548 * parse an XML document (and build a tree if using the standard SAX
7549 * interface).
7550 *
7551 * [1] document ::= prolog element Misc*
7552 *
7553 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7554 *
7555 * Returns 0, -1 in case of error. the parser context is augmented
7556 * as a result of the parsing.
7557 */
7558
7559int
7560xmlParseDocument(xmlParserCtxtPtr ctxt) {
7561 xmlChar start[4];
7562 xmlCharEncoding enc;
7563
7564 xmlInitParser();
7565
7566 GROW;
7567
7568 /*
7569 * SAX: beginning of the document processing.
7570 */
7571 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7572 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7573
Daniel Veillard50f34372001-08-03 12:06:36 +00007574 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007575 /*
7576 * Get the 4 first bytes and decode the charset
7577 * if enc != XML_CHAR_ENCODING_NONE
7578 * plug some encoding conversion routines.
7579 */
7580 start[0] = RAW;
7581 start[1] = NXT(1);
7582 start[2] = NXT(2);
7583 start[3] = NXT(3);
7584 enc = xmlDetectCharEncoding(start, 4);
7585 if (enc != XML_CHAR_ENCODING_NONE) {
7586 xmlSwitchEncoding(ctxt, enc);
7587 }
Owen Taylor3473f882001-02-23 17:55:21 +00007588 }
7589
7590
7591 if (CUR == 0) {
7592 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7593 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7594 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7595 ctxt->wellFormed = 0;
7596 ctxt->disableSAX = 1;
7597 }
7598
7599 /*
7600 * Check for the XMLDecl in the Prolog.
7601 */
7602 GROW;
7603 if ((RAW == '<') && (NXT(1) == '?') &&
7604 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7605 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7606
7607 /*
7608 * Note that we will switch encoding on the fly.
7609 */
7610 xmlParseXMLDecl(ctxt);
7611 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7612 /*
7613 * The XML REC instructs us to stop parsing right here
7614 */
7615 return(-1);
7616 }
7617 ctxt->standalone = ctxt->input->standalone;
7618 SKIP_BLANKS;
7619 } else {
7620 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7621 }
7622 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7623 ctxt->sax->startDocument(ctxt->userData);
7624
7625 /*
7626 * The Misc part of the Prolog
7627 */
7628 GROW;
7629 xmlParseMisc(ctxt);
7630
7631 /*
7632 * Then possibly doc type declaration(s) and more Misc
7633 * (doctypedecl Misc*)?
7634 */
7635 GROW;
7636 if ((RAW == '<') && (NXT(1) == '!') &&
7637 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7638 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7639 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7640 (NXT(8) == 'E')) {
7641
7642 ctxt->inSubset = 1;
7643 xmlParseDocTypeDecl(ctxt);
7644 if (RAW == '[') {
7645 ctxt->instate = XML_PARSER_DTD;
7646 xmlParseInternalSubset(ctxt);
7647 }
7648
7649 /*
7650 * Create and update the external subset.
7651 */
7652 ctxt->inSubset = 2;
7653 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7654 (!ctxt->disableSAX))
7655 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7656 ctxt->extSubSystem, ctxt->extSubURI);
7657 ctxt->inSubset = 0;
7658
7659
7660 ctxt->instate = XML_PARSER_PROLOG;
7661 xmlParseMisc(ctxt);
7662 }
7663
7664 /*
7665 * Time to start parsing the tree itself
7666 */
7667 GROW;
7668 if (RAW != '<') {
7669 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7670 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7671 ctxt->sax->error(ctxt->userData,
7672 "Start tag expected, '<' not found\n");
7673 ctxt->wellFormed = 0;
7674 ctxt->disableSAX = 1;
7675 ctxt->instate = XML_PARSER_EOF;
7676 } else {
7677 ctxt->instate = XML_PARSER_CONTENT;
7678 xmlParseElement(ctxt);
7679 ctxt->instate = XML_PARSER_EPILOG;
7680
7681
7682 /*
7683 * The Misc part at the end
7684 */
7685 xmlParseMisc(ctxt);
7686
Daniel Veillard561b7f82002-03-20 21:55:57 +00007687 if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00007688 ctxt->errNo = XML_ERR_DOCUMENT_END;
7689 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7690 ctxt->sax->error(ctxt->userData,
7691 "Extra content at the end of the document\n");
7692 ctxt->wellFormed = 0;
7693 ctxt->disableSAX = 1;
7694 }
7695 ctxt->instate = XML_PARSER_EOF;
7696 }
7697
7698 /*
7699 * SAX: end of the document processing.
7700 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007701 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007702 ctxt->sax->endDocument(ctxt->userData);
7703
Daniel Veillard5997aca2002-03-18 18:36:20 +00007704 /*
7705 * Remove locally kept entity definitions if the tree was not built
7706 */
7707 if ((ctxt->myDoc != NULL) &&
7708 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
7709 xmlFreeDoc(ctxt->myDoc);
7710 ctxt->myDoc = NULL;
7711 }
7712
Daniel Veillardc7612992002-02-17 22:47:37 +00007713 if (! ctxt->wellFormed) {
7714 ctxt->valid = 0;
7715 return(-1);
7716 }
Owen Taylor3473f882001-02-23 17:55:21 +00007717 return(0);
7718}
7719
7720/**
7721 * xmlParseExtParsedEnt:
7722 * @ctxt: an XML parser context
7723 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007724 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00007725 * An external general parsed entity is well-formed if it matches the
7726 * production labeled extParsedEnt.
7727 *
7728 * [78] extParsedEnt ::= TextDecl? content
7729 *
7730 * Returns 0, -1 in case of error. the parser context is augmented
7731 * as a result of the parsing.
7732 */
7733
7734int
7735xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7736 xmlChar start[4];
7737 xmlCharEncoding enc;
7738
7739 xmlDefaultSAXHandlerInit();
7740
7741 GROW;
7742
7743 /*
7744 * SAX: beginning of the document processing.
7745 */
7746 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7747 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7748
7749 /*
7750 * Get the 4 first bytes and decode the charset
7751 * if enc != XML_CHAR_ENCODING_NONE
7752 * plug some encoding conversion routines.
7753 */
7754 start[0] = RAW;
7755 start[1] = NXT(1);
7756 start[2] = NXT(2);
7757 start[3] = NXT(3);
7758 enc = xmlDetectCharEncoding(start, 4);
7759 if (enc != XML_CHAR_ENCODING_NONE) {
7760 xmlSwitchEncoding(ctxt, enc);
7761 }
7762
7763
7764 if (CUR == 0) {
7765 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7766 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7767 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7768 ctxt->wellFormed = 0;
7769 ctxt->disableSAX = 1;
7770 }
7771
7772 /*
7773 * Check for the XMLDecl in the Prolog.
7774 */
7775 GROW;
7776 if ((RAW == '<') && (NXT(1) == '?') &&
7777 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7778 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7779
7780 /*
7781 * Note that we will switch encoding on the fly.
7782 */
7783 xmlParseXMLDecl(ctxt);
7784 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7785 /*
7786 * The XML REC instructs us to stop parsing right here
7787 */
7788 return(-1);
7789 }
7790 SKIP_BLANKS;
7791 } else {
7792 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7793 }
7794 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7795 ctxt->sax->startDocument(ctxt->userData);
7796
7797 /*
7798 * Doing validity checking on chunk doesn't make sense
7799 */
7800 ctxt->instate = XML_PARSER_CONTENT;
7801 ctxt->validate = 0;
7802 ctxt->loadsubset = 0;
7803 ctxt->depth = 0;
7804
7805 xmlParseContent(ctxt);
7806
7807 if ((RAW == '<') && (NXT(1) == '/')) {
7808 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7809 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7810 ctxt->sax->error(ctxt->userData,
7811 "chunk is not well balanced\n");
7812 ctxt->wellFormed = 0;
7813 ctxt->disableSAX = 1;
7814 } else if (RAW != 0) {
7815 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7816 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7817 ctxt->sax->error(ctxt->userData,
7818 "extra content at the end of well balanced chunk\n");
7819 ctxt->wellFormed = 0;
7820 ctxt->disableSAX = 1;
7821 }
7822
7823 /*
7824 * SAX: end of the document processing.
7825 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007826 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007827 ctxt->sax->endDocument(ctxt->userData);
7828
7829 if (! ctxt->wellFormed) return(-1);
7830 return(0);
7831}
7832
7833/************************************************************************
7834 * *
7835 * Progressive parsing interfaces *
7836 * *
7837 ************************************************************************/
7838
7839/**
7840 * xmlParseLookupSequence:
7841 * @ctxt: an XML parser context
7842 * @first: the first char to lookup
7843 * @next: the next char to lookup or zero
7844 * @third: the next char to lookup or zero
7845 *
7846 * Try to find if a sequence (first, next, third) or just (first next) or
7847 * (first) is available in the input stream.
7848 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7849 * to avoid rescanning sequences of bytes, it DOES change the state of the
7850 * parser, do not use liberally.
7851 *
7852 * Returns the index to the current parsing point if the full sequence
7853 * is available, -1 otherwise.
7854 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007855static int
Owen Taylor3473f882001-02-23 17:55:21 +00007856xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7857 xmlChar next, xmlChar third) {
7858 int base, len;
7859 xmlParserInputPtr in;
7860 const xmlChar *buf;
7861
7862 in = ctxt->input;
7863 if (in == NULL) return(-1);
7864 base = in->cur - in->base;
7865 if (base < 0) return(-1);
7866 if (ctxt->checkIndex > base)
7867 base = ctxt->checkIndex;
7868 if (in->buf == NULL) {
7869 buf = in->base;
7870 len = in->length;
7871 } else {
7872 buf = in->buf->buffer->content;
7873 len = in->buf->buffer->use;
7874 }
7875 /* take into account the sequence length */
7876 if (third) len -= 2;
7877 else if (next) len --;
7878 for (;base < len;base++) {
7879 if (buf[base] == first) {
7880 if (third != 0) {
7881 if ((buf[base + 1] != next) ||
7882 (buf[base + 2] != third)) continue;
7883 } else if (next != 0) {
7884 if (buf[base + 1] != next) continue;
7885 }
7886 ctxt->checkIndex = 0;
7887#ifdef DEBUG_PUSH
7888 if (next == 0)
7889 xmlGenericError(xmlGenericErrorContext,
7890 "PP: lookup '%c' found at %d\n",
7891 first, base);
7892 else if (third == 0)
7893 xmlGenericError(xmlGenericErrorContext,
7894 "PP: lookup '%c%c' found at %d\n",
7895 first, next, base);
7896 else
7897 xmlGenericError(xmlGenericErrorContext,
7898 "PP: lookup '%c%c%c' found at %d\n",
7899 first, next, third, base);
7900#endif
7901 return(base - (in->cur - in->base));
7902 }
7903 }
7904 ctxt->checkIndex = base;
7905#ifdef DEBUG_PUSH
7906 if (next == 0)
7907 xmlGenericError(xmlGenericErrorContext,
7908 "PP: lookup '%c' failed\n", first);
7909 else if (third == 0)
7910 xmlGenericError(xmlGenericErrorContext,
7911 "PP: lookup '%c%c' failed\n", first, next);
7912 else
7913 xmlGenericError(xmlGenericErrorContext,
7914 "PP: lookup '%c%c%c' failed\n", first, next, third);
7915#endif
7916 return(-1);
7917}
7918
7919/**
7920 * xmlParseTryOrFinish:
7921 * @ctxt: an XML parser context
7922 * @terminate: last chunk indicator
7923 *
7924 * Try to progress on parsing
7925 *
7926 * Returns zero if no parsing was possible
7927 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007928static int
Owen Taylor3473f882001-02-23 17:55:21 +00007929xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7930 int ret = 0;
7931 int avail;
7932 xmlChar cur, next;
7933
7934#ifdef DEBUG_PUSH
7935 switch (ctxt->instate) {
7936 case XML_PARSER_EOF:
7937 xmlGenericError(xmlGenericErrorContext,
7938 "PP: try EOF\n"); break;
7939 case XML_PARSER_START:
7940 xmlGenericError(xmlGenericErrorContext,
7941 "PP: try START\n"); break;
7942 case XML_PARSER_MISC:
7943 xmlGenericError(xmlGenericErrorContext,
7944 "PP: try MISC\n");break;
7945 case XML_PARSER_COMMENT:
7946 xmlGenericError(xmlGenericErrorContext,
7947 "PP: try COMMENT\n");break;
7948 case XML_PARSER_PROLOG:
7949 xmlGenericError(xmlGenericErrorContext,
7950 "PP: try PROLOG\n");break;
7951 case XML_PARSER_START_TAG:
7952 xmlGenericError(xmlGenericErrorContext,
7953 "PP: try START_TAG\n");break;
7954 case XML_PARSER_CONTENT:
7955 xmlGenericError(xmlGenericErrorContext,
7956 "PP: try CONTENT\n");break;
7957 case XML_PARSER_CDATA_SECTION:
7958 xmlGenericError(xmlGenericErrorContext,
7959 "PP: try CDATA_SECTION\n");break;
7960 case XML_PARSER_END_TAG:
7961 xmlGenericError(xmlGenericErrorContext,
7962 "PP: try END_TAG\n");break;
7963 case XML_PARSER_ENTITY_DECL:
7964 xmlGenericError(xmlGenericErrorContext,
7965 "PP: try ENTITY_DECL\n");break;
7966 case XML_PARSER_ENTITY_VALUE:
7967 xmlGenericError(xmlGenericErrorContext,
7968 "PP: try ENTITY_VALUE\n");break;
7969 case XML_PARSER_ATTRIBUTE_VALUE:
7970 xmlGenericError(xmlGenericErrorContext,
7971 "PP: try ATTRIBUTE_VALUE\n");break;
7972 case XML_PARSER_DTD:
7973 xmlGenericError(xmlGenericErrorContext,
7974 "PP: try DTD\n");break;
7975 case XML_PARSER_EPILOG:
7976 xmlGenericError(xmlGenericErrorContext,
7977 "PP: try EPILOG\n");break;
7978 case XML_PARSER_PI:
7979 xmlGenericError(xmlGenericErrorContext,
7980 "PP: try PI\n");break;
7981 case XML_PARSER_IGNORE:
7982 xmlGenericError(xmlGenericErrorContext,
7983 "PP: try IGNORE\n");break;
7984 }
7985#endif
7986
7987 while (1) {
7988 /*
7989 * Pop-up of finished entities.
7990 */
7991 while ((RAW == 0) && (ctxt->inputNr > 1))
7992 xmlPopInput(ctxt);
7993
7994 if (ctxt->input ==NULL) break;
7995 if (ctxt->input->buf == NULL)
7996 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00007997 else {
7998 /*
7999 * If we are operating on converted input, try to flush
8000 * remainng chars to avoid them stalling in the non-converted
8001 * buffer.
8002 */
8003 if ((ctxt->input->buf->raw != NULL) &&
8004 (ctxt->input->buf->raw->use > 0)) {
8005 int base = ctxt->input->base -
8006 ctxt->input->buf->buffer->content;
8007 int current = ctxt->input->cur - ctxt->input->base;
8008
8009 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8010 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8011 ctxt->input->cur = ctxt->input->base + current;
8012 ctxt->input->end =
8013 &ctxt->input->buf->buffer->content[
8014 ctxt->input->buf->buffer->use];
8015 }
8016 avail = ctxt->input->buf->buffer->use -
8017 (ctxt->input->cur - ctxt->input->base);
8018 }
Owen Taylor3473f882001-02-23 17:55:21 +00008019 if (avail < 1)
8020 goto done;
8021 switch (ctxt->instate) {
8022 case XML_PARSER_EOF:
8023 /*
8024 * Document parsing is done !
8025 */
8026 goto done;
8027 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008028 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8029 xmlChar start[4];
8030 xmlCharEncoding enc;
8031
8032 /*
8033 * Very first chars read from the document flow.
8034 */
8035 if (avail < 4)
8036 goto done;
8037
8038 /*
8039 * Get the 4 first bytes and decode the charset
8040 * if enc != XML_CHAR_ENCODING_NONE
8041 * plug some encoding conversion routines.
8042 */
8043 start[0] = RAW;
8044 start[1] = NXT(1);
8045 start[2] = NXT(2);
8046 start[3] = NXT(3);
8047 enc = xmlDetectCharEncoding(start, 4);
8048 if (enc != XML_CHAR_ENCODING_NONE) {
8049 xmlSwitchEncoding(ctxt, enc);
8050 }
8051 break;
8052 }
Owen Taylor3473f882001-02-23 17:55:21 +00008053
8054 cur = ctxt->input->cur[0];
8055 next = ctxt->input->cur[1];
8056 if (cur == 0) {
8057 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8058 ctxt->sax->setDocumentLocator(ctxt->userData,
8059 &xmlDefaultSAXLocator);
8060 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8061 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8062 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8063 ctxt->wellFormed = 0;
8064 ctxt->disableSAX = 1;
8065 ctxt->instate = XML_PARSER_EOF;
8066#ifdef DEBUG_PUSH
8067 xmlGenericError(xmlGenericErrorContext,
8068 "PP: entering EOF\n");
8069#endif
8070 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8071 ctxt->sax->endDocument(ctxt->userData);
8072 goto done;
8073 }
8074 if ((cur == '<') && (next == '?')) {
8075 /* PI or XML decl */
8076 if (avail < 5) return(ret);
8077 if ((!terminate) &&
8078 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8079 return(ret);
8080 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8081 ctxt->sax->setDocumentLocator(ctxt->userData,
8082 &xmlDefaultSAXLocator);
8083 if ((ctxt->input->cur[2] == 'x') &&
8084 (ctxt->input->cur[3] == 'm') &&
8085 (ctxt->input->cur[4] == 'l') &&
8086 (IS_BLANK(ctxt->input->cur[5]))) {
8087 ret += 5;
8088#ifdef DEBUG_PUSH
8089 xmlGenericError(xmlGenericErrorContext,
8090 "PP: Parsing XML Decl\n");
8091#endif
8092 xmlParseXMLDecl(ctxt);
8093 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8094 /*
8095 * The XML REC instructs us to stop parsing right
8096 * here
8097 */
8098 ctxt->instate = XML_PARSER_EOF;
8099 return(0);
8100 }
8101 ctxt->standalone = ctxt->input->standalone;
8102 if ((ctxt->encoding == NULL) &&
8103 (ctxt->input->encoding != NULL))
8104 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8105 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8106 (!ctxt->disableSAX))
8107 ctxt->sax->startDocument(ctxt->userData);
8108 ctxt->instate = XML_PARSER_MISC;
8109#ifdef DEBUG_PUSH
8110 xmlGenericError(xmlGenericErrorContext,
8111 "PP: entering MISC\n");
8112#endif
8113 } else {
8114 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8115 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8116 (!ctxt->disableSAX))
8117 ctxt->sax->startDocument(ctxt->userData);
8118 ctxt->instate = XML_PARSER_MISC;
8119#ifdef DEBUG_PUSH
8120 xmlGenericError(xmlGenericErrorContext,
8121 "PP: entering MISC\n");
8122#endif
8123 }
8124 } else {
8125 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8126 ctxt->sax->setDocumentLocator(ctxt->userData,
8127 &xmlDefaultSAXLocator);
8128 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8129 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8130 (!ctxt->disableSAX))
8131 ctxt->sax->startDocument(ctxt->userData);
8132 ctxt->instate = XML_PARSER_MISC;
8133#ifdef DEBUG_PUSH
8134 xmlGenericError(xmlGenericErrorContext,
8135 "PP: entering MISC\n");
8136#endif
8137 }
8138 break;
8139 case XML_PARSER_MISC:
8140 SKIP_BLANKS;
8141 if (ctxt->input->buf == NULL)
8142 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8143 else
8144 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8145 if (avail < 2)
8146 goto done;
8147 cur = ctxt->input->cur[0];
8148 next = ctxt->input->cur[1];
8149 if ((cur == '<') && (next == '?')) {
8150 if ((!terminate) &&
8151 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8152 goto done;
8153#ifdef DEBUG_PUSH
8154 xmlGenericError(xmlGenericErrorContext,
8155 "PP: Parsing PI\n");
8156#endif
8157 xmlParsePI(ctxt);
8158 } else if ((cur == '<') && (next == '!') &&
8159 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8160 if ((!terminate) &&
8161 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8162 goto done;
8163#ifdef DEBUG_PUSH
8164 xmlGenericError(xmlGenericErrorContext,
8165 "PP: Parsing Comment\n");
8166#endif
8167 xmlParseComment(ctxt);
8168 ctxt->instate = XML_PARSER_MISC;
8169 } else if ((cur == '<') && (next == '!') &&
8170 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8171 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8172 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8173 (ctxt->input->cur[8] == 'E')) {
8174 if ((!terminate) &&
8175 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8176 goto done;
8177#ifdef DEBUG_PUSH
8178 xmlGenericError(xmlGenericErrorContext,
8179 "PP: Parsing internal subset\n");
8180#endif
8181 ctxt->inSubset = 1;
8182 xmlParseDocTypeDecl(ctxt);
8183 if (RAW == '[') {
8184 ctxt->instate = XML_PARSER_DTD;
8185#ifdef DEBUG_PUSH
8186 xmlGenericError(xmlGenericErrorContext,
8187 "PP: entering DTD\n");
8188#endif
8189 } else {
8190 /*
8191 * Create and update the external subset.
8192 */
8193 ctxt->inSubset = 2;
8194 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8195 (ctxt->sax->externalSubset != NULL))
8196 ctxt->sax->externalSubset(ctxt->userData,
8197 ctxt->intSubName, ctxt->extSubSystem,
8198 ctxt->extSubURI);
8199 ctxt->inSubset = 0;
8200 ctxt->instate = XML_PARSER_PROLOG;
8201#ifdef DEBUG_PUSH
8202 xmlGenericError(xmlGenericErrorContext,
8203 "PP: entering PROLOG\n");
8204#endif
8205 }
8206 } else if ((cur == '<') && (next == '!') &&
8207 (avail < 9)) {
8208 goto done;
8209 } else {
8210 ctxt->instate = XML_PARSER_START_TAG;
8211#ifdef DEBUG_PUSH
8212 xmlGenericError(xmlGenericErrorContext,
8213 "PP: entering START_TAG\n");
8214#endif
8215 }
8216 break;
8217 case XML_PARSER_IGNORE:
8218 xmlGenericError(xmlGenericErrorContext,
8219 "PP: internal error, state == IGNORE");
8220 ctxt->instate = XML_PARSER_DTD;
8221#ifdef DEBUG_PUSH
8222 xmlGenericError(xmlGenericErrorContext,
8223 "PP: entering DTD\n");
8224#endif
8225 break;
8226 case XML_PARSER_PROLOG:
8227 SKIP_BLANKS;
8228 if (ctxt->input->buf == NULL)
8229 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8230 else
8231 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8232 if (avail < 2)
8233 goto done;
8234 cur = ctxt->input->cur[0];
8235 next = ctxt->input->cur[1];
8236 if ((cur == '<') && (next == '?')) {
8237 if ((!terminate) &&
8238 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8239 goto done;
8240#ifdef DEBUG_PUSH
8241 xmlGenericError(xmlGenericErrorContext,
8242 "PP: Parsing PI\n");
8243#endif
8244 xmlParsePI(ctxt);
8245 } else if ((cur == '<') && (next == '!') &&
8246 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8247 if ((!terminate) &&
8248 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8249 goto done;
8250#ifdef DEBUG_PUSH
8251 xmlGenericError(xmlGenericErrorContext,
8252 "PP: Parsing Comment\n");
8253#endif
8254 xmlParseComment(ctxt);
8255 ctxt->instate = XML_PARSER_PROLOG;
8256 } else if ((cur == '<') && (next == '!') &&
8257 (avail < 4)) {
8258 goto done;
8259 } else {
8260 ctxt->instate = XML_PARSER_START_TAG;
8261#ifdef DEBUG_PUSH
8262 xmlGenericError(xmlGenericErrorContext,
8263 "PP: entering START_TAG\n");
8264#endif
8265 }
8266 break;
8267 case XML_PARSER_EPILOG:
8268 SKIP_BLANKS;
8269 if (ctxt->input->buf == NULL)
8270 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8271 else
8272 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8273 if (avail < 2)
8274 goto done;
8275 cur = ctxt->input->cur[0];
8276 next = ctxt->input->cur[1];
8277 if ((cur == '<') && (next == '?')) {
8278 if ((!terminate) &&
8279 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8280 goto done;
8281#ifdef DEBUG_PUSH
8282 xmlGenericError(xmlGenericErrorContext,
8283 "PP: Parsing PI\n");
8284#endif
8285 xmlParsePI(ctxt);
8286 ctxt->instate = XML_PARSER_EPILOG;
8287 } else if ((cur == '<') && (next == '!') &&
8288 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8289 if ((!terminate) &&
8290 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8291 goto done;
8292#ifdef DEBUG_PUSH
8293 xmlGenericError(xmlGenericErrorContext,
8294 "PP: Parsing Comment\n");
8295#endif
8296 xmlParseComment(ctxt);
8297 ctxt->instate = XML_PARSER_EPILOG;
8298 } else if ((cur == '<') && (next == '!') &&
8299 (avail < 4)) {
8300 goto done;
8301 } else {
8302 ctxt->errNo = XML_ERR_DOCUMENT_END;
8303 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8304 ctxt->sax->error(ctxt->userData,
8305 "Extra content at the end of the document\n");
8306 ctxt->wellFormed = 0;
8307 ctxt->disableSAX = 1;
8308 ctxt->instate = XML_PARSER_EOF;
8309#ifdef DEBUG_PUSH
8310 xmlGenericError(xmlGenericErrorContext,
8311 "PP: entering EOF\n");
8312#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008313 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008314 ctxt->sax->endDocument(ctxt->userData);
8315 goto done;
8316 }
8317 break;
8318 case XML_PARSER_START_TAG: {
8319 xmlChar *name, *oldname;
8320
8321 if ((avail < 2) && (ctxt->inputNr == 1))
8322 goto done;
8323 cur = ctxt->input->cur[0];
8324 if (cur != '<') {
8325 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8326 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8327 ctxt->sax->error(ctxt->userData,
8328 "Start tag expect, '<' not found\n");
8329 ctxt->wellFormed = 0;
8330 ctxt->disableSAX = 1;
8331 ctxt->instate = XML_PARSER_EOF;
8332#ifdef DEBUG_PUSH
8333 xmlGenericError(xmlGenericErrorContext,
8334 "PP: entering EOF\n");
8335#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008336 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008337 ctxt->sax->endDocument(ctxt->userData);
8338 goto done;
8339 }
8340 if ((!terminate) &&
8341 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8342 goto done;
8343 if (ctxt->spaceNr == 0)
8344 spacePush(ctxt, -1);
8345 else
8346 spacePush(ctxt, *ctxt->space);
8347 name = xmlParseStartTag(ctxt);
8348 if (name == NULL) {
8349 spacePop(ctxt);
8350 ctxt->instate = XML_PARSER_EOF;
8351#ifdef DEBUG_PUSH
8352 xmlGenericError(xmlGenericErrorContext,
8353 "PP: entering EOF\n");
8354#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008355 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008356 ctxt->sax->endDocument(ctxt->userData);
8357 goto done;
8358 }
8359 namePush(ctxt, xmlStrdup(name));
8360
8361 /*
8362 * [ VC: Root Element Type ]
8363 * The Name in the document type declaration must match
8364 * the element type of the root element.
8365 */
8366 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8367 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8368 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8369
8370 /*
8371 * Check for an Empty Element.
8372 */
8373 if ((RAW == '/') && (NXT(1) == '>')) {
8374 SKIP(2);
8375 if ((ctxt->sax != NULL) &&
8376 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8377 ctxt->sax->endElement(ctxt->userData, name);
8378 xmlFree(name);
8379 oldname = namePop(ctxt);
8380 spacePop(ctxt);
8381 if (oldname != NULL) {
8382#ifdef DEBUG_STACK
8383 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8384#endif
8385 xmlFree(oldname);
8386 }
8387 if (ctxt->name == NULL) {
8388 ctxt->instate = XML_PARSER_EPILOG;
8389#ifdef DEBUG_PUSH
8390 xmlGenericError(xmlGenericErrorContext,
8391 "PP: entering EPILOG\n");
8392#endif
8393 } else {
8394 ctxt->instate = XML_PARSER_CONTENT;
8395#ifdef DEBUG_PUSH
8396 xmlGenericError(xmlGenericErrorContext,
8397 "PP: entering CONTENT\n");
8398#endif
8399 }
8400 break;
8401 }
8402 if (RAW == '>') {
8403 NEXT;
8404 } else {
8405 ctxt->errNo = XML_ERR_GT_REQUIRED;
8406 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8407 ctxt->sax->error(ctxt->userData,
8408 "Couldn't find end of Start Tag %s\n",
8409 name);
8410 ctxt->wellFormed = 0;
8411 ctxt->disableSAX = 1;
8412
8413 /*
8414 * end of parsing of this node.
8415 */
8416 nodePop(ctxt);
8417 oldname = namePop(ctxt);
8418 spacePop(ctxt);
8419 if (oldname != NULL) {
8420#ifdef DEBUG_STACK
8421 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8422#endif
8423 xmlFree(oldname);
8424 }
8425 }
8426 xmlFree(name);
8427 ctxt->instate = XML_PARSER_CONTENT;
8428#ifdef DEBUG_PUSH
8429 xmlGenericError(xmlGenericErrorContext,
8430 "PP: entering CONTENT\n");
8431#endif
8432 break;
8433 }
8434 case XML_PARSER_CONTENT: {
8435 const xmlChar *test;
8436 int cons;
Daniel Veillard04be4f52001-03-26 21:23:53 +00008437 int tok;
Owen Taylor3473f882001-02-23 17:55:21 +00008438
8439 /*
8440 * Handle preparsed entities and charRef
8441 */
8442 if (ctxt->token != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008443 xmlChar current[2] = { 0 , 0 } ;
Owen Taylor3473f882001-02-23 17:55:21 +00008444
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008445 current[0] = (xmlChar) ctxt->token;
Owen Taylor3473f882001-02-23 17:55:21 +00008446 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8447 (ctxt->sax->characters != NULL))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008448 ctxt->sax->characters(ctxt->userData, current, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00008449 ctxt->token = 0;
8450 }
8451 if ((avail < 2) && (ctxt->inputNr == 1))
8452 goto done;
8453 cur = ctxt->input->cur[0];
8454 next = ctxt->input->cur[1];
8455
8456 test = CUR_PTR;
8457 cons = ctxt->input->consumed;
8458 tok = ctxt->token;
8459 if ((cur == '<') && (next == '?')) {
8460 if ((!terminate) &&
8461 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8462 goto done;
8463#ifdef DEBUG_PUSH
8464 xmlGenericError(xmlGenericErrorContext,
8465 "PP: Parsing PI\n");
8466#endif
8467 xmlParsePI(ctxt);
8468 } else if ((cur == '<') && (next == '!') &&
8469 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8470 if ((!terminate) &&
8471 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8472 goto done;
8473#ifdef DEBUG_PUSH
8474 xmlGenericError(xmlGenericErrorContext,
8475 "PP: Parsing Comment\n");
8476#endif
8477 xmlParseComment(ctxt);
8478 ctxt->instate = XML_PARSER_CONTENT;
8479 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8480 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8481 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8482 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8483 (ctxt->input->cur[8] == '[')) {
8484 SKIP(9);
8485 ctxt->instate = XML_PARSER_CDATA_SECTION;
8486#ifdef DEBUG_PUSH
8487 xmlGenericError(xmlGenericErrorContext,
8488 "PP: entering CDATA_SECTION\n");
8489#endif
8490 break;
8491 } else if ((cur == '<') && (next == '!') &&
8492 (avail < 9)) {
8493 goto done;
8494 } else if ((cur == '<') && (next == '/')) {
8495 ctxt->instate = XML_PARSER_END_TAG;
8496#ifdef DEBUG_PUSH
8497 xmlGenericError(xmlGenericErrorContext,
8498 "PP: entering END_TAG\n");
8499#endif
8500 break;
8501 } else if (cur == '<') {
8502 ctxt->instate = XML_PARSER_START_TAG;
8503#ifdef DEBUG_PUSH
8504 xmlGenericError(xmlGenericErrorContext,
8505 "PP: entering START_TAG\n");
8506#endif
8507 break;
8508 } else if (cur == '&') {
8509 if ((!terminate) &&
8510 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8511 goto done;
8512#ifdef DEBUG_PUSH
8513 xmlGenericError(xmlGenericErrorContext,
8514 "PP: Parsing Reference\n");
8515#endif
8516 xmlParseReference(ctxt);
8517 } else {
8518 /* TODO Avoid the extra copy, handle directly !!! */
8519 /*
8520 * Goal of the following test is:
8521 * - minimize calls to the SAX 'character' callback
8522 * when they are mergeable
8523 * - handle a problem for isBlank when we only parse
8524 * a sequence of blank chars and the next one is
8525 * not available to check against '<' presence.
8526 * - tries to homogenize the differences in SAX
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008527 * callbacks between the push and pull versions
Owen Taylor3473f882001-02-23 17:55:21 +00008528 * of the parser.
8529 */
8530 if ((ctxt->inputNr == 1) &&
8531 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8532 if ((!terminate) &&
8533 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8534 goto done;
8535 }
8536 ctxt->checkIndex = 0;
8537#ifdef DEBUG_PUSH
8538 xmlGenericError(xmlGenericErrorContext,
8539 "PP: Parsing char data\n");
8540#endif
8541 xmlParseCharData(ctxt, 0);
8542 }
8543 /*
8544 * Pop-up of finished entities.
8545 */
8546 while ((RAW == 0) && (ctxt->inputNr > 1))
8547 xmlPopInput(ctxt);
8548 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8549 (tok == ctxt->token)) {
8550 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8551 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8552 ctxt->sax->error(ctxt->userData,
8553 "detected an error in element content\n");
8554 ctxt->wellFormed = 0;
8555 ctxt->disableSAX = 1;
8556 ctxt->instate = XML_PARSER_EOF;
8557 break;
8558 }
8559 break;
8560 }
8561 case XML_PARSER_CDATA_SECTION: {
8562 /*
8563 * The Push mode need to have the SAX callback for
8564 * cdataBlock merge back contiguous callbacks.
8565 */
8566 int base;
8567
8568 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8569 if (base < 0) {
8570 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8571 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8572 if (ctxt->sax->cdataBlock != NULL)
8573 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8574 XML_PARSER_BIG_BUFFER_SIZE);
8575 }
8576 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8577 ctxt->checkIndex = 0;
8578 }
8579 goto done;
8580 } else {
8581 if ((ctxt->sax != NULL) && (base > 0) &&
8582 (!ctxt->disableSAX)) {
8583 if (ctxt->sax->cdataBlock != NULL)
8584 ctxt->sax->cdataBlock(ctxt->userData,
8585 ctxt->input->cur, base);
8586 }
8587 SKIP(base + 3);
8588 ctxt->checkIndex = 0;
8589 ctxt->instate = XML_PARSER_CONTENT;
8590#ifdef DEBUG_PUSH
8591 xmlGenericError(xmlGenericErrorContext,
8592 "PP: entering CONTENT\n");
8593#endif
8594 }
8595 break;
8596 }
8597 case XML_PARSER_END_TAG:
8598 if (avail < 2)
8599 goto done;
8600 if ((!terminate) &&
8601 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8602 goto done;
8603 xmlParseEndTag(ctxt);
8604 if (ctxt->name == NULL) {
8605 ctxt->instate = XML_PARSER_EPILOG;
8606#ifdef DEBUG_PUSH
8607 xmlGenericError(xmlGenericErrorContext,
8608 "PP: entering EPILOG\n");
8609#endif
8610 } else {
8611 ctxt->instate = XML_PARSER_CONTENT;
8612#ifdef DEBUG_PUSH
8613 xmlGenericError(xmlGenericErrorContext,
8614 "PP: entering CONTENT\n");
8615#endif
8616 }
8617 break;
8618 case XML_PARSER_DTD: {
8619 /*
8620 * Sorry but progressive parsing of the internal subset
8621 * is not expected to be supported. We first check that
8622 * the full content of the internal subset is available and
8623 * the parsing is launched only at that point.
8624 * Internal subset ends up with "']' S? '>'" in an unescaped
8625 * section and not in a ']]>' sequence which are conditional
8626 * sections (whoever argued to keep that crap in XML deserve
8627 * a place in hell !).
8628 */
8629 int base, i;
8630 xmlChar *buf;
8631 xmlChar quote = 0;
8632
8633 base = ctxt->input->cur - ctxt->input->base;
8634 if (base < 0) return(0);
8635 if (ctxt->checkIndex > base)
8636 base = ctxt->checkIndex;
8637 buf = ctxt->input->buf->buffer->content;
8638 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8639 base++) {
8640 if (quote != 0) {
8641 if (buf[base] == quote)
8642 quote = 0;
8643 continue;
8644 }
8645 if (buf[base] == '"') {
8646 quote = '"';
8647 continue;
8648 }
8649 if (buf[base] == '\'') {
8650 quote = '\'';
8651 continue;
8652 }
8653 if (buf[base] == ']') {
8654 if ((unsigned int) base +1 >=
8655 ctxt->input->buf->buffer->use)
8656 break;
8657 if (buf[base + 1] == ']') {
8658 /* conditional crap, skip both ']' ! */
8659 base++;
8660 continue;
8661 }
8662 for (i = 0;
8663 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8664 i++) {
8665 if (buf[base + i] == '>')
8666 goto found_end_int_subset;
8667 }
8668 break;
8669 }
8670 }
8671 /*
8672 * We didn't find the end of the Internal subset
8673 */
8674 if (quote == 0)
8675 ctxt->checkIndex = base;
8676#ifdef DEBUG_PUSH
8677 if (next == 0)
8678 xmlGenericError(xmlGenericErrorContext,
8679 "PP: lookup of int subset end filed\n");
8680#endif
8681 goto done;
8682
8683found_end_int_subset:
8684 xmlParseInternalSubset(ctxt);
8685 ctxt->inSubset = 2;
8686 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8687 (ctxt->sax->externalSubset != NULL))
8688 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8689 ctxt->extSubSystem, ctxt->extSubURI);
8690 ctxt->inSubset = 0;
8691 ctxt->instate = XML_PARSER_PROLOG;
8692 ctxt->checkIndex = 0;
8693#ifdef DEBUG_PUSH
8694 xmlGenericError(xmlGenericErrorContext,
8695 "PP: entering PROLOG\n");
8696#endif
8697 break;
8698 }
8699 case XML_PARSER_COMMENT:
8700 xmlGenericError(xmlGenericErrorContext,
8701 "PP: internal error, state == COMMENT\n");
8702 ctxt->instate = XML_PARSER_CONTENT;
8703#ifdef DEBUG_PUSH
8704 xmlGenericError(xmlGenericErrorContext,
8705 "PP: entering CONTENT\n");
8706#endif
8707 break;
8708 case XML_PARSER_PI:
8709 xmlGenericError(xmlGenericErrorContext,
8710 "PP: internal error, state == PI\n");
8711 ctxt->instate = XML_PARSER_CONTENT;
8712#ifdef DEBUG_PUSH
8713 xmlGenericError(xmlGenericErrorContext,
8714 "PP: entering CONTENT\n");
8715#endif
8716 break;
8717 case XML_PARSER_ENTITY_DECL:
8718 xmlGenericError(xmlGenericErrorContext,
8719 "PP: internal error, state == ENTITY_DECL\n");
8720 ctxt->instate = XML_PARSER_DTD;
8721#ifdef DEBUG_PUSH
8722 xmlGenericError(xmlGenericErrorContext,
8723 "PP: entering DTD\n");
8724#endif
8725 break;
8726 case XML_PARSER_ENTITY_VALUE:
8727 xmlGenericError(xmlGenericErrorContext,
8728 "PP: internal error, state == ENTITY_VALUE\n");
8729 ctxt->instate = XML_PARSER_CONTENT;
8730#ifdef DEBUG_PUSH
8731 xmlGenericError(xmlGenericErrorContext,
8732 "PP: entering DTD\n");
8733#endif
8734 break;
8735 case XML_PARSER_ATTRIBUTE_VALUE:
8736 xmlGenericError(xmlGenericErrorContext,
8737 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8738 ctxt->instate = XML_PARSER_START_TAG;
8739#ifdef DEBUG_PUSH
8740 xmlGenericError(xmlGenericErrorContext,
8741 "PP: entering START_TAG\n");
8742#endif
8743 break;
8744 case XML_PARSER_SYSTEM_LITERAL:
8745 xmlGenericError(xmlGenericErrorContext,
8746 "PP: internal error, state == SYSTEM_LITERAL\n");
8747 ctxt->instate = XML_PARSER_START_TAG;
8748#ifdef DEBUG_PUSH
8749 xmlGenericError(xmlGenericErrorContext,
8750 "PP: entering START_TAG\n");
8751#endif
8752 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00008753 case XML_PARSER_PUBLIC_LITERAL:
8754 xmlGenericError(xmlGenericErrorContext,
8755 "PP: internal error, state == PUBLIC_LITERAL\n");
8756 ctxt->instate = XML_PARSER_START_TAG;
8757#ifdef DEBUG_PUSH
8758 xmlGenericError(xmlGenericErrorContext,
8759 "PP: entering START_TAG\n");
8760#endif
8761 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008762 }
8763 }
8764done:
8765#ifdef DEBUG_PUSH
8766 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8767#endif
8768 return(ret);
8769}
8770
8771/**
Owen Taylor3473f882001-02-23 17:55:21 +00008772 * xmlParseChunk:
8773 * @ctxt: an XML parser context
8774 * @chunk: an char array
8775 * @size: the size in byte of the chunk
8776 * @terminate: last chunk indicator
8777 *
8778 * Parse a Chunk of memory
8779 *
8780 * Returns zero if no error, the xmlParserErrors otherwise.
8781 */
8782int
8783xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8784 int terminate) {
8785 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8786 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8787 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8788 int cur = ctxt->input->cur - ctxt->input->base;
8789
8790 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8791 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8792 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008793 ctxt->input->end =
8794 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008795#ifdef DEBUG_PUSH
8796 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8797#endif
8798
8799 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8800 xmlParseTryOrFinish(ctxt, terminate);
8801 } else if (ctxt->instate != XML_PARSER_EOF) {
8802 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8803 xmlParserInputBufferPtr in = ctxt->input->buf;
8804 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8805 (in->raw != NULL)) {
8806 int nbchars;
8807
8808 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8809 if (nbchars < 0) {
8810 xmlGenericError(xmlGenericErrorContext,
8811 "xmlParseChunk: encoder error\n");
8812 return(XML_ERR_INVALID_ENCODING);
8813 }
8814 }
8815 }
8816 }
8817 xmlParseTryOrFinish(ctxt, terminate);
8818 if (terminate) {
8819 /*
8820 * Check for termination
8821 */
8822 if ((ctxt->instate != XML_PARSER_EOF) &&
8823 (ctxt->instate != XML_PARSER_EPILOG)) {
8824 ctxt->errNo = XML_ERR_DOCUMENT_END;
8825 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8826 ctxt->sax->error(ctxt->userData,
8827 "Extra content at the end of the document\n");
8828 ctxt->wellFormed = 0;
8829 ctxt->disableSAX = 1;
8830 }
8831 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008832 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008833 ctxt->sax->endDocument(ctxt->userData);
8834 }
8835 ctxt->instate = XML_PARSER_EOF;
8836 }
8837 return((xmlParserErrors) ctxt->errNo);
8838}
8839
8840/************************************************************************
8841 * *
8842 * I/O front end functions to the parser *
8843 * *
8844 ************************************************************************/
8845
8846/**
8847 * xmlStopParser:
8848 * @ctxt: an XML parser context
8849 *
8850 * Blocks further parser processing
8851 */
8852void
8853xmlStopParser(xmlParserCtxtPtr ctxt) {
8854 ctxt->instate = XML_PARSER_EOF;
8855 if (ctxt->input != NULL)
8856 ctxt->input->cur = BAD_CAST"";
8857}
8858
8859/**
8860 * xmlCreatePushParserCtxt:
8861 * @sax: a SAX handler
8862 * @user_data: The user data returned on SAX callbacks
8863 * @chunk: a pointer to an array of chars
8864 * @size: number of chars in the array
8865 * @filename: an optional file name or URI
8866 *
8867 * Create a parser context for using the XML parser in push mode
8868 * To allow content encoding detection, @size should be >= 4
8869 * The value of @filename is used for fetching external entities
8870 * and error/warning reports.
8871 *
8872 * Returns the new parser context or NULL
8873 */
8874xmlParserCtxtPtr
8875xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8876 const char *chunk, int size, const char *filename) {
8877 xmlParserCtxtPtr ctxt;
8878 xmlParserInputPtr inputStream;
8879 xmlParserInputBufferPtr buf;
8880 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8881
8882 /*
8883 * plug some encoding conversion routines
8884 */
8885 if ((chunk != NULL) && (size >= 4))
8886 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8887
8888 buf = xmlAllocParserInputBuffer(enc);
8889 if (buf == NULL) return(NULL);
8890
8891 ctxt = xmlNewParserCtxt();
8892 if (ctxt == NULL) {
8893 xmlFree(buf);
8894 return(NULL);
8895 }
8896 if (sax != NULL) {
8897 if (ctxt->sax != &xmlDefaultSAXHandler)
8898 xmlFree(ctxt->sax);
8899 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8900 if (ctxt->sax == NULL) {
8901 xmlFree(buf);
8902 xmlFree(ctxt);
8903 return(NULL);
8904 }
8905 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8906 if (user_data != NULL)
8907 ctxt->userData = user_data;
8908 }
8909 if (filename == NULL) {
8910 ctxt->directory = NULL;
8911 } else {
8912 ctxt->directory = xmlParserGetDirectory(filename);
8913 }
8914
8915 inputStream = xmlNewInputStream(ctxt);
8916 if (inputStream == NULL) {
8917 xmlFreeParserCtxt(ctxt);
8918 return(NULL);
8919 }
8920
8921 if (filename == NULL)
8922 inputStream->filename = NULL;
8923 else
8924 inputStream->filename = xmlMemStrdup(filename);
8925 inputStream->buf = buf;
8926 inputStream->base = inputStream->buf->buffer->content;
8927 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008928 inputStream->end =
8929 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008930
8931 inputPush(ctxt, inputStream);
8932
8933 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8934 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00008935 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8936 int cur = ctxt->input->cur - ctxt->input->base;
8937
Owen Taylor3473f882001-02-23 17:55:21 +00008938 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00008939
8940 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8941 ctxt->input->cur = ctxt->input->base + cur;
8942 ctxt->input->end =
8943 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008944#ifdef DEBUG_PUSH
8945 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8946#endif
8947 }
8948
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008949 if (enc != XML_CHAR_ENCODING_NONE) {
8950 xmlSwitchEncoding(ctxt, enc);
8951 }
8952
Owen Taylor3473f882001-02-23 17:55:21 +00008953 return(ctxt);
8954}
8955
8956/**
8957 * xmlCreateIOParserCtxt:
8958 * @sax: a SAX handler
8959 * @user_data: The user data returned on SAX callbacks
8960 * @ioread: an I/O read function
8961 * @ioclose: an I/O close function
8962 * @ioctx: an I/O handler
8963 * @enc: the charset encoding if known
8964 *
8965 * Create a parser context for using the XML parser with an existing
8966 * I/O stream
8967 *
8968 * Returns the new parser context or NULL
8969 */
8970xmlParserCtxtPtr
8971xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8972 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8973 void *ioctx, xmlCharEncoding enc) {
8974 xmlParserCtxtPtr ctxt;
8975 xmlParserInputPtr inputStream;
8976 xmlParserInputBufferPtr buf;
8977
8978 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8979 if (buf == NULL) return(NULL);
8980
8981 ctxt = xmlNewParserCtxt();
8982 if (ctxt == NULL) {
8983 xmlFree(buf);
8984 return(NULL);
8985 }
8986 if (sax != NULL) {
8987 if (ctxt->sax != &xmlDefaultSAXHandler)
8988 xmlFree(ctxt->sax);
8989 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8990 if (ctxt->sax == NULL) {
8991 xmlFree(buf);
8992 xmlFree(ctxt);
8993 return(NULL);
8994 }
8995 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8996 if (user_data != NULL)
8997 ctxt->userData = user_data;
8998 }
8999
9000 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9001 if (inputStream == NULL) {
9002 xmlFreeParserCtxt(ctxt);
9003 return(NULL);
9004 }
9005 inputPush(ctxt, inputStream);
9006
9007 return(ctxt);
9008}
9009
9010/************************************************************************
9011 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009012 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00009013 * *
9014 ************************************************************************/
9015
9016/**
9017 * xmlIOParseDTD:
9018 * @sax: the SAX handler block or NULL
9019 * @input: an Input Buffer
9020 * @enc: the charset encoding if known
9021 *
9022 * Load and parse a DTD
9023 *
9024 * Returns the resulting xmlDtdPtr or NULL in case of error.
9025 * @input will be freed at parsing end.
9026 */
9027
9028xmlDtdPtr
9029xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9030 xmlCharEncoding enc) {
9031 xmlDtdPtr ret = NULL;
9032 xmlParserCtxtPtr ctxt;
9033 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009034 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00009035
9036 if (input == NULL)
9037 return(NULL);
9038
9039 ctxt = xmlNewParserCtxt();
9040 if (ctxt == NULL) {
9041 return(NULL);
9042 }
9043
9044 /*
9045 * Set-up the SAX context
9046 */
9047 if (sax != NULL) {
9048 if (ctxt->sax != NULL)
9049 xmlFree(ctxt->sax);
9050 ctxt->sax = sax;
9051 ctxt->userData = NULL;
9052 }
9053
9054 /*
9055 * generate a parser input from the I/O handler
9056 */
9057
9058 pinput = xmlNewIOInputStream(ctxt, input, enc);
9059 if (pinput == NULL) {
9060 if (sax != NULL) ctxt->sax = NULL;
9061 xmlFreeParserCtxt(ctxt);
9062 return(NULL);
9063 }
9064
9065 /*
9066 * plug some encoding conversion routines here.
9067 */
9068 xmlPushInput(ctxt, pinput);
9069
9070 pinput->filename = NULL;
9071 pinput->line = 1;
9072 pinput->col = 1;
9073 pinput->base = ctxt->input->cur;
9074 pinput->cur = ctxt->input->cur;
9075 pinput->free = NULL;
9076
9077 /*
9078 * let's parse that entity knowing it's an external subset.
9079 */
9080 ctxt->inSubset = 2;
9081 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9082 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9083 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009084
9085 if (enc == XML_CHAR_ENCODING_NONE) {
9086 /*
9087 * Get the 4 first bytes and decode the charset
9088 * if enc != XML_CHAR_ENCODING_NONE
9089 * plug some encoding conversion routines.
9090 */
9091 start[0] = RAW;
9092 start[1] = NXT(1);
9093 start[2] = NXT(2);
9094 start[3] = NXT(3);
9095 enc = xmlDetectCharEncoding(start, 4);
9096 if (enc != XML_CHAR_ENCODING_NONE) {
9097 xmlSwitchEncoding(ctxt, enc);
9098 }
9099 }
9100
Owen Taylor3473f882001-02-23 17:55:21 +00009101 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9102
9103 if (ctxt->myDoc != NULL) {
9104 if (ctxt->wellFormed) {
9105 ret = ctxt->myDoc->extSubset;
9106 ctxt->myDoc->extSubset = NULL;
9107 } else {
9108 ret = NULL;
9109 }
9110 xmlFreeDoc(ctxt->myDoc);
9111 ctxt->myDoc = NULL;
9112 }
9113 if (sax != NULL) ctxt->sax = NULL;
9114 xmlFreeParserCtxt(ctxt);
9115
9116 return(ret);
9117}
9118
9119/**
9120 * xmlSAXParseDTD:
9121 * @sax: the SAX handler block
9122 * @ExternalID: a NAME* containing the External ID of the DTD
9123 * @SystemID: a NAME* containing the URL to the DTD
9124 *
9125 * Load and parse an external subset.
9126 *
9127 * Returns the resulting xmlDtdPtr or NULL in case of error.
9128 */
9129
9130xmlDtdPtr
9131xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9132 const xmlChar *SystemID) {
9133 xmlDtdPtr ret = NULL;
9134 xmlParserCtxtPtr ctxt;
9135 xmlParserInputPtr input = NULL;
9136 xmlCharEncoding enc;
9137
9138 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9139
9140 ctxt = xmlNewParserCtxt();
9141 if (ctxt == NULL) {
9142 return(NULL);
9143 }
9144
9145 /*
9146 * Set-up the SAX context
9147 */
9148 if (sax != NULL) {
9149 if (ctxt->sax != NULL)
9150 xmlFree(ctxt->sax);
9151 ctxt->sax = sax;
9152 ctxt->userData = NULL;
9153 }
9154
9155 /*
9156 * Ask the Entity resolver to load the damn thing
9157 */
9158
9159 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9160 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9161 if (input == NULL) {
9162 if (sax != NULL) ctxt->sax = NULL;
9163 xmlFreeParserCtxt(ctxt);
9164 return(NULL);
9165 }
9166
9167 /*
9168 * plug some encoding conversion routines here.
9169 */
9170 xmlPushInput(ctxt, input);
9171 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9172 xmlSwitchEncoding(ctxt, enc);
9173
9174 if (input->filename == NULL)
9175 input->filename = (char *) xmlStrdup(SystemID);
9176 input->line = 1;
9177 input->col = 1;
9178 input->base = ctxt->input->cur;
9179 input->cur = ctxt->input->cur;
9180 input->free = NULL;
9181
9182 /*
9183 * let's parse that entity knowing it's an external subset.
9184 */
9185 ctxt->inSubset = 2;
9186 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9187 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9188 ExternalID, SystemID);
9189 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9190
9191 if (ctxt->myDoc != NULL) {
9192 if (ctxt->wellFormed) {
9193 ret = ctxt->myDoc->extSubset;
9194 ctxt->myDoc->extSubset = NULL;
9195 } else {
9196 ret = NULL;
9197 }
9198 xmlFreeDoc(ctxt->myDoc);
9199 ctxt->myDoc = NULL;
9200 }
9201 if (sax != NULL) ctxt->sax = NULL;
9202 xmlFreeParserCtxt(ctxt);
9203
9204 return(ret);
9205}
9206
9207/**
9208 * xmlParseDTD:
9209 * @ExternalID: a NAME* containing the External ID of the DTD
9210 * @SystemID: a NAME* containing the URL to the DTD
9211 *
9212 * Load and parse an external subset.
9213 *
9214 * Returns the resulting xmlDtdPtr or NULL in case of error.
9215 */
9216
9217xmlDtdPtr
9218xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9219 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9220}
9221
9222/************************************************************************
9223 * *
9224 * Front ends when parsing an Entity *
9225 * *
9226 ************************************************************************/
9227
9228/**
Owen Taylor3473f882001-02-23 17:55:21 +00009229 * xmlParseCtxtExternalEntity:
9230 * @ctx: the existing parsing context
9231 * @URL: the URL for the entity to load
9232 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009233 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009234 *
9235 * Parse an external general entity within an existing parsing context
9236 * An external general parsed entity is well-formed if it matches the
9237 * production labeled extParsedEnt.
9238 *
9239 * [78] extParsedEnt ::= TextDecl? content
9240 *
9241 * Returns 0 if the entity is well formed, -1 in case of args problem and
9242 * the parser error code otherwise
9243 */
9244
9245int
9246xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009247 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009248 xmlParserCtxtPtr ctxt;
9249 xmlDocPtr newDoc;
9250 xmlSAXHandlerPtr oldsax = NULL;
9251 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009252 xmlChar start[4];
9253 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009254
9255 if (ctx->depth > 40) {
9256 return(XML_ERR_ENTITY_LOOP);
9257 }
9258
Daniel Veillardcda96922001-08-21 10:56:31 +00009259 if (lst != NULL)
9260 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009261 if ((URL == NULL) && (ID == NULL))
9262 return(-1);
9263 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9264 return(-1);
9265
9266
9267 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9268 if (ctxt == NULL) return(-1);
9269 ctxt->userData = ctxt;
9270 oldsax = ctxt->sax;
9271 ctxt->sax = ctx->sax;
9272 newDoc = xmlNewDoc(BAD_CAST "1.0");
9273 if (newDoc == NULL) {
9274 xmlFreeParserCtxt(ctxt);
9275 return(-1);
9276 }
9277 if (ctx->myDoc != NULL) {
9278 newDoc->intSubset = ctx->myDoc->intSubset;
9279 newDoc->extSubset = ctx->myDoc->extSubset;
9280 }
9281 if (ctx->myDoc->URL != NULL) {
9282 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9283 }
9284 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9285 if (newDoc->children == NULL) {
9286 ctxt->sax = oldsax;
9287 xmlFreeParserCtxt(ctxt);
9288 newDoc->intSubset = NULL;
9289 newDoc->extSubset = NULL;
9290 xmlFreeDoc(newDoc);
9291 return(-1);
9292 }
9293 nodePush(ctxt, newDoc->children);
9294 if (ctx->myDoc == NULL) {
9295 ctxt->myDoc = newDoc;
9296 } else {
9297 ctxt->myDoc = ctx->myDoc;
9298 newDoc->children->doc = ctx->myDoc;
9299 }
9300
Daniel Veillard87a764e2001-06-20 17:41:10 +00009301 /*
9302 * Get the 4 first bytes and decode the charset
9303 * if enc != XML_CHAR_ENCODING_NONE
9304 * plug some encoding conversion routines.
9305 */
9306 GROW
9307 start[0] = RAW;
9308 start[1] = NXT(1);
9309 start[2] = NXT(2);
9310 start[3] = NXT(3);
9311 enc = xmlDetectCharEncoding(start, 4);
9312 if (enc != XML_CHAR_ENCODING_NONE) {
9313 xmlSwitchEncoding(ctxt, enc);
9314 }
9315
Owen Taylor3473f882001-02-23 17:55:21 +00009316 /*
9317 * Parse a possible text declaration first
9318 */
Owen Taylor3473f882001-02-23 17:55:21 +00009319 if ((RAW == '<') && (NXT(1) == '?') &&
9320 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9321 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9322 xmlParseTextDecl(ctxt);
9323 }
9324
9325 /*
9326 * Doing validity checking on chunk doesn't make sense
9327 */
9328 ctxt->instate = XML_PARSER_CONTENT;
9329 ctxt->validate = ctx->validate;
9330 ctxt->loadsubset = ctx->loadsubset;
9331 ctxt->depth = ctx->depth + 1;
9332 ctxt->replaceEntities = ctx->replaceEntities;
9333 if (ctxt->validate) {
9334 ctxt->vctxt.error = ctx->vctxt.error;
9335 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009336 } else {
9337 ctxt->vctxt.error = NULL;
9338 ctxt->vctxt.warning = NULL;
9339 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009340 ctxt->vctxt.nodeTab = NULL;
9341 ctxt->vctxt.nodeNr = 0;
9342 ctxt->vctxt.nodeMax = 0;
9343 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009344
9345 xmlParseContent(ctxt);
9346
9347 if ((RAW == '<') && (NXT(1) == '/')) {
9348 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9350 ctxt->sax->error(ctxt->userData,
9351 "chunk is not well balanced\n");
9352 ctxt->wellFormed = 0;
9353 ctxt->disableSAX = 1;
9354 } else if (RAW != 0) {
9355 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9356 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9357 ctxt->sax->error(ctxt->userData,
9358 "extra content at the end of well balanced chunk\n");
9359 ctxt->wellFormed = 0;
9360 ctxt->disableSAX = 1;
9361 }
9362 if (ctxt->node != newDoc->children) {
9363 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9364 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9365 ctxt->sax->error(ctxt->userData,
9366 "chunk is not well balanced\n");
9367 ctxt->wellFormed = 0;
9368 ctxt->disableSAX = 1;
9369 }
9370
9371 if (!ctxt->wellFormed) {
9372 if (ctxt->errNo == 0)
9373 ret = 1;
9374 else
9375 ret = ctxt->errNo;
9376 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009377 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009378 xmlNodePtr cur;
9379
9380 /*
9381 * Return the newly created nodeset after unlinking it from
9382 * they pseudo parent.
9383 */
9384 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009385 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009386 while (cur != NULL) {
9387 cur->parent = NULL;
9388 cur = cur->next;
9389 }
9390 newDoc->children->children = NULL;
9391 }
9392 ret = 0;
9393 }
9394 ctxt->sax = oldsax;
9395 xmlFreeParserCtxt(ctxt);
9396 newDoc->intSubset = NULL;
9397 newDoc->extSubset = NULL;
9398 xmlFreeDoc(newDoc);
9399
9400 return(ret);
9401}
9402
9403/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009404 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009405 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009406 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009407 * @sax: the SAX handler bloc (possibly NULL)
9408 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9409 * @depth: Used for loop detection, use 0
9410 * @URL: the URL for the entity to load
9411 * @ID: the System ID for the entity to load
9412 * @list: the return value for the set of parsed nodes
9413 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009414 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009415 *
9416 * Returns 0 if the entity is well formed, -1 in case of args problem and
9417 * the parser error code otherwise
9418 */
9419
Daniel Veillard257d9102001-05-08 10:41:44 +00009420static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009421xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9422 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009423 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009424 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009425 xmlParserCtxtPtr ctxt;
9426 xmlDocPtr newDoc;
9427 xmlSAXHandlerPtr oldsax = NULL;
9428 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009429 xmlChar start[4];
9430 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009431
9432 if (depth > 40) {
9433 return(XML_ERR_ENTITY_LOOP);
9434 }
9435
9436
9437
9438 if (list != NULL)
9439 *list = NULL;
9440 if ((URL == NULL) && (ID == NULL))
9441 return(-1);
9442 if (doc == NULL) /* @@ relax but check for dereferences */
9443 return(-1);
9444
9445
9446 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9447 if (ctxt == NULL) return(-1);
9448 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009449 if (oldctxt != NULL) {
9450 ctxt->_private = oldctxt->_private;
9451 ctxt->loadsubset = oldctxt->loadsubset;
9452 ctxt->validate = oldctxt->validate;
9453 ctxt->external = oldctxt->external;
9454 } else {
9455 /*
9456 * Doing validity checking on chunk without context
9457 * doesn't make sense
9458 */
9459 ctxt->_private = NULL;
9460 ctxt->validate = 0;
9461 ctxt->external = 2;
9462 ctxt->loadsubset = 0;
9463 }
Owen Taylor3473f882001-02-23 17:55:21 +00009464 if (sax != NULL) {
9465 oldsax = ctxt->sax;
9466 ctxt->sax = sax;
9467 if (user_data != NULL)
9468 ctxt->userData = user_data;
9469 }
9470 newDoc = xmlNewDoc(BAD_CAST "1.0");
9471 if (newDoc == NULL) {
9472 xmlFreeParserCtxt(ctxt);
9473 return(-1);
9474 }
9475 if (doc != NULL) {
9476 newDoc->intSubset = doc->intSubset;
9477 newDoc->extSubset = doc->extSubset;
9478 }
9479 if (doc->URL != NULL) {
9480 newDoc->URL = xmlStrdup(doc->URL);
9481 }
9482 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9483 if (newDoc->children == NULL) {
9484 if (sax != NULL)
9485 ctxt->sax = oldsax;
9486 xmlFreeParserCtxt(ctxt);
9487 newDoc->intSubset = NULL;
9488 newDoc->extSubset = NULL;
9489 xmlFreeDoc(newDoc);
9490 return(-1);
9491 }
9492 nodePush(ctxt, newDoc->children);
9493 if (doc == NULL) {
9494 ctxt->myDoc = newDoc;
9495 } else {
9496 ctxt->myDoc = doc;
9497 newDoc->children->doc = doc;
9498 }
9499
Daniel Veillard87a764e2001-06-20 17:41:10 +00009500 /*
9501 * Get the 4 first bytes and decode the charset
9502 * if enc != XML_CHAR_ENCODING_NONE
9503 * plug some encoding conversion routines.
9504 */
9505 GROW;
9506 start[0] = RAW;
9507 start[1] = NXT(1);
9508 start[2] = NXT(2);
9509 start[3] = NXT(3);
9510 enc = xmlDetectCharEncoding(start, 4);
9511 if (enc != XML_CHAR_ENCODING_NONE) {
9512 xmlSwitchEncoding(ctxt, enc);
9513 }
9514
Owen Taylor3473f882001-02-23 17:55:21 +00009515 /*
9516 * Parse a possible text declaration first
9517 */
Owen Taylor3473f882001-02-23 17:55:21 +00009518 if ((RAW == '<') && (NXT(1) == '?') &&
9519 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9520 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9521 xmlParseTextDecl(ctxt);
9522 }
9523
Owen Taylor3473f882001-02-23 17:55:21 +00009524 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009525 ctxt->depth = depth;
9526
9527 xmlParseContent(ctxt);
9528
Daniel Veillard561b7f82002-03-20 21:55:57 +00009529 if ((RAW == '<') && (NXT(1) == '/')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009530 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9531 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9532 ctxt->sax->error(ctxt->userData,
9533 "chunk is not well balanced\n");
9534 ctxt->wellFormed = 0;
9535 ctxt->disableSAX = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00009536 } else if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00009537 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9538 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9539 ctxt->sax->error(ctxt->userData,
9540 "extra content at the end of well balanced chunk\n");
9541 ctxt->wellFormed = 0;
9542 ctxt->disableSAX = 1;
9543 }
9544 if (ctxt->node != newDoc->children) {
9545 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9546 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9547 ctxt->sax->error(ctxt->userData,
9548 "chunk is not well balanced\n");
9549 ctxt->wellFormed = 0;
9550 ctxt->disableSAX = 1;
9551 }
9552
9553 if (!ctxt->wellFormed) {
9554 if (ctxt->errNo == 0)
9555 ret = 1;
9556 else
9557 ret = ctxt->errNo;
9558 } else {
9559 if (list != NULL) {
9560 xmlNodePtr cur;
9561
9562 /*
9563 * Return the newly created nodeset after unlinking it from
9564 * they pseudo parent.
9565 */
9566 cur = newDoc->children->children;
9567 *list = cur;
9568 while (cur != NULL) {
9569 cur->parent = NULL;
9570 cur = cur->next;
9571 }
9572 newDoc->children->children = NULL;
9573 }
9574 ret = 0;
9575 }
9576 if (sax != NULL)
9577 ctxt->sax = oldsax;
9578 xmlFreeParserCtxt(ctxt);
9579 newDoc->intSubset = NULL;
9580 newDoc->extSubset = NULL;
9581 xmlFreeDoc(newDoc);
9582
9583 return(ret);
9584}
9585
9586/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009587 * xmlParseExternalEntity:
9588 * @doc: the document the chunk pertains to
9589 * @sax: the SAX handler bloc (possibly NULL)
9590 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9591 * @depth: Used for loop detection, use 0
9592 * @URL: the URL for the entity to load
9593 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009594 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009595 *
9596 * Parse an external general entity
9597 * An external general parsed entity is well-formed if it matches the
9598 * production labeled extParsedEnt.
9599 *
9600 * [78] extParsedEnt ::= TextDecl? content
9601 *
9602 * Returns 0 if the entity is well formed, -1 in case of args problem and
9603 * the parser error code otherwise
9604 */
9605
9606int
9607xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009608 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009609 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009610 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009611}
9612
9613/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009614 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009615 * @doc: the document the chunk pertains to
9616 * @sax: the SAX handler bloc (possibly NULL)
9617 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9618 * @depth: Used for loop detection, use 0
9619 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009620 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009621 *
9622 * Parse a well-balanced chunk of an XML document
9623 * called by the parser
9624 * The allowed sequence for the Well Balanced Chunk is the one defined by
9625 * the content production in the XML grammar:
9626 *
9627 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9628 *
9629 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9630 * the parser error code otherwise
9631 */
9632
9633int
9634xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009635 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009636 xmlParserCtxtPtr ctxt;
9637 xmlDocPtr newDoc;
9638 xmlSAXHandlerPtr oldsax = NULL;
9639 int size;
9640 int ret = 0;
9641
9642 if (depth > 40) {
9643 return(XML_ERR_ENTITY_LOOP);
9644 }
9645
9646
Daniel Veillardcda96922001-08-21 10:56:31 +00009647 if (lst != NULL)
9648 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009649 if (string == NULL)
9650 return(-1);
9651
9652 size = xmlStrlen(string);
9653
9654 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9655 if (ctxt == NULL) return(-1);
9656 ctxt->userData = ctxt;
9657 if (sax != NULL) {
9658 oldsax = ctxt->sax;
9659 ctxt->sax = sax;
9660 if (user_data != NULL)
9661 ctxt->userData = user_data;
9662 }
9663 newDoc = xmlNewDoc(BAD_CAST "1.0");
9664 if (newDoc == NULL) {
9665 xmlFreeParserCtxt(ctxt);
9666 return(-1);
9667 }
9668 if (doc != NULL) {
9669 newDoc->intSubset = doc->intSubset;
9670 newDoc->extSubset = doc->extSubset;
9671 }
9672 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9673 if (newDoc->children == NULL) {
9674 if (sax != NULL)
9675 ctxt->sax = oldsax;
9676 xmlFreeParserCtxt(ctxt);
9677 newDoc->intSubset = NULL;
9678 newDoc->extSubset = NULL;
9679 xmlFreeDoc(newDoc);
9680 return(-1);
9681 }
9682 nodePush(ctxt, newDoc->children);
9683 if (doc == NULL) {
9684 ctxt->myDoc = newDoc;
9685 } else {
9686 ctxt->myDoc = doc;
9687 newDoc->children->doc = doc;
9688 }
9689 ctxt->instate = XML_PARSER_CONTENT;
9690 ctxt->depth = depth;
9691
9692 /*
9693 * Doing validity checking on chunk doesn't make sense
9694 */
9695 ctxt->validate = 0;
9696 ctxt->loadsubset = 0;
9697
9698 xmlParseContent(ctxt);
9699
9700 if ((RAW == '<') && (NXT(1) == '/')) {
9701 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9702 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9703 ctxt->sax->error(ctxt->userData,
9704 "chunk is not well balanced\n");
9705 ctxt->wellFormed = 0;
9706 ctxt->disableSAX = 1;
9707 } else if (RAW != 0) {
9708 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9709 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9710 ctxt->sax->error(ctxt->userData,
9711 "extra content at the end of well balanced chunk\n");
9712 ctxt->wellFormed = 0;
9713 ctxt->disableSAX = 1;
9714 }
9715 if (ctxt->node != newDoc->children) {
9716 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9717 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9718 ctxt->sax->error(ctxt->userData,
9719 "chunk is not well balanced\n");
9720 ctxt->wellFormed = 0;
9721 ctxt->disableSAX = 1;
9722 }
9723
9724 if (!ctxt->wellFormed) {
9725 if (ctxt->errNo == 0)
9726 ret = 1;
9727 else
9728 ret = ctxt->errNo;
9729 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009730 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009731 xmlNodePtr cur;
9732
9733 /*
9734 * Return the newly created nodeset after unlinking it from
9735 * they pseudo parent.
9736 */
9737 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009738 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009739 while (cur != NULL) {
9740 cur->parent = NULL;
9741 cur = cur->next;
9742 }
9743 newDoc->children->children = NULL;
9744 }
9745 ret = 0;
9746 }
9747 if (sax != NULL)
9748 ctxt->sax = oldsax;
9749 xmlFreeParserCtxt(ctxt);
9750 newDoc->intSubset = NULL;
9751 newDoc->extSubset = NULL;
9752 xmlFreeDoc(newDoc);
9753
9754 return(ret);
9755}
9756
9757/**
9758 * xmlSAXParseEntity:
9759 * @sax: the SAX handler block
9760 * @filename: the filename
9761 *
9762 * parse an XML external entity out of context and build a tree.
9763 * It use the given SAX function block to handle the parsing callback.
9764 * If sax is NULL, fallback to the default DOM tree building routines.
9765 *
9766 * [78] extParsedEnt ::= TextDecl? content
9767 *
9768 * This correspond to a "Well Balanced" chunk
9769 *
9770 * Returns the resulting document tree
9771 */
9772
9773xmlDocPtr
9774xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9775 xmlDocPtr ret;
9776 xmlParserCtxtPtr ctxt;
9777 char *directory = NULL;
9778
9779 ctxt = xmlCreateFileParserCtxt(filename);
9780 if (ctxt == NULL) {
9781 return(NULL);
9782 }
9783 if (sax != NULL) {
9784 if (ctxt->sax != NULL)
9785 xmlFree(ctxt->sax);
9786 ctxt->sax = sax;
9787 ctxt->userData = NULL;
9788 }
9789
9790 if ((ctxt->directory == NULL) && (directory == NULL))
9791 directory = xmlParserGetDirectory(filename);
9792
9793 xmlParseExtParsedEnt(ctxt);
9794
9795 if (ctxt->wellFormed)
9796 ret = ctxt->myDoc;
9797 else {
9798 ret = NULL;
9799 xmlFreeDoc(ctxt->myDoc);
9800 ctxt->myDoc = NULL;
9801 }
9802 if (sax != NULL)
9803 ctxt->sax = NULL;
9804 xmlFreeParserCtxt(ctxt);
9805
9806 return(ret);
9807}
9808
9809/**
9810 * xmlParseEntity:
9811 * @filename: the filename
9812 *
9813 * parse an XML external entity out of context and build a tree.
9814 *
9815 * [78] extParsedEnt ::= TextDecl? content
9816 *
9817 * This correspond to a "Well Balanced" chunk
9818 *
9819 * Returns the resulting document tree
9820 */
9821
9822xmlDocPtr
9823xmlParseEntity(const char *filename) {
9824 return(xmlSAXParseEntity(NULL, filename));
9825}
9826
9827/**
9828 * xmlCreateEntityParserCtxt:
9829 * @URL: the entity URL
9830 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009831 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +00009832 *
9833 * Create a parser context for an external entity
9834 * Automatic support for ZLIB/Compress compressed document is provided
9835 * by default if found at compile-time.
9836 *
9837 * Returns the new parser context or NULL
9838 */
9839xmlParserCtxtPtr
9840xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9841 const xmlChar *base) {
9842 xmlParserCtxtPtr ctxt;
9843 xmlParserInputPtr inputStream;
9844 char *directory = NULL;
9845 xmlChar *uri;
9846
9847 ctxt = xmlNewParserCtxt();
9848 if (ctxt == NULL) {
9849 return(NULL);
9850 }
9851
9852 uri = xmlBuildURI(URL, base);
9853
9854 if (uri == NULL) {
9855 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9856 if (inputStream == NULL) {
9857 xmlFreeParserCtxt(ctxt);
9858 return(NULL);
9859 }
9860
9861 inputPush(ctxt, inputStream);
9862
9863 if ((ctxt->directory == NULL) && (directory == NULL))
9864 directory = xmlParserGetDirectory((char *)URL);
9865 if ((ctxt->directory == NULL) && (directory != NULL))
9866 ctxt->directory = directory;
9867 } else {
9868 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9869 if (inputStream == NULL) {
9870 xmlFree(uri);
9871 xmlFreeParserCtxt(ctxt);
9872 return(NULL);
9873 }
9874
9875 inputPush(ctxt, inputStream);
9876
9877 if ((ctxt->directory == NULL) && (directory == NULL))
9878 directory = xmlParserGetDirectory((char *)uri);
9879 if ((ctxt->directory == NULL) && (directory != NULL))
9880 ctxt->directory = directory;
9881 xmlFree(uri);
9882 }
9883
9884 return(ctxt);
9885}
9886
9887/************************************************************************
9888 * *
9889 * Front ends when parsing from a file *
9890 * *
9891 ************************************************************************/
9892
9893/**
9894 * xmlCreateFileParserCtxt:
9895 * @filename: the filename
9896 *
9897 * Create a parser context for a file content.
9898 * Automatic support for ZLIB/Compress compressed document is provided
9899 * by default if found at compile-time.
9900 *
9901 * Returns the new parser context or NULL
9902 */
9903xmlParserCtxtPtr
9904xmlCreateFileParserCtxt(const char *filename)
9905{
9906 xmlParserCtxtPtr ctxt;
9907 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +00009908 char *directory = NULL;
9909
Owen Taylor3473f882001-02-23 17:55:21 +00009910 ctxt = xmlNewParserCtxt();
9911 if (ctxt == NULL) {
9912 if (xmlDefaultSAXHandler.error != NULL) {
9913 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9914 }
9915 return(NULL);
9916 }
9917
Daniel Veillard9f7b84b2001-08-23 15:31:19 +00009918 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009919 if (inputStream == NULL) {
9920 xmlFreeParserCtxt(ctxt);
9921 return(NULL);
9922 }
9923
Owen Taylor3473f882001-02-23 17:55:21 +00009924 inputPush(ctxt, inputStream);
9925 if ((ctxt->directory == NULL) && (directory == NULL))
9926 directory = xmlParserGetDirectory(filename);
9927 if ((ctxt->directory == NULL) && (directory != NULL))
9928 ctxt->directory = directory;
9929
9930 return(ctxt);
9931}
9932
9933/**
Daniel Veillarda293c322001-10-02 13:54:14 +00009934 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +00009935 * @sax: the SAX handler block
9936 * @filename: the filename
9937 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9938 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +00009939 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +00009940 *
9941 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9942 * compressed document is provided by default if found at compile-time.
9943 * It use the given SAX function block to handle the parsing callback.
9944 * If sax is NULL, fallback to the default DOM tree building routines.
9945 *
Daniel Veillarda293c322001-10-02 13:54:14 +00009946 * User data (void *) is stored within the parser context, so it is
9947 * available nearly everywhere in libxml.
9948 *
Owen Taylor3473f882001-02-23 17:55:21 +00009949 * Returns the resulting document tree
9950 */
9951
9952xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +00009953xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
9954 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +00009955 xmlDocPtr ret;
9956 xmlParserCtxtPtr ctxt;
9957 char *directory = NULL;
9958
Daniel Veillard635ef722001-10-29 11:48:19 +00009959 xmlInitParser();
9960
Owen Taylor3473f882001-02-23 17:55:21 +00009961 ctxt = xmlCreateFileParserCtxt(filename);
9962 if (ctxt == NULL) {
9963 return(NULL);
9964 }
9965 if (sax != NULL) {
9966 if (ctxt->sax != NULL)
9967 xmlFree(ctxt->sax);
9968 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009969 }
Daniel Veillarda293c322001-10-02 13:54:14 +00009970 if (data!=NULL) {
9971 ctxt->_private=data;
9972 }
Owen Taylor3473f882001-02-23 17:55:21 +00009973
9974 if ((ctxt->directory == NULL) && (directory == NULL))
9975 directory = xmlParserGetDirectory(filename);
9976 if ((ctxt->directory == NULL) && (directory != NULL))
9977 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9978
9979 xmlParseDocument(ctxt);
9980
9981 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9982 else {
9983 ret = NULL;
9984 xmlFreeDoc(ctxt->myDoc);
9985 ctxt->myDoc = NULL;
9986 }
9987 if (sax != NULL)
9988 ctxt->sax = NULL;
9989 xmlFreeParserCtxt(ctxt);
9990
9991 return(ret);
9992}
9993
9994/**
Daniel Veillarda293c322001-10-02 13:54:14 +00009995 * xmlSAXParseFile:
9996 * @sax: the SAX handler block
9997 * @filename: the filename
9998 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9999 * documents
10000 *
10001 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10002 * compressed document is provided by default if found at compile-time.
10003 * It use the given SAX function block to handle the parsing callback.
10004 * If sax is NULL, fallback to the default DOM tree building routines.
10005 *
10006 * Returns the resulting document tree
10007 */
10008
10009xmlDocPtr
10010xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
10011 int recovery) {
10012 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
10013}
10014
10015/**
Owen Taylor3473f882001-02-23 17:55:21 +000010016 * xmlRecoverDoc:
10017 * @cur: a pointer to an array of xmlChar
10018 *
10019 * parse an XML in-memory document and build a tree.
10020 * In the case the document is not Well Formed, a tree is built anyway
10021 *
10022 * Returns the resulting document tree
10023 */
10024
10025xmlDocPtr
10026xmlRecoverDoc(xmlChar *cur) {
10027 return(xmlSAXParseDoc(NULL, cur, 1));
10028}
10029
10030/**
10031 * xmlParseFile:
10032 * @filename: the filename
10033 *
10034 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10035 * compressed document is provided by default if found at compile-time.
10036 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000010037 * Returns the resulting document tree if the file was wellformed,
10038 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000010039 */
10040
10041xmlDocPtr
10042xmlParseFile(const char *filename) {
10043 return(xmlSAXParseFile(NULL, filename, 0));
10044}
10045
10046/**
10047 * xmlRecoverFile:
10048 * @filename: the filename
10049 *
10050 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10051 * compressed document is provided by default if found at compile-time.
10052 * In the case the document is not Well Formed, a tree is built anyway
10053 *
10054 * Returns the resulting document tree
10055 */
10056
10057xmlDocPtr
10058xmlRecoverFile(const char *filename) {
10059 return(xmlSAXParseFile(NULL, filename, 1));
10060}
10061
10062
10063/**
10064 * xmlSetupParserForBuffer:
10065 * @ctxt: an XML parser context
10066 * @buffer: a xmlChar * buffer
10067 * @filename: a file name
10068 *
10069 * Setup the parser context to parse a new buffer; Clears any prior
10070 * contents from the parser context. The buffer parameter must not be
10071 * NULL, but the filename parameter can be
10072 */
10073void
10074xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
10075 const char* filename)
10076{
10077 xmlParserInputPtr input;
10078
10079 input = xmlNewInputStream(ctxt);
10080 if (input == NULL) {
10081 perror("malloc");
10082 xmlFree(ctxt);
10083 return;
10084 }
10085
10086 xmlClearParserCtxt(ctxt);
10087 if (filename != NULL)
10088 input->filename = xmlMemStrdup(filename);
10089 input->base = buffer;
10090 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010091 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000010092 inputPush(ctxt, input);
10093}
10094
10095/**
10096 * xmlSAXUserParseFile:
10097 * @sax: a SAX handler
10098 * @user_data: The user data returned on SAX callbacks
10099 * @filename: a file name
10100 *
10101 * parse an XML file and call the given SAX handler routines.
10102 * Automatic support for ZLIB/Compress compressed document is provided
10103 *
10104 * Returns 0 in case of success or a error number otherwise
10105 */
10106int
10107xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10108 const char *filename) {
10109 int ret = 0;
10110 xmlParserCtxtPtr ctxt;
10111
10112 ctxt = xmlCreateFileParserCtxt(filename);
10113 if (ctxt == NULL) return -1;
10114 if (ctxt->sax != &xmlDefaultSAXHandler)
10115 xmlFree(ctxt->sax);
10116 ctxt->sax = sax;
10117 if (user_data != NULL)
10118 ctxt->userData = user_data;
10119
10120 xmlParseDocument(ctxt);
10121
10122 if (ctxt->wellFormed)
10123 ret = 0;
10124 else {
10125 if (ctxt->errNo != 0)
10126 ret = ctxt->errNo;
10127 else
10128 ret = -1;
10129 }
10130 if (sax != NULL)
10131 ctxt->sax = NULL;
10132 xmlFreeParserCtxt(ctxt);
10133
10134 return ret;
10135}
10136
10137/************************************************************************
10138 * *
10139 * Front ends when parsing from memory *
10140 * *
10141 ************************************************************************/
10142
10143/**
10144 * xmlCreateMemoryParserCtxt:
10145 * @buffer: a pointer to a char array
10146 * @size: the size of the array
10147 *
10148 * Create a parser context for an XML in-memory document.
10149 *
10150 * Returns the new parser context or NULL
10151 */
10152xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010153xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010154 xmlParserCtxtPtr ctxt;
10155 xmlParserInputPtr input;
10156 xmlParserInputBufferPtr buf;
10157
10158 if (buffer == NULL)
10159 return(NULL);
10160 if (size <= 0)
10161 return(NULL);
10162
10163 ctxt = xmlNewParserCtxt();
10164 if (ctxt == NULL)
10165 return(NULL);
10166
10167 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
10168 if (buf == NULL) return(NULL);
10169
10170 input = xmlNewInputStream(ctxt);
10171 if (input == NULL) {
10172 xmlFreeParserCtxt(ctxt);
10173 return(NULL);
10174 }
10175
10176 input->filename = NULL;
10177 input->buf = buf;
10178 input->base = input->buf->buffer->content;
10179 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010180 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010181
10182 inputPush(ctxt, input);
10183 return(ctxt);
10184}
10185
10186/**
10187 * xmlSAXParseMemory:
10188 * @sax: the SAX handler block
10189 * @buffer: an pointer to a char array
10190 * @size: the size of the array
10191 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10192 * documents
10193 *
10194 * parse an XML in-memory block and use the given SAX function block
10195 * to handle the parsing callback. If sax is NULL, fallback to the default
10196 * DOM tree building routines.
10197 *
10198 * Returns the resulting document tree
10199 */
10200xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010201xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10202 int size, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000010203 xmlDocPtr ret;
10204 xmlParserCtxtPtr ctxt;
10205
10206 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10207 if (ctxt == NULL) return(NULL);
10208 if (sax != NULL) {
10209 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010210 }
10211
10212 xmlParseDocument(ctxt);
10213
10214 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10215 else {
10216 ret = NULL;
10217 xmlFreeDoc(ctxt->myDoc);
10218 ctxt->myDoc = NULL;
10219 }
10220 if (sax != NULL)
10221 ctxt->sax = NULL;
10222 xmlFreeParserCtxt(ctxt);
10223
10224 return(ret);
10225}
10226
10227/**
10228 * xmlParseMemory:
10229 * @buffer: an pointer to a char array
10230 * @size: the size of the array
10231 *
10232 * parse an XML in-memory block and build a tree.
10233 *
10234 * Returns the resulting document tree
10235 */
10236
Daniel Veillard50822cb2001-07-26 20:05:51 +000010237xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010238 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10239}
10240
10241/**
10242 * xmlRecoverMemory:
10243 * @buffer: an pointer to a char array
10244 * @size: the size of the array
10245 *
10246 * parse an XML in-memory block and build a tree.
10247 * In the case the document is not Well Formed, a tree is built anyway
10248 *
10249 * Returns the resulting document tree
10250 */
10251
Daniel Veillard50822cb2001-07-26 20:05:51 +000010252xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010253 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10254}
10255
10256/**
10257 * xmlSAXUserParseMemory:
10258 * @sax: a SAX handler
10259 * @user_data: The user data returned on SAX callbacks
10260 * @buffer: an in-memory XML document input
10261 * @size: the length of the XML document in bytes
10262 *
10263 * A better SAX parsing routine.
10264 * parse an XML in-memory buffer and call the given SAX handler routines.
10265 *
10266 * Returns 0 in case of success or a error number otherwise
10267 */
10268int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010269 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010270 int ret = 0;
10271 xmlParserCtxtPtr ctxt;
10272 xmlSAXHandlerPtr oldsax = NULL;
10273
10274 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10275 if (ctxt == NULL) return -1;
10276 if (sax != NULL) {
10277 oldsax = ctxt->sax;
10278 ctxt->sax = sax;
10279 }
Daniel Veillard30211a02001-04-26 09:33:18 +000010280 if (user_data != NULL)
10281 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010282
10283 xmlParseDocument(ctxt);
10284
10285 if (ctxt->wellFormed)
10286 ret = 0;
10287 else {
10288 if (ctxt->errNo != 0)
10289 ret = ctxt->errNo;
10290 else
10291 ret = -1;
10292 }
10293 if (sax != NULL) {
10294 ctxt->sax = oldsax;
10295 }
10296 xmlFreeParserCtxt(ctxt);
10297
10298 return ret;
10299}
10300
10301/**
10302 * xmlCreateDocParserCtxt:
10303 * @cur: a pointer to an array of xmlChar
10304 *
10305 * Creates a parser context for an XML in-memory document.
10306 *
10307 * Returns the new parser context or NULL
10308 */
10309xmlParserCtxtPtr
10310xmlCreateDocParserCtxt(xmlChar *cur) {
10311 int len;
10312
10313 if (cur == NULL)
10314 return(NULL);
10315 len = xmlStrlen(cur);
10316 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10317}
10318
10319/**
10320 * xmlSAXParseDoc:
10321 * @sax: the SAX handler block
10322 * @cur: a pointer to an array of xmlChar
10323 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10324 * documents
10325 *
10326 * parse an XML in-memory document and build a tree.
10327 * It use the given SAX function block to handle the parsing callback.
10328 * If sax is NULL, fallback to the default DOM tree building routines.
10329 *
10330 * Returns the resulting document tree
10331 */
10332
10333xmlDocPtr
10334xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10335 xmlDocPtr ret;
10336 xmlParserCtxtPtr ctxt;
10337
10338 if (cur == NULL) return(NULL);
10339
10340
10341 ctxt = xmlCreateDocParserCtxt(cur);
10342 if (ctxt == NULL) return(NULL);
10343 if (sax != NULL) {
10344 ctxt->sax = sax;
10345 ctxt->userData = NULL;
10346 }
10347
10348 xmlParseDocument(ctxt);
10349 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10350 else {
10351 ret = NULL;
10352 xmlFreeDoc(ctxt->myDoc);
10353 ctxt->myDoc = NULL;
10354 }
10355 if (sax != NULL)
10356 ctxt->sax = NULL;
10357 xmlFreeParserCtxt(ctxt);
10358
10359 return(ret);
10360}
10361
10362/**
10363 * xmlParseDoc:
10364 * @cur: a pointer to an array of xmlChar
10365 *
10366 * parse an XML in-memory document and build a tree.
10367 *
10368 * Returns the resulting document tree
10369 */
10370
10371xmlDocPtr
10372xmlParseDoc(xmlChar *cur) {
10373 return(xmlSAXParseDoc(NULL, cur, 0));
10374}
10375
Daniel Veillard8107a222002-01-13 14:10:10 +000010376/************************************************************************
10377 * *
10378 * Specific function to keep track of entities references *
10379 * and used by the XSLT debugger *
10380 * *
10381 ************************************************************************/
10382
10383static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
10384
10385/**
10386 * xmlAddEntityReference:
10387 * @ent : A valid entity
10388 * @firstNode : A valid first node for children of entity
10389 * @lastNode : A valid last node of children entity
10390 *
10391 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
10392 */
10393static void
10394xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
10395 xmlNodePtr lastNode)
10396{
10397 if (xmlEntityRefFunc != NULL) {
10398 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
10399 }
10400}
10401
10402
10403/**
10404 * xmlSetEntityReferenceFunc:
10405 * @func : A valid function
10406 *
10407 * Set the function to call call back when a xml reference has been made
10408 */
10409void
10410xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
10411{
10412 xmlEntityRefFunc = func;
10413}
Owen Taylor3473f882001-02-23 17:55:21 +000010414
10415/************************************************************************
10416 * *
10417 * Miscellaneous *
10418 * *
10419 ************************************************************************/
10420
10421#ifdef LIBXML_XPATH_ENABLED
10422#include <libxml/xpath.h>
10423#endif
10424
/* Default generic error handler; xmlInitParser() compares against it */
extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);

/* Non-zero once xmlInitParser() has run; reset by xmlCleanupParser() */
static int xmlParserInitialized = 0;
10427
10428/**
10429 * xmlInitParser:
10430 *
10431 * Initialization function for the XML parser.
10432 * This is not reentrant. Call once before processing in case of
10433 * use in multithreaded programs.
10434 */
10435
10436void
10437xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000010438 if (xmlParserInitialized != 0)
10439 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010440
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010441 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
10442 (xmlGenericError == NULL))
10443 initGenericErrorDefaultFunc(NULL);
Daniel Veillardd0463562001-10-13 09:15:48 +000010444 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000010445 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000010446 xmlInitCharEncodingHandlers();
10447 xmlInitializePredefinedEntities();
10448 xmlDefaultSAXHandlerInit();
10449 xmlRegisterDefaultInputCallbacks();
10450 xmlRegisterDefaultOutputCallbacks();
10451#ifdef LIBXML_HTML_ENABLED
10452 htmlInitAutoClose();
10453 htmlDefaultSAXHandlerInit();
10454#endif
10455#ifdef LIBXML_XPATH_ENABLED
10456 xmlXPathInit();
10457#endif
10458 xmlParserInitialized = 1;
10459}
10460
10461/**
10462 * xmlCleanupParser:
10463 *
10464 * Cleanup function for the XML parser. It tries to reclaim all
10465 * parsing related global memory allocated for the parser processing.
10466 * It doesn't deallocate any document related memory. Calling this
10467 * function should not prevent reusing the parser.
10468 */
10469
10470void
10471xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000010472 xmlCleanupCharEncodingHandlers();
10473 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000010474#ifdef LIBXML_CATALOG_ENABLED
10475 xmlCatalogCleanup();
10476#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000010477 xmlCleanupThreads();
10478 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010479}