blob: b8a67f19ab5ef2ba09634562997baf6a1c53e121 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their related
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Bjorn Reese70a9da52001-04-21 16:57:29 +000033#include "libxml.h"
34
/*
 * XML_DIR_SEP: directory separator character. A backslash when WIN32 is
 * defined and __CYGWIN__ is not, a forward slash in every other case.
 */
#if !defined(WIN32) || defined(__CYGWIN__)
#define XML_DIR_SEP '/'
#else
#define XML_DIR_SEP '\\'
#endif
40
Owen Taylor3473f882001-02-23 17:55:21 +000041#include <stdlib.h>
42#include <string.h>
43#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000044#include <libxml/threads.h>
45#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046#include <libxml/tree.h>
47#include <libxml/parser.h>
48#include <libxml/parserInternals.h>
49#include <libxml/valid.h>
50#include <libxml/entities.h>
51#include <libxml/xmlerror.h>
52#include <libxml/encoding.h>
53#include <libxml/xmlIO.h>
54#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000055#ifdef LIBXML_CATALOG_ENABLED
56#include <libxml/catalog.h>
57#endif
Owen Taylor3473f882001-02-23 17:55:21 +000058
59#ifdef HAVE_CTYPE_H
60#include <ctype.h>
61#endif
62#ifdef HAVE_STDLIB_H
63#include <stdlib.h>
64#endif
65#ifdef HAVE_SYS_STAT_H
66#include <sys/stat.h>
67#endif
68#ifdef HAVE_FCNTL_H
69#include <fcntl.h>
70#endif
71#ifdef HAVE_UNISTD_H
72#include <unistd.h>
73#endif
74#ifdef HAVE_ZLIB_H
75#include <zlib.h>
76#endif
77
78
/*
 * Default buffer sizes, counted in xmlChar units.
 * XML_PARSER_BIG_BUFFER_SIZE is the initial size of the translation
 * buffer allocated by xmlStringDecodeEntities().
 */
#define XML_PARSER_BUFFER_SIZE 100
#define XML_PARSER_BIG_BUFFER_SIZE 300
81
82/*
Owen Taylor3473f882001-02-23 17:55:21 +000083 * List of XML prefixed PI allowed by W3C specs
84 */
85
/*
 * NULL-terminated, read-only table. Both the strings and the pointer
 * array are const so the table cannot be modified by accident.
 */
static const char *const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
90
91/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +000092xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
93 const xmlChar **str);
94
Daniel Veillard257d9102001-05-08 10:41:44 +000095static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +000096xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
97 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000098 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000099 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000100
Daniel Veillard8107a222002-01-13 14:10:10 +0000101static void
102xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
103 xmlNodePtr lastNode);
104
Owen Taylor3473f882001-02-23 17:55:21 +0000105/************************************************************************
106 * *
107 * Parser stacks related functions and macros *
108 * *
109 ************************************************************************/
110
111xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
112 const xmlChar ** str);
113
114/*
115 * Generic function for accessing stacks in the Parser Context
116 */
117
/*
 * PUSH_AND_POP(scope, type, name):
 * @scope: storage class given to the two generated functions
 * @type:  element type held by the stack
 * @name:  stack name; the functions use ctxt->name (current top),
 *         ctxt->name##Tab (storage), ctxt->name##Nr (number of entries)
 *         and ctxt->name##Max (allocated capacity)
 *
 * Generates name##Push() and name##Pop().
 *
 * name##Push() doubles the table when it is full. The reallocation goes
 * through a temporary so that on failure the existing table and its
 * capacity are left untouched (no leak, no NULL table with a doubled
 * Max). It returns the index of the new entry, or 0 on allocation
 * failure; the first successful push also returns 0.
 *
 * name##Pop() returns the removed entry, or 0 when the stack is empty,
 * and updates ctxt->name to the new top (NULL when the stack is empty).
 */
#define PUSH_AND_POP(scope, type, name)					\
scope int name##Push(xmlParserCtxtPtr ctxt, type value) {		\
    if (ctxt->name##Nr >= ctxt->name##Max) {				\
	/* a zero capacity cannot be doubled: start from a small one */	\
	int grown = (ctxt->name##Max > 0) ? ctxt->name##Max * 2 : 4;	\
	type *tab = (type *) xmlRealloc(ctxt->name##Tab,		\
			 grown * sizeof(ctxt->name##Tab[0]));		\
	if (tab == NULL) {						\
	    xmlGenericError(xmlGenericErrorContext,			\
		    "realloc failed !\n");				\
	    return(0);							\
	}								\
	ctxt->name##Tab = tab;						\
	ctxt->name##Max = grown;					\
    }									\
    ctxt->name##Tab[ctxt->name##Nr] = value;				\
    ctxt->name = value;							\
    return(ctxt->name##Nr++);						\
}									\
scope type name##Pop(xmlParserCtxtPtr ctxt) {				\
    type ret;								\
    if (ctxt->name##Nr <= 0) return(0);					\
    ctxt->name##Nr--;							\
    if (ctxt->name##Nr > 0)						\
	ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1];		\
    else								\
	ctxt->name = NULL;						\
    ret = ctxt->name##Tab[ctxt->name##Nr];				\
    ctxt->name##Tab[ctxt->name##Nr] = 0;				\
    return(ret);							\
}
146
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000147/**
148 * inputPop:
149 * @ctxt: an XML parser context
150 *
151 * Pops the top parser input from the input stack
152 *
153 * Returns the input just removed
154 */
155/**
156 * inputPush:
157 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000158 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000159 *
160 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000161 *
162 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000163 */
164/**
165 * namePop:
166 * @ctxt: an XML parser context
167 *
168 * Pops the top element name from the name stack
169 *
170 * Returns the name just removed
171 */
172/**
173 * namePush:
174 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000175 * @value: the element name
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000176 *
177 * Pushes a new element name on top of the name stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000178 *
179 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000180 */
181/**
182 * nodePop:
183 * @ctxt: an XML parser context
184 *
185 * Pops the top element node from the node stack
186 *
187 * Returns the node just removed
188 */
189/**
190 * nodePush:
191 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000192 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000193 *
194 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000195 *
196 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000197 */
Owen Taylor3473f882001-02-23 17:55:21 +0000198/*
199 * Those macros actually generate the functions
200 */
201PUSH_AND_POP(extern, xmlParserInputPtr, input)
202PUSH_AND_POP(extern, xmlNodePtr, node)
203PUSH_AND_POP(extern, xmlChar*, name)
204
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000205static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000206 if (ctxt->spaceNr >= ctxt->spaceMax) {
207 ctxt->spaceMax *= 2;
208 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
209 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
210 if (ctxt->spaceTab == NULL) {
211 xmlGenericError(xmlGenericErrorContext,
212 "realloc failed !\n");
213 return(0);
214 }
215 }
216 ctxt->spaceTab[ctxt->spaceNr] = val;
217 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
218 return(ctxt->spaceNr++);
219}
220
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000221static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000222 int ret;
223 if (ctxt->spaceNr <= 0) return(0);
224 ctxt->spaceNr--;
225 if (ctxt->spaceNr > 0)
226 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
227 else
228 ctxt->space = NULL;
229 ret = ctxt->spaceTab[ctxt->spaceNr];
230 ctxt->spaceTab[ctxt->spaceNr] = -1;
231 return(ret);
232}
233
234/*
235 * Macros for accessing the content. Those should be used only by the parser,
236 * and not exported.
237 *
238 * Dirty macros, i.e. one often need to make assumption on the context to
239 * use them
240 *
241 * CUR_PTR return the current pointer to the xmlChar to be parsed.
242 * To be used with extreme caution since operations consuming
243 * characters may move the input buffer to a different location !
 * CUR returns the current xmlChar value, i.e. an 8 bit value.
 * This should be used internally by the parser
 * only to compare to ASCII values, otherwise it would break when
 * running with UTF-8 encoding.
248 * RAW same as CUR but in the input buffer, bypass any token
249 * extraction that may have been done
 * NXT(n) returns the n'th next xmlChar. Same as CUR, it should be used only
 * to compare on ASCII based substring.
252 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
253 * strings within the parser.
254 *
255 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
256 *
257 * NEXT Skip to the next character, this does the proper decoding
258 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000259 * NEXTL(l) Skip l xmlChar in the input buffer
Owen Taylor3473f882001-02-23 17:55:21 +0000260 * CUR_CHAR(l) returns the current unicode character (int), set l
261 * to the number of xmlChars used for the encoding [0-5].
262 * CUR_SCHAR same but operate on a string instead of the context
263 * COPY_BUF copy the current unicode char to the target buffer, increment
264 * the index
265 * GROW, SHRINK handling of input buffers
266 */
267
/*
 * All of these macros expect a parser context variable named ctxt to be
 * in scope.
 *
 * SHRINK, GROW and NEXT1 deliberately stay bare if/block statements
 * instead of do { } while (0): a call site in this file uses GROW
 * without a trailing semicolon. Never follow them directly by `else`.
 *
 * Macro arguments are parenthesized so that expressions such as
 * `cond ? 2 : 1` or `base + off` can be passed safely.
 */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input);					\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  }

#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {	\
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {								\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += (l);				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * Kept as a single if/else statement (the caller supplies the final
 * semicolon), so it remains usable wherever the previous form was.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)], (v))
Owen Taylor3473f882001-02-23 17:55:21 +0000320
321/**
322 * xmlSkipBlankChars:
323 * @ctxt: the XML parser context
324 *
325 * skip all blanks character found at that point in the input streams.
326 * It pops up finished entities in the process if allowable at that point.
327 *
328 * Returns the number of space chars skipped
329 */
330
331int
332xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000333 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000334
Daniel Veillard02141ea2001-04-30 11:46:40 +0000335 if (ctxt->token != 0) {
336 if (!IS_BLANK(ctxt->token))
337 return(0);
338 ctxt->token = 0;
339 res++;
340 }
Owen Taylor3473f882001-02-23 17:55:21 +0000341 /*
342 * It's Okay to use CUR/NEXT here since all the blanks are on
343 * the ASCII range.
344 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000345 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
346 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000347 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000348 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000349 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000350 cur = ctxt->input->cur;
351 while (IS_BLANK(*cur)) {
352 if (*cur == '\n') {
353 ctxt->input->line++; ctxt->input->col = 1;
354 }
355 cur++;
356 res++;
357 if (*cur == 0) {
358 ctxt->input->cur = cur;
359 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
360 cur = ctxt->input->cur;
361 }
362 }
363 ctxt->input->cur = cur;
364 } else {
365 int cur;
366 do {
367 cur = CUR;
368 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
369 NEXT;
370 cur = CUR;
371 res++;
372 }
373 while ((cur == 0) && (ctxt->inputNr > 1) &&
374 (ctxt->instate != XML_PARSER_COMMENT)) {
375 xmlPopInput(ctxt);
376 cur = CUR;
377 }
378 /*
379 * Need to handle support of entities branching here
380 */
381 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
382 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
383 }
Owen Taylor3473f882001-02-23 17:55:21 +0000384 return(res);
385}
386
387/************************************************************************
388 * *
389 * Commodity functions to handle entities *
390 * *
391 ************************************************************************/
392
393/**
394 * xmlPopInput:
395 * @ctxt: an XML parser context
396 *
397 * xmlPopInput: the current input pointed by ctxt->input came to an end
398 * pop it and return the next char.
399 *
400 * Returns the current xmlChar in the parser context
401 */
402xmlChar
403xmlPopInput(xmlParserCtxtPtr ctxt) {
404 if (ctxt->inputNr == 1) return(0); /* End of main Input */
405 if (xmlParserDebugEntities)
406 xmlGenericError(xmlGenericErrorContext,
407 "Popping input %d\n", ctxt->inputNr);
408 xmlFreeInputStream(inputPop(ctxt));
409 if ((*ctxt->input->cur == 0) &&
410 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
411 return(xmlPopInput(ctxt));
412 return(CUR);
413}
414
415/**
416 * xmlPushInput:
417 * @ctxt: an XML parser context
418 * @input: an XML parser input fragment (entity, XML fragment ...).
419 *
420 * xmlPushInput: switch to a new input stream which is stacked on top
421 * of the previous one(s).
422 */
423void
424xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
425 if (input == NULL) return;
426
427 if (xmlParserDebugEntities) {
428 if ((ctxt->input != NULL) && (ctxt->input->filename))
429 xmlGenericError(xmlGenericErrorContext,
430 "%s(%d): ", ctxt->input->filename,
431 ctxt->input->line);
432 xmlGenericError(xmlGenericErrorContext,
433 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
434 }
435 inputPush(ctxt, input);
436 GROW;
437}
438
439/**
440 * xmlParseCharRef:
441 * @ctxt: an XML parser context
442 *
443 * parse Reference declarations
444 *
445 * [66] CharRef ::= '&#' [0-9]+ ';' |
446 * '&#x' [0-9a-fA-F]+ ';'
447 *
448 * [ WFC: Legal Character ]
449 * Characters referred to using character references must match the
450 * production for Char.
451 *
452 * Returns the value parsed (as an int), 0 in case of error
453 */
454int
455xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000456 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000457 int count = 0;
458
459 if (ctxt->token != 0) {
460 val = ctxt->token;
461 ctxt->token = 0;
462 return(val);
463 }
464 /*
465 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
466 */
467 if ((RAW == '&') && (NXT(1) == '#') &&
468 (NXT(2) == 'x')) {
469 SKIP(3);
470 GROW;
471 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000472 if (count++ > 20) {
473 count = 0;
474 GROW;
475 }
476 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000477 val = val * 16 + (CUR - '0');
478 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
479 val = val * 16 + (CUR - 'a') + 10;
480 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
481 val = val * 16 + (CUR - 'A') + 10;
482 else {
483 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
484 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
485 ctxt->sax->error(ctxt->userData,
486 "xmlParseCharRef: invalid hexadecimal value\n");
487 ctxt->wellFormed = 0;
488 ctxt->disableSAX = 1;
489 val = 0;
490 break;
491 }
492 NEXT;
493 count++;
494 }
495 if (RAW == ';') {
496 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
497 ctxt->nbChars ++;
498 ctxt->input->cur++;
499 }
500 } else if ((RAW == '&') && (NXT(1) == '#')) {
501 SKIP(2);
502 GROW;
503 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000504 if (count++ > 20) {
505 count = 0;
506 GROW;
507 }
508 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000509 val = val * 10 + (CUR - '0');
510 else {
511 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
512 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
513 ctxt->sax->error(ctxt->userData,
514 "xmlParseCharRef: invalid decimal value\n");
515 ctxt->wellFormed = 0;
516 ctxt->disableSAX = 1;
517 val = 0;
518 break;
519 }
520 NEXT;
521 count++;
522 }
523 if (RAW == ';') {
524 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
525 ctxt->nbChars ++;
526 ctxt->input->cur++;
527 }
528 } else {
529 ctxt->errNo = XML_ERR_INVALID_CHARREF;
530 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
531 ctxt->sax->error(ctxt->userData,
532 "xmlParseCharRef: invalid value\n");
533 ctxt->wellFormed = 0;
534 ctxt->disableSAX = 1;
535 }
536
537 /*
538 * [ WFC: Legal Character ]
539 * Characters referred to using character references must match the
540 * production for Char.
541 */
542 if (IS_CHAR(val)) {
543 return(val);
544 } else {
545 ctxt->errNo = XML_ERR_INVALID_CHAR;
546 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000547 ctxt->sax->error(ctxt->userData,
548 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000549 val);
550 ctxt->wellFormed = 0;
551 ctxt->disableSAX = 1;
552 }
553 return(0);
554}
555
556/**
557 * xmlParseStringCharRef:
558 * @ctxt: an XML parser context
559 * @str: a pointer to an index in the string
560 *
561 * parse Reference declarations, variant parsing from a string rather
562 * than an an input flow.
563 *
564 * [66] CharRef ::= '&#' [0-9]+ ';' |
565 * '&#x' [0-9a-fA-F]+ ';'
566 *
567 * [ WFC: Legal Character ]
568 * Characters referred to using character references must match the
569 * production for Char.
570 *
571 * Returns the value parsed (as an int), 0 in case of error, str will be
572 * updated to the current value of the index
573 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000574static int
Owen Taylor3473f882001-02-23 17:55:21 +0000575xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
576 const xmlChar *ptr;
577 xmlChar cur;
578 int val = 0;
579
580 if ((str == NULL) || (*str == NULL)) return(0);
581 ptr = *str;
582 cur = *ptr;
583 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
584 ptr += 3;
585 cur = *ptr;
586 while (cur != ';') { /* Non input consuming loop */
587 if ((cur >= '0') && (cur <= '9'))
588 val = val * 16 + (cur - '0');
589 else if ((cur >= 'a') && (cur <= 'f'))
590 val = val * 16 + (cur - 'a') + 10;
591 else if ((cur >= 'A') && (cur <= 'F'))
592 val = val * 16 + (cur - 'A') + 10;
593 else {
594 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
595 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
596 ctxt->sax->error(ctxt->userData,
597 "xmlParseStringCharRef: invalid hexadecimal value\n");
598 ctxt->wellFormed = 0;
599 ctxt->disableSAX = 1;
600 val = 0;
601 break;
602 }
603 ptr++;
604 cur = *ptr;
605 }
606 if (cur == ';')
607 ptr++;
608 } else if ((cur == '&') && (ptr[1] == '#')){
609 ptr += 2;
610 cur = *ptr;
611 while (cur != ';') { /* Non input consuming loops */
612 if ((cur >= '0') && (cur <= '9'))
613 val = val * 10 + (cur - '0');
614 else {
615 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
616 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
617 ctxt->sax->error(ctxt->userData,
618 "xmlParseStringCharRef: invalid decimal value\n");
619 ctxt->wellFormed = 0;
620 ctxt->disableSAX = 1;
621 val = 0;
622 break;
623 }
624 ptr++;
625 cur = *ptr;
626 }
627 if (cur == ';')
628 ptr++;
629 } else {
630 ctxt->errNo = XML_ERR_INVALID_CHARREF;
631 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
632 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000633 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000634 ctxt->wellFormed = 0;
635 ctxt->disableSAX = 1;
636 return(0);
637 }
638 *str = ptr;
639
640 /*
641 * [ WFC: Legal Character ]
642 * Characters referred to using character references must match the
643 * production for Char.
644 */
645 if (IS_CHAR(val)) {
646 return(val);
647 } else {
648 ctxt->errNo = XML_ERR_INVALID_CHAR;
649 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
650 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000651 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000652 ctxt->wellFormed = 0;
653 ctxt->disableSAX = 1;
654 }
655 return(0);
656}
657
658/**
659 * xmlParserHandlePEReference:
660 * @ctxt: the parser context
661 *
662 * [69] PEReference ::= '%' Name ';'
663 *
664 * [ WFC: No Recursion ]
665 * A parsed entity must not contain a recursive
666 * reference to itself, either directly or indirectly.
667 *
668 * [ WFC: Entity Declared ]
669 * In a document without any DTD, a document with only an internal DTD
670 * subset which contains no parameter entity references, or a document
671 * with "standalone='yes'", ... ... The declaration of a parameter
672 * entity must precede any reference to it...
673 *
674 * [ VC: Entity Declared ]
675 * In a document with an external subset or external parameter entities
676 * with "standalone='no'", ... ... The declaration of a parameter entity
677 * must precede any reference to it...
678 *
679 * [ WFC: In DTD ]
680 * Parameter-entity references may only appear in the DTD.
681 * NOTE: misleading but this is handled.
682 *
683 * A PEReference may have been detected in the current input stream
684 * the handling is done accordingly to
685 * http://www.w3.org/TR/REC-xml#entproc
686 * i.e.
687 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000688 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000689 */
690void
691xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
692 xmlChar *name;
693 xmlEntityPtr entity = NULL;
694 xmlParserInputPtr input;
695
696 if (ctxt->token != 0) {
697 return;
698 }
699 if (RAW != '%') return;
700 switch(ctxt->instate) {
701 case XML_PARSER_CDATA_SECTION:
702 return;
703 case XML_PARSER_COMMENT:
704 return;
705 case XML_PARSER_START_TAG:
706 return;
707 case XML_PARSER_END_TAG:
708 return;
709 case XML_PARSER_EOF:
710 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
711 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
712 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
713 ctxt->wellFormed = 0;
714 ctxt->disableSAX = 1;
715 return;
716 case XML_PARSER_PROLOG:
717 case XML_PARSER_START:
718 case XML_PARSER_MISC:
719 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
720 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
721 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
722 ctxt->wellFormed = 0;
723 ctxt->disableSAX = 1;
724 return;
725 case XML_PARSER_ENTITY_DECL:
726 case XML_PARSER_CONTENT:
727 case XML_PARSER_ATTRIBUTE_VALUE:
728 case XML_PARSER_PI:
729 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +0000730 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +0000731 /* we just ignore it there */
732 return;
733 case XML_PARSER_EPILOG:
734 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
735 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
736 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
737 ctxt->wellFormed = 0;
738 ctxt->disableSAX = 1;
739 return;
740 case XML_PARSER_ENTITY_VALUE:
741 /*
742 * NOTE: in the case of entity values, we don't do the
743 * substitution here since we need the literal
744 * entity value to be able to save the internal
745 * subset of the document.
746 * This will be handled by xmlStringDecodeEntities
747 */
748 return;
749 case XML_PARSER_DTD:
750 /*
751 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
752 * In the internal DTD subset, parameter-entity references
753 * can occur only where markup declarations can occur, not
754 * within markup declarations.
755 * In that case this is handled in xmlParseMarkupDecl
756 */
757 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
758 return;
759 break;
760 case XML_PARSER_IGNORE:
761 return;
762 }
763
764 NEXT;
765 name = xmlParseName(ctxt);
766 if (xmlParserDebugEntities)
767 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000768 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000769 if (name == NULL) {
770 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
771 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000772 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000773 ctxt->wellFormed = 0;
774 ctxt->disableSAX = 1;
775 } else {
776 if (RAW == ';') {
777 NEXT;
778 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
779 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
780 if (entity == NULL) {
781
782 /*
783 * [ WFC: Entity Declared ]
784 * In a document without any DTD, a document with only an
785 * internal DTD subset which contains no parameter entity
786 * references, or a document with "standalone='yes'", ...
787 * ... The declaration of a parameter entity must precede
788 * any reference to it...
789 */
790 if ((ctxt->standalone == 1) ||
791 ((ctxt->hasExternalSubset == 0) &&
792 (ctxt->hasPErefs == 0))) {
793 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
794 ctxt->sax->error(ctxt->userData,
795 "PEReference: %%%s; not found\n", name);
796 ctxt->wellFormed = 0;
797 ctxt->disableSAX = 1;
798 } else {
799 /*
800 * [ VC: Entity Declared ]
801 * In a document with an external subset or external
802 * parameter entities with "standalone='no'", ...
803 * ... The declaration of a parameter entity must precede
804 * any reference to it...
805 */
806 if ((!ctxt->disableSAX) &&
807 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
808 ctxt->vctxt.error(ctxt->vctxt.userData,
809 "PEReference: %%%s; not found\n", name);
810 } else if ((!ctxt->disableSAX) &&
811 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
812 ctxt->sax->warning(ctxt->userData,
813 "PEReference: %%%s; not found\n", name);
814 ctxt->valid = 0;
815 }
816 } else {
817 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
818 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000819 xmlChar start[4];
820 xmlCharEncoding enc;
821
Owen Taylor3473f882001-02-23 17:55:21 +0000822 /*
823 * handle the extra spaces added before and after
824 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000825 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000826 */
827 input = xmlNewEntityInputStream(ctxt, entity);
828 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000829
830 /*
831 * Get the 4 first bytes and decode the charset
832 * if enc != XML_CHAR_ENCODING_NONE
833 * plug some encoding conversion routines.
834 */
835 GROW
836 start[0] = RAW;
837 start[1] = NXT(1);
838 start[2] = NXT(2);
839 start[3] = NXT(3);
840 enc = xmlDetectCharEncoding(start, 4);
841 if (enc != XML_CHAR_ENCODING_NONE) {
842 xmlSwitchEncoding(ctxt, enc);
843 }
844
Owen Taylor3473f882001-02-23 17:55:21 +0000845 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
846 (RAW == '<') && (NXT(1) == '?') &&
847 (NXT(2) == 'x') && (NXT(3) == 'm') &&
848 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
849 xmlParseTextDecl(ctxt);
850 }
851 if (ctxt->token == 0)
852 ctxt->token = ' ';
853 } else {
854 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
855 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000856 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000857 name);
858 ctxt->wellFormed = 0;
859 ctxt->disableSAX = 1;
860 }
861 }
862 } else {
863 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
864 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
865 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000866 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000867 ctxt->wellFormed = 0;
868 ctxt->disableSAX = 1;
869 }
870 xmlFree(name);
871 }
872}
873
/*
 * Macro used to grow the current buffer.
 *
 * growBuffer(buffer) doubles buffer##_size and reallocates @buffer to
 * that many xmlChar. It must be used inside a function returning a
 * pointer: on allocation failure the old buffer is released (instead of
 * being leaked), @buffer is set to NULL and the enclosing function
 * returns NULL. buffer##_size is only updated on success.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_grown = (xmlChar *)				\
	    xmlRealloc(buffer, buffer##_size * 2 * sizeof(xmlChar));	\
    if (buffer##_grown == NULL) {					\
	perror("realloc failed");					\
	xmlFree(buffer);						\
	buffer = NULL;							\
	return(NULL);							\
    }									\
    buffer = buffer##_grown;						\
    buffer##_size *= 2;							\
}
886
887/**
888 * xmlStringDecodeEntities:
889 * @ctxt: the parser context
890 * @str: the input string
891 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
892 * @end: an end marker xmlChar, 0 if none
893 * @end2: an end marker xmlChar, 0 if none
894 * @end3: an end marker xmlChar, 0 if none
895 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000896 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +0000897 *
898 * [67] Reference ::= EntityRef | CharRef
899 *
900 * [69] PEReference ::= '%' Name ';'
901 *
902 * Returns A newly allocated string with the substitution done. The caller
903 * must deallocate it !
904 */
905xmlChar *
906xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
907 xmlChar end, xmlChar end2, xmlChar end3) {
908 xmlChar *buffer = NULL;
909 int buffer_size = 0;
910
911 xmlChar *current = NULL;
912 xmlEntityPtr ent;
913 int c,l;
914 int nbchars = 0;
915
916 if (str == NULL)
917 return(NULL);
918
919 if (ctxt->depth > 40) {
920 ctxt->errNo = XML_ERR_ENTITY_LOOP;
921 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
922 ctxt->sax->error(ctxt->userData,
923 "Detected entity reference loop\n");
924 ctxt->wellFormed = 0;
925 ctxt->disableSAX = 1;
926 return(NULL);
927 }
928
929 /*
930 * allocate a translation buffer.
931 */
932 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
933 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
934 if (buffer == NULL) {
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000935 perror("xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +0000936 return(NULL);
937 }
938
939 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000940 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +0000941 * we are operating on already parsed values.
942 */
943 c = CUR_SCHAR(str, l);
944 while ((c != 0) && (c != end) && /* non input consuming loop */
945 (c != end2) && (c != end3)) {
946
947 if (c == 0) break;
948 if ((c == '&') && (str[1] == '#')) {
949 int val = xmlParseStringCharRef(ctxt, &str);
950 if (val != 0) {
951 COPY_BUF(0,buffer,nbchars,val);
952 }
953 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
954 if (xmlParserDebugEntities)
955 xmlGenericError(xmlGenericErrorContext,
956 "String decoding Entity Reference: %.30s\n",
957 str);
958 ent = xmlParseStringEntityRef(ctxt, &str);
959 if ((ent != NULL) &&
960 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
961 if (ent->content != NULL) {
962 COPY_BUF(0,buffer,nbchars,ent->content[0]);
963 } else {
964 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
965 ctxt->sax->error(ctxt->userData,
966 "internal error entity has no content\n");
967 }
968 } else if ((ent != NULL) && (ent->content != NULL)) {
969 xmlChar *rep;
970
971 ctxt->depth++;
972 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
973 0, 0, 0);
974 ctxt->depth--;
975 if (rep != NULL) {
976 current = rep;
977 while (*current != 0) { /* non input consuming loop */
978 buffer[nbchars++] = *current++;
979 if (nbchars >
980 buffer_size - XML_PARSER_BUFFER_SIZE) {
981 growBuffer(buffer);
982 }
983 }
984 xmlFree(rep);
985 }
986 } else if (ent != NULL) {
987 int i = xmlStrlen(ent->name);
988 const xmlChar *cur = ent->name;
989
990 buffer[nbchars++] = '&';
991 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
992 growBuffer(buffer);
993 }
994 for (;i > 0;i--)
995 buffer[nbchars++] = *cur++;
996 buffer[nbchars++] = ';';
997 }
998 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
999 if (xmlParserDebugEntities)
1000 xmlGenericError(xmlGenericErrorContext,
1001 "String decoding PE Reference: %.30s\n", str);
1002 ent = xmlParseStringPEReference(ctxt, &str);
1003 if (ent != NULL) {
1004 xmlChar *rep;
1005
1006 ctxt->depth++;
1007 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1008 0, 0, 0);
1009 ctxt->depth--;
1010 if (rep != NULL) {
1011 current = rep;
1012 while (*current != 0) { /* non input consuming loop */
1013 buffer[nbchars++] = *current++;
1014 if (nbchars >
1015 buffer_size - XML_PARSER_BUFFER_SIZE) {
1016 growBuffer(buffer);
1017 }
1018 }
1019 xmlFree(rep);
1020 }
1021 }
1022 } else {
1023 COPY_BUF(l,buffer,nbchars,c);
1024 str += l;
1025 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1026 growBuffer(buffer);
1027 }
1028 }
1029 c = CUR_SCHAR(str, l);
1030 }
1031 buffer[nbchars++] = 0;
1032 return(buffer);
1033}
1034
1035
1036/************************************************************************
1037 * *
1038 * Commodity functions to handle xmlChars *
1039 * *
1040 ************************************************************************/
1041
1042/**
1043 * xmlStrndup:
1044 * @cur: the input xmlChar *
1045 * @len: the len of @cur
1046 *
1047 * a strndup for array of xmlChar's
1048 *
1049 * Returns a new xmlChar * or NULL
1050 */
1051xmlChar *
1052xmlStrndup(const xmlChar *cur, int len) {
1053 xmlChar *ret;
1054
1055 if ((cur == NULL) || (len < 0)) return(NULL);
1056 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1057 if (ret == NULL) {
1058 xmlGenericError(xmlGenericErrorContext,
1059 "malloc of %ld byte failed\n",
1060 (len + 1) * (long)sizeof(xmlChar));
1061 return(NULL);
1062 }
1063 memcpy(ret, cur, len * sizeof(xmlChar));
1064 ret[len] = 0;
1065 return(ret);
1066}
1067
1068/**
1069 * xmlStrdup:
1070 * @cur: the input xmlChar *
1071 *
1072 * a strdup for array of xmlChar's. Since they are supposed to be
1073 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1074 * a termination mark of '0'.
1075 *
1076 * Returns a new xmlChar * or NULL
1077 */
1078xmlChar *
1079xmlStrdup(const xmlChar *cur) {
1080 const xmlChar *p = cur;
1081
1082 if (cur == NULL) return(NULL);
1083 while (*p != 0) p++; /* non input consuming */
1084 return(xmlStrndup(cur, p - cur));
1085}
1086
1087/**
1088 * xmlCharStrndup:
1089 * @cur: the input char *
1090 * @len: the len of @cur
1091 *
1092 * a strndup for char's to xmlChar's
1093 *
1094 * Returns a new xmlChar * or NULL
1095 */
1096
1097xmlChar *
1098xmlCharStrndup(const char *cur, int len) {
1099 int i;
1100 xmlChar *ret;
1101
1102 if ((cur == NULL) || (len < 0)) return(NULL);
1103 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1104 if (ret == NULL) {
1105 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1106 (len + 1) * (long)sizeof(xmlChar));
1107 return(NULL);
1108 }
1109 for (i = 0;i < len;i++)
1110 ret[i] = (xmlChar) cur[i];
1111 ret[len] = 0;
1112 return(ret);
1113}
1114
1115/**
1116 * xmlCharStrdup:
1117 * @cur: the input char *
1118 * @len: the len of @cur
1119 *
1120 * a strdup for char's to xmlChar's
1121 *
1122 * Returns a new xmlChar * or NULL
1123 */
1124
1125xmlChar *
1126xmlCharStrdup(const char *cur) {
1127 const char *p = cur;
1128
1129 if (cur == NULL) return(NULL);
1130 while (*p != '\0') p++; /* non input consuming */
1131 return(xmlCharStrndup(cur, p - cur));
1132}
1133
1134/**
1135 * xmlStrcmp:
1136 * @str1: the first xmlChar *
1137 * @str2: the second xmlChar *
1138 *
1139 * a strcmp for xmlChar's
1140 *
1141 * Returns the integer result of the comparison
1142 */
1143
1144int
1145xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1146 register int tmp;
1147
1148 if (str1 == str2) return(0);
1149 if (str1 == NULL) return(-1);
1150 if (str2 == NULL) return(1);
1151 do {
1152 tmp = *str1++ - *str2;
1153 if (tmp != 0) return(tmp);
1154 } while (*str2++ != 0);
1155 return 0;
1156}
1157
1158/**
1159 * xmlStrEqual:
1160 * @str1: the first xmlChar *
1161 * @str2: the second xmlChar *
1162 *
1163 * Check if both string are equal of have same content
1164 * Should be a bit more readable and faster than xmlStrEqual()
1165 *
1166 * Returns 1 if they are equal, 0 if they are different
1167 */
1168
1169int
1170xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1171 if (str1 == str2) return(1);
1172 if (str1 == NULL) return(0);
1173 if (str2 == NULL) return(0);
1174 do {
1175 if (*str1++ != *str2) return(0);
1176 } while (*str2++);
1177 return(1);
1178}
1179
1180/**
1181 * xmlStrncmp:
1182 * @str1: the first xmlChar *
1183 * @str2: the second xmlChar *
1184 * @len: the max comparison length
1185 *
1186 * a strncmp for xmlChar's
1187 *
1188 * Returns the integer result of the comparison
1189 */
1190
1191int
1192xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1193 register int tmp;
1194
1195 if (len <= 0) return(0);
1196 if (str1 == str2) return(0);
1197 if (str1 == NULL) return(-1);
1198 if (str2 == NULL) return(1);
1199 do {
1200 tmp = *str1++ - *str2;
1201 if (tmp != 0 || --len == 0) return(tmp);
1202 } while (*str2++ != 0);
1203 return 0;
1204}
1205
Daniel Veillardb44025c2001-10-11 22:55:55 +00001206static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001207 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1208 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1209 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1210 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1211 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1212 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1213 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1214 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1215 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1216 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1217 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1218 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1219 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1220 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1221 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1222 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1223 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1224 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1225 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1226 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1227 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1228 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1229 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1230 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1231 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1232 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1233 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1234 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1235 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1236 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1237 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1238 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1239};
1240
1241/**
1242 * xmlStrcasecmp:
1243 * @str1: the first xmlChar *
1244 * @str2: the second xmlChar *
1245 *
1246 * a strcasecmp for xmlChar's
1247 *
1248 * Returns the integer result of the comparison
1249 */
1250
1251int
1252xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1253 register int tmp;
1254
1255 if (str1 == str2) return(0);
1256 if (str1 == NULL) return(-1);
1257 if (str2 == NULL) return(1);
1258 do {
1259 tmp = casemap[*str1++] - casemap[*str2];
1260 if (tmp != 0) return(tmp);
1261 } while (*str2++ != 0);
1262 return 0;
1263}
1264
1265/**
1266 * xmlStrncasecmp:
1267 * @str1: the first xmlChar *
1268 * @str2: the second xmlChar *
1269 * @len: the max comparison length
1270 *
1271 * a strncasecmp for xmlChar's
1272 *
1273 * Returns the integer result of the comparison
1274 */
1275
1276int
1277xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1278 register int tmp;
1279
1280 if (len <= 0) return(0);
1281 if (str1 == str2) return(0);
1282 if (str1 == NULL) return(-1);
1283 if (str2 == NULL) return(1);
1284 do {
1285 tmp = casemap[*str1++] - casemap[*str2];
1286 if (tmp != 0 || --len == 0) return(tmp);
1287 } while (*str2++ != 0);
1288 return 0;
1289}
1290
1291/**
1292 * xmlStrchr:
1293 * @str: the xmlChar * array
1294 * @val: the xmlChar to search
1295 *
1296 * a strchr for xmlChar's
1297 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001298 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001299 */
1300
1301const xmlChar *
1302xmlStrchr(const xmlChar *str, xmlChar val) {
1303 if (str == NULL) return(NULL);
1304 while (*str != 0) { /* non input consuming */
1305 if (*str == val) return((xmlChar *) str);
1306 str++;
1307 }
1308 return(NULL);
1309}
1310
1311/**
1312 * xmlStrstr:
1313 * @str: the xmlChar * array (haystack)
1314 * @val: the xmlChar to search (needle)
1315 *
1316 * a strstr for xmlChar's
1317 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001318 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001319 */
1320
1321const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001322xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001323 int n;
1324
1325 if (str == NULL) return(NULL);
1326 if (val == NULL) return(NULL);
1327 n = xmlStrlen(val);
1328
1329 if (n == 0) return(str);
1330 while (*str != 0) { /* non input consuming */
1331 if (*str == *val) {
1332 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1333 }
1334 str++;
1335 }
1336 return(NULL);
1337}
1338
1339/**
1340 * xmlStrcasestr:
1341 * @str: the xmlChar * array (haystack)
1342 * @val: the xmlChar to search (needle)
1343 *
1344 * a case-ignoring strstr for xmlChar's
1345 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001346 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001347 */
1348
1349const xmlChar *
1350xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1351 int n;
1352
1353 if (str == NULL) return(NULL);
1354 if (val == NULL) return(NULL);
1355 n = xmlStrlen(val);
1356
1357 if (n == 0) return(str);
1358 while (*str != 0) { /* non input consuming */
1359 if (casemap[*str] == casemap[*val])
1360 if (!xmlStrncasecmp(str, val, n)) return(str);
1361 str++;
1362 }
1363 return(NULL);
1364}
1365
1366/**
1367 * xmlStrsub:
1368 * @str: the xmlChar * array (haystack)
1369 * @start: the index of the first char (zero based)
1370 * @len: the length of the substring
1371 *
1372 * Extract a substring of a given string
1373 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001374 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001375 */
1376
1377xmlChar *
1378xmlStrsub(const xmlChar *str, int start, int len) {
1379 int i;
1380
1381 if (str == NULL) return(NULL);
1382 if (start < 0) return(NULL);
1383 if (len < 0) return(NULL);
1384
1385 for (i = 0;i < start;i++) {
1386 if (*str == 0) return(NULL);
1387 str++;
1388 }
1389 if (*str == 0) return(NULL);
1390 return(xmlStrndup(str, len));
1391}
1392
1393/**
1394 * xmlStrlen:
1395 * @str: the xmlChar * array
1396 *
1397 * length of a xmlChar's string
1398 *
1399 * Returns the number of xmlChar contained in the ARRAY.
1400 */
1401
1402int
1403xmlStrlen(const xmlChar *str) {
1404 int len = 0;
1405
1406 if (str == NULL) return(0);
1407 while (*str != 0) { /* non input consuming */
1408 str++;
1409 len++;
1410 }
1411 return(len);
1412}
1413
1414/**
1415 * xmlStrncat:
1416 * @cur: the original xmlChar * array
1417 * @add: the xmlChar * array added
1418 * @len: the length of @add
1419 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001420 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001421 * first bytes of @add.
1422 *
1423 * Returns a new xmlChar *, the original @cur is reallocated if needed
1424 * and should not be freed
1425 */
1426
1427xmlChar *
1428xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1429 int size;
1430 xmlChar *ret;
1431
1432 if ((add == NULL) || (len == 0))
1433 return(cur);
1434 if (cur == NULL)
1435 return(xmlStrndup(add, len));
1436
1437 size = xmlStrlen(cur);
1438 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1439 if (ret == NULL) {
1440 xmlGenericError(xmlGenericErrorContext,
1441 "xmlStrncat: realloc of %ld byte failed\n",
1442 (size + len + 1) * (long)sizeof(xmlChar));
1443 return(cur);
1444 }
1445 memcpy(&ret[size], add, len * sizeof(xmlChar));
1446 ret[size + len] = 0;
1447 return(ret);
1448}
1449
1450/**
1451 * xmlStrcat:
1452 * @cur: the original xmlChar * array
1453 * @add: the xmlChar * array added
1454 *
1455 * a strcat for array of xmlChar's. Since they are supposed to be
1456 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1457 * a termination mark of '0'.
1458 *
1459 * Returns a new xmlChar * containing the concatenated string.
1460 */
1461xmlChar *
1462xmlStrcat(xmlChar *cur, const xmlChar *add) {
1463 const xmlChar *p = add;
1464
1465 if (add == NULL) return(cur);
1466 if (cur == NULL)
1467 return(xmlStrdup(add));
1468
1469 while (*p != 0) p++; /* non input consuming */
1470 return(xmlStrncat(cur, add, p - add));
1471}
1472
1473/************************************************************************
1474 * *
1475 * Commodity functions, cleanup needed ? *
1476 * *
1477 ************************************************************************/
1478
1479/**
1480 * areBlanks:
1481 * @ctxt: an XML parser context
1482 * @str: a xmlChar *
1483 * @len: the size of @str
1484 *
1485 * Is this a sequence of blank chars that one can ignore ?
1486 *
1487 * Returns 1 if ignorable 0 otherwise.
1488 */
1489
1490static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1491 int i, ret;
1492 xmlNodePtr lastChild;
1493
Daniel Veillard05c13a22001-09-09 08:38:09 +00001494 /*
1495 * Don't spend time trying to differentiate them, the same callback is
1496 * used !
1497 */
1498 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001499 return(0);
1500
Owen Taylor3473f882001-02-23 17:55:21 +00001501 /*
1502 * Check for xml:space value.
1503 */
1504 if (*(ctxt->space) == 1)
1505 return(0);
1506
1507 /*
1508 * Check that the string is made of blanks
1509 */
1510 for (i = 0;i < len;i++)
1511 if (!(IS_BLANK(str[i]))) return(0);
1512
1513 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001514 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001515 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001516 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001517 if (ctxt->myDoc != NULL) {
1518 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1519 if (ret == 0) return(1);
1520 if (ret == 1) return(0);
1521 }
1522
1523 /*
1524 * Otherwise, heuristic :-\
1525 */
Owen Taylor3473f882001-02-23 17:55:21 +00001526 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001527 if ((ctxt->node->children == NULL) &&
1528 (RAW == '<') && (NXT(1) == '/')) return(0);
1529
1530 lastChild = xmlGetLastChild(ctxt->node);
1531 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001532 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1533 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001534 } else if (xmlNodeIsText(lastChild))
1535 return(0);
1536 else if ((ctxt->node->children != NULL) &&
1537 (xmlNodeIsText(ctxt->node->children)))
1538 return(0);
1539 return(1);
1540}
1541
Owen Taylor3473f882001-02-23 17:55:21 +00001542/************************************************************************
1543 * *
1544 * Extra stuff for namespace support *
1545 * Relates to http://www.w3.org/TR/WD-xml-names *
1546 * *
1547 ************************************************************************/
1548
1549/**
1550 * xmlSplitQName:
1551 * @ctxt: an XML parser context
1552 * @name: an XML parser context
1553 * @prefix: a xmlChar **
1554 *
1555 * parse an UTF8 encoded XML qualified name string
1556 *
1557 * [NS 5] QName ::= (Prefix ':')? LocalPart
1558 *
1559 * [NS 6] Prefix ::= NCName
1560 *
1561 * [NS 7] LocalPart ::= NCName
1562 *
1563 * Returns the local part, and prefix is updated
1564 * to get the Prefix if any.
1565 */
1566
1567xmlChar *
1568xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1569 xmlChar buf[XML_MAX_NAMELEN + 5];
1570 xmlChar *buffer = NULL;
1571 int len = 0;
1572 int max = XML_MAX_NAMELEN;
1573 xmlChar *ret = NULL;
1574 const xmlChar *cur = name;
1575 int c;
1576
1577 *prefix = NULL;
1578
1579#ifndef XML_XML_NAMESPACE
1580 /* xml: prefix is not really a namespace */
1581 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1582 (cur[2] == 'l') && (cur[3] == ':'))
1583 return(xmlStrdup(name));
1584#endif
1585
1586 /* nasty but valid */
1587 if (cur[0] == ':')
1588 return(xmlStrdup(name));
1589
1590 c = *cur++;
1591 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1592 buf[len++] = c;
1593 c = *cur++;
1594 }
1595 if (len >= max) {
1596 /*
1597 * Okay someone managed to make a huge name, so he's ready to pay
1598 * for the processing speed.
1599 */
1600 max = len * 2;
1601
1602 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1603 if (buffer == NULL) {
1604 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1605 ctxt->sax->error(ctxt->userData,
1606 "xmlSplitQName: out of memory\n");
1607 return(NULL);
1608 }
1609 memcpy(buffer, buf, len);
1610 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1611 if (len + 10 > max) {
1612 max *= 2;
1613 buffer = (xmlChar *) xmlRealloc(buffer,
1614 max * sizeof(xmlChar));
1615 if (buffer == NULL) {
1616 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1617 ctxt->sax->error(ctxt->userData,
1618 "xmlSplitQName: out of memory\n");
1619 return(NULL);
1620 }
1621 }
1622 buffer[len++] = c;
1623 c = *cur++;
1624 }
1625 buffer[len] = 0;
1626 }
1627
1628 if (buffer == NULL)
1629 ret = xmlStrndup(buf, len);
1630 else {
1631 ret = buffer;
1632 buffer = NULL;
1633 max = XML_MAX_NAMELEN;
1634 }
1635
1636
1637 if (c == ':') {
1638 c = *cur++;
1639 if (c == 0) return(ret);
1640 *prefix = ret;
1641 len = 0;
1642
1643 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1644 buf[len++] = c;
1645 c = *cur++;
1646 }
1647 if (len >= max) {
1648 /*
1649 * Okay someone managed to make a huge name, so he's ready to pay
1650 * for the processing speed.
1651 */
1652 max = len * 2;
1653
1654 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1655 if (buffer == NULL) {
1656 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1657 ctxt->sax->error(ctxt->userData,
1658 "xmlSplitQName: out of memory\n");
1659 return(NULL);
1660 }
1661 memcpy(buffer, buf, len);
1662 while (c != 0) { /* tested bigname2.xml */
1663 if (len + 10 > max) {
1664 max *= 2;
1665 buffer = (xmlChar *) xmlRealloc(buffer,
1666 max * sizeof(xmlChar));
1667 if (buffer == NULL) {
1668 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1669 ctxt->sax->error(ctxt->userData,
1670 "xmlSplitQName: out of memory\n");
1671 return(NULL);
1672 }
1673 }
1674 buffer[len++] = c;
1675 c = *cur++;
1676 }
1677 buffer[len] = 0;
1678 }
1679
1680 if (buffer == NULL)
1681 ret = xmlStrndup(buf, len);
1682 else {
1683 ret = buffer;
1684 }
1685 }
1686
1687 return(ret);
1688}
1689
1690/************************************************************************
1691 * *
1692 * The parser itself *
1693 * Relates to http://www.w3.org/TR/REC-xml *
1694 * *
1695 ************************************************************************/
1696
Daniel Veillard76d66f42001-05-16 21:05:17 +00001697static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001698/**
1699 * xmlParseName:
1700 * @ctxt: an XML parser context
1701 *
1702 * parse an XML name.
1703 *
1704 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1705 * CombiningChar | Extender
1706 *
1707 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1708 *
1709 * [6] Names ::= Name (S Name)*
1710 *
1711 * Returns the Name parsed or NULL
1712 */
1713
1714xmlChar *
1715xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001716 const xmlChar *in;
1717 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001718 int count = 0;
1719
1720 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001721
1722 /*
1723 * Accelerator for simple ASCII names
1724 */
1725 in = ctxt->input->cur;
1726 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1727 ((*in >= 0x41) && (*in <= 0x5A)) ||
1728 (*in == '_') || (*in == ':')) {
1729 in++;
1730 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1731 ((*in >= 0x41) && (*in <= 0x5A)) ||
1732 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001733 (*in == '_') || (*in == '-') ||
1734 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001735 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001736 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001737 count = in - ctxt->input->cur;
1738 ret = xmlStrndup(ctxt->input->cur, count);
1739 ctxt->input->cur = in;
1740 return(ret);
1741 }
1742 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001743 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001744}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001745
Daniel Veillard76d66f42001-05-16 21:05:17 +00001746static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001747xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1748 xmlChar buf[XML_MAX_NAMELEN + 5];
1749 int len = 0, l;
1750 int c;
1751 int count = 0;
1752
1753 /*
1754 * Handler for more complex cases
1755 */
1756 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001757 c = CUR_CHAR(l);
1758 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1759 (!IS_LETTER(c) && (c != '_') &&
1760 (c != ':'))) {
1761 return(NULL);
1762 }
1763
1764 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1765 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1766 (c == '.') || (c == '-') ||
1767 (c == '_') || (c == ':') ||
1768 (IS_COMBINING(c)) ||
1769 (IS_EXTENDER(c)))) {
1770 if (count++ > 100) {
1771 count = 0;
1772 GROW;
1773 }
1774 COPY_BUF(l,buf,len,c);
1775 NEXTL(l);
1776 c = CUR_CHAR(l);
1777 if (len >= XML_MAX_NAMELEN) {
1778 /*
1779 * Okay someone managed to make a huge name, so he's ready to pay
1780 * for the processing speed.
1781 */
1782 xmlChar *buffer;
1783 int max = len * 2;
1784
1785 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1786 if (buffer == NULL) {
1787 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1788 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001789 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001790 return(NULL);
1791 }
1792 memcpy(buffer, buf, len);
1793 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1794 (c == '.') || (c == '-') ||
1795 (c == '_') || (c == ':') ||
1796 (IS_COMBINING(c)) ||
1797 (IS_EXTENDER(c))) {
1798 if (count++ > 100) {
1799 count = 0;
1800 GROW;
1801 }
1802 if (len + 10 > max) {
1803 max *= 2;
1804 buffer = (xmlChar *) xmlRealloc(buffer,
1805 max * sizeof(xmlChar));
1806 if (buffer == NULL) {
1807 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1808 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001809 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001810 return(NULL);
1811 }
1812 }
1813 COPY_BUF(l,buffer,len,c);
1814 NEXTL(l);
1815 c = CUR_CHAR(l);
1816 }
1817 buffer[len] = 0;
1818 return(buffer);
1819 }
1820 }
1821 return(xmlStrndup(buf, len));
1822}
1823
1824/**
1825 * xmlParseStringName:
1826 * @ctxt: an XML parser context
1827 * @str: a pointer to the string pointer (IN/OUT)
1828 *
1829 * parse an XML name.
1830 *
1831 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1832 * CombiningChar | Extender
1833 *
1834 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1835 *
1836 * [6] Names ::= Name (S Name)*
1837 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001838 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00001839 * is updated to the current location in the string.
1840 */
1841
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001842static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001843xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1844 xmlChar buf[XML_MAX_NAMELEN + 5];
1845 const xmlChar *cur = *str;
1846 int len = 0, l;
1847 int c;
1848
1849 c = CUR_SCHAR(cur, l);
1850 if (!IS_LETTER(c) && (c != '_') &&
1851 (c != ':')) {
1852 return(NULL);
1853 }
1854
1855 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1856 (c == '.') || (c == '-') ||
1857 (c == '_') || (c == ':') ||
1858 (IS_COMBINING(c)) ||
1859 (IS_EXTENDER(c))) {
1860 COPY_BUF(l,buf,len,c);
1861 cur += l;
1862 c = CUR_SCHAR(cur, l);
1863 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1864 /*
1865 * Okay someone managed to make a huge name, so he's ready to pay
1866 * for the processing speed.
1867 */
1868 xmlChar *buffer;
1869 int max = len * 2;
1870
1871 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1872 if (buffer == NULL) {
1873 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1874 ctxt->sax->error(ctxt->userData,
1875 "xmlParseStringName: out of memory\n");
1876 return(NULL);
1877 }
1878 memcpy(buffer, buf, len);
1879 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1880 (c == '.') || (c == '-') ||
1881 (c == '_') || (c == ':') ||
1882 (IS_COMBINING(c)) ||
1883 (IS_EXTENDER(c))) {
1884 if (len + 10 > max) {
1885 max *= 2;
1886 buffer = (xmlChar *) xmlRealloc(buffer,
1887 max * sizeof(xmlChar));
1888 if (buffer == NULL) {
1889 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1890 ctxt->sax->error(ctxt->userData,
1891 "xmlParseStringName: out of memory\n");
1892 return(NULL);
1893 }
1894 }
1895 COPY_BUF(l,buffer,len,c);
1896 cur += l;
1897 c = CUR_SCHAR(cur, l);
1898 }
1899 buffer[len] = 0;
1900 *str = cur;
1901 return(buffer);
1902 }
1903 }
1904 *str = cur;
1905 return(xmlStrndup(buf, len));
1906}
1907
1908/**
1909 * xmlParseNmtoken:
1910 * @ctxt: an XML parser context
1911 *
1912 * parse an XML Nmtoken.
1913 *
1914 * [7] Nmtoken ::= (NameChar)+
1915 *
1916 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1917 *
1918 * Returns the Nmtoken parsed or NULL
1919 */
1920
1921xmlChar *
1922xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1923 xmlChar buf[XML_MAX_NAMELEN + 5];
1924 int len = 0, l;
1925 int c;
1926 int count = 0;
1927
1928 GROW;
1929 c = CUR_CHAR(l);
1930
1931 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1932 (c == '.') || (c == '-') ||
1933 (c == '_') || (c == ':') ||
1934 (IS_COMBINING(c)) ||
1935 (IS_EXTENDER(c))) {
1936 if (count++ > 100) {
1937 count = 0;
1938 GROW;
1939 }
1940 COPY_BUF(l,buf,len,c);
1941 NEXTL(l);
1942 c = CUR_CHAR(l);
1943 if (len >= XML_MAX_NAMELEN) {
1944 /*
1945 * Okay someone managed to make a huge token, so he's ready to pay
1946 * for the processing speed.
1947 */
1948 xmlChar *buffer;
1949 int max = len * 2;
1950
1951 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1952 if (buffer == NULL) {
1953 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1954 ctxt->sax->error(ctxt->userData,
1955 "xmlParseNmtoken: out of memory\n");
1956 return(NULL);
1957 }
1958 memcpy(buffer, buf, len);
1959 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1960 (c == '.') || (c == '-') ||
1961 (c == '_') || (c == ':') ||
1962 (IS_COMBINING(c)) ||
1963 (IS_EXTENDER(c))) {
1964 if (count++ > 100) {
1965 count = 0;
1966 GROW;
1967 }
1968 if (len + 10 > max) {
1969 max *= 2;
1970 buffer = (xmlChar *) xmlRealloc(buffer,
1971 max * sizeof(xmlChar));
1972 if (buffer == NULL) {
1973 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1974 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001975 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001976 return(NULL);
1977 }
1978 }
1979 COPY_BUF(l,buffer,len,c);
1980 NEXTL(l);
1981 c = CUR_CHAR(l);
1982 }
1983 buffer[len] = 0;
1984 return(buffer);
1985 }
1986 }
1987 if (len == 0)
1988 return(NULL);
1989 return(xmlStrndup(buf, len));
1990}
1991
1992/**
1993 * xmlParseEntityValue:
1994 * @ctxt: an XML parser context
1995 * @orig: if non-NULL store a copy of the original entity value
1996 *
1997 * parse a value for ENTITY declarations
1998 *
1999 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2000 * "'" ([^%&'] | PEReference | Reference)* "'"
2001 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002002 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002003 */
2004
2005xmlChar *
2006xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2007 xmlChar *buf = NULL;
2008 int len = 0;
2009 int size = XML_PARSER_BUFFER_SIZE;
2010 int c, l;
2011 xmlChar stop;
2012 xmlChar *ret = NULL;
2013 const xmlChar *cur = NULL;
2014 xmlParserInputPtr input;
2015
2016 if (RAW == '"') stop = '"';
2017 else if (RAW == '\'') stop = '\'';
2018 else {
2019 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2020 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2021 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2022 ctxt->wellFormed = 0;
2023 ctxt->disableSAX = 1;
2024 return(NULL);
2025 }
2026 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2027 if (buf == NULL) {
2028 xmlGenericError(xmlGenericErrorContext,
2029 "malloc of %d byte failed\n", size);
2030 return(NULL);
2031 }
2032
2033 /*
2034 * The content of the entity definition is copied in a buffer.
2035 */
2036
2037 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2038 input = ctxt->input;
2039 GROW;
2040 NEXT;
2041 c = CUR_CHAR(l);
2042 /*
2043 * NOTE: 4.4.5 Included in Literal
2044 * When a parameter entity reference appears in a literal entity
2045 * value, ... a single or double quote character in the replacement
2046 * text is always treated as a normal data character and will not
2047 * terminate the literal.
2048 * In practice it means we stop the loop only when back at parsing
2049 * the initial entity and the quote is found
2050 */
2051 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2052 (ctxt->input != input))) {
2053 if (len + 5 >= size) {
2054 size *= 2;
2055 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2056 if (buf == NULL) {
2057 xmlGenericError(xmlGenericErrorContext,
2058 "realloc of %d byte failed\n", size);
2059 return(NULL);
2060 }
2061 }
2062 COPY_BUF(l,buf,len,c);
2063 NEXTL(l);
2064 /*
2065 * Pop-up of finished entities.
2066 */
2067 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2068 xmlPopInput(ctxt);
2069
2070 GROW;
2071 c = CUR_CHAR(l);
2072 if (c == 0) {
2073 GROW;
2074 c = CUR_CHAR(l);
2075 }
2076 }
2077 buf[len] = 0;
2078
2079 /*
2080 * Raise problem w.r.t. '&' and '%' being used in non-entities
2081 * reference constructs. Note Charref will be handled in
2082 * xmlStringDecodeEntities()
2083 */
2084 cur = buf;
2085 while (*cur != 0) { /* non input consuming */
2086 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2087 xmlChar *name;
2088 xmlChar tmp = *cur;
2089
2090 cur++;
2091 name = xmlParseStringName(ctxt, &cur);
2092 if ((name == NULL) || (*cur != ';')) {
2093 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2094 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2095 ctxt->sax->error(ctxt->userData,
2096 "EntityValue: '%c' forbidden except for entities references\n",
2097 tmp);
2098 ctxt->wellFormed = 0;
2099 ctxt->disableSAX = 1;
2100 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002101 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2102 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002103 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2104 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2105 ctxt->sax->error(ctxt->userData,
2106 "EntityValue: PEReferences forbidden in internal subset\n",
2107 tmp);
2108 ctxt->wellFormed = 0;
2109 ctxt->disableSAX = 1;
2110 }
2111 if (name != NULL)
2112 xmlFree(name);
2113 }
2114 cur++;
2115 }
2116
2117 /*
2118 * Then PEReference entities are substituted.
2119 */
2120 if (c != stop) {
2121 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2122 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2123 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2124 ctxt->wellFormed = 0;
2125 ctxt->disableSAX = 1;
2126 xmlFree(buf);
2127 } else {
2128 NEXT;
2129 /*
2130 * NOTE: 4.4.7 Bypassed
2131 * When a general entity reference appears in the EntityValue in
2132 * an entity declaration, it is bypassed and left as is.
2133 * so XML_SUBSTITUTE_REF is not set here.
2134 */
2135 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2136 0, 0, 0);
2137 if (orig != NULL)
2138 *orig = buf;
2139 else
2140 xmlFree(buf);
2141 }
2142
2143 return(ret);
2144}
2145
2146/**
2147 * xmlParseAttValue:
2148 * @ctxt: an XML parser context
2149 *
2150 * parse a value for an attribute
2151 * Note: the parser won't do substitution of entities here, this
2152 * will be handled later in xmlStringGetNodeList
2153 *
2154 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2155 * "'" ([^<&'] | Reference)* "'"
2156 *
2157 * 3.3.3 Attribute-Value Normalization:
2158 * Before the value of an attribute is passed to the application or
2159 * checked for validity, the XML processor must normalize it as follows:
2160 * - a character reference is processed by appending the referenced
2161 * character to the attribute value
2162 * - an entity reference is processed by recursively processing the
2163 * replacement text of the entity
2164 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2165 * appending #x20 to the normalized value, except that only a single
2166 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2167 * parsed entity or the literal entity value of an internal parsed entity
2168 * - other characters are processed by appending them to the normalized value
2169 * If the declared value is not CDATA, then the XML processor must further
2170 * process the normalized attribute value by discarding any leading and
2171 * trailing space (#x20) characters, and by replacing sequences of space
2172 * (#x20) characters by a single space (#x20) character.
2173 * All attributes for which no declaration has been read should be treated
2174 * by a non-validating parser as if declared CDATA.
2175 *
2176 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2177 */
2178
2179xmlChar *
2180xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2181 xmlChar limit = 0;
2182 xmlChar *buf = NULL;
2183 int len = 0;
2184 int buf_size = 0;
2185 int c, l;
2186 xmlChar *current = NULL;
2187 xmlEntityPtr ent;
2188
2189
2190 SHRINK;
2191 if (NXT(0) == '"') {
2192 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2193 limit = '"';
2194 NEXT;
2195 } else if (NXT(0) == '\'') {
2196 limit = '\'';
2197 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2198 NEXT;
2199 } else {
2200 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2201 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2202 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2203 ctxt->wellFormed = 0;
2204 ctxt->disableSAX = 1;
2205 return(NULL);
2206 }
2207
2208 /*
2209 * allocate a translation buffer.
2210 */
2211 buf_size = XML_PARSER_BUFFER_SIZE;
2212 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2213 if (buf == NULL) {
2214 perror("xmlParseAttValue: malloc failed");
2215 return(NULL);
2216 }
2217
2218 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002219 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002220 */
2221 c = CUR_CHAR(l);
2222 while (((NXT(0) != limit) && /* checked */
2223 (c != '<')) || (ctxt->token != 0)) {
2224 if (c == 0) break;
2225 if (ctxt->token == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002226 if (ctxt->replaceEntities) {
2227 if (len > buf_size - 10) {
2228 growBuffer(buf);
2229 }
2230 buf[len++] = '&';
2231 } else {
2232 /*
2233 * The reparsing will be done in xmlStringGetNodeList()
2234 * called by the attribute() function in SAX.c
2235 */
2236 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002237
Daniel Veillard319a7422001-09-11 09:27:09 +00002238 if (len > buf_size - 10) {
2239 growBuffer(buf);
2240 }
2241 current = &buffer[0];
2242 while (*current != 0) { /* non input consuming */
2243 buf[len++] = *current++;
2244 }
2245 ctxt->token = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002246 }
Owen Taylor3473f882001-02-23 17:55:21 +00002247 } else if (c == '&') {
2248 if (NXT(1) == '#') {
2249 int val = xmlParseCharRef(ctxt);
2250 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002251 if (ctxt->replaceEntities) {
2252 if (len > buf_size - 10) {
2253 growBuffer(buf);
2254 }
2255 buf[len++] = '&';
2256 } else {
2257 /*
2258 * The reparsing will be done in xmlStringGetNodeList()
2259 * called by the attribute() function in SAX.c
2260 */
2261 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002262
Daniel Veillard319a7422001-09-11 09:27:09 +00002263 if (len > buf_size - 10) {
2264 growBuffer(buf);
2265 }
2266 current = &buffer[0];
2267 while (*current != 0) { /* non input consuming */
2268 buf[len++] = *current++;
2269 }
Owen Taylor3473f882001-02-23 17:55:21 +00002270 }
2271 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002272 if (len > buf_size - 10) {
2273 growBuffer(buf);
2274 }
Owen Taylor3473f882001-02-23 17:55:21 +00002275 len += xmlCopyChar(0, &buf[len], val);
2276 }
2277 } else {
2278 ent = xmlParseEntityRef(ctxt);
2279 if ((ent != NULL) &&
2280 (ctxt->replaceEntities != 0)) {
2281 xmlChar *rep;
2282
2283 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2284 rep = xmlStringDecodeEntities(ctxt, ent->content,
2285 XML_SUBSTITUTE_REF, 0, 0, 0);
2286 if (rep != NULL) {
2287 current = rep;
2288 while (*current != 0) { /* non input consuming */
2289 buf[len++] = *current++;
2290 if (len > buf_size - 10) {
2291 growBuffer(buf);
2292 }
2293 }
2294 xmlFree(rep);
2295 }
2296 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002297 if (len > buf_size - 10) {
2298 growBuffer(buf);
2299 }
Owen Taylor3473f882001-02-23 17:55:21 +00002300 if (ent->content != NULL)
2301 buf[len++] = ent->content[0];
2302 }
2303 } else if (ent != NULL) {
2304 int i = xmlStrlen(ent->name);
2305 const xmlChar *cur = ent->name;
2306
2307 /*
2308 * This may look absurd but is needed to detect
2309 * entities problems
2310 */
2311 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2312 (ent->content != NULL)) {
2313 xmlChar *rep;
2314 rep = xmlStringDecodeEntities(ctxt, ent->content,
2315 XML_SUBSTITUTE_REF, 0, 0, 0);
2316 if (rep != NULL)
2317 xmlFree(rep);
2318 }
2319
2320 /*
2321 * Just output the reference
2322 */
2323 buf[len++] = '&';
2324 if (len > buf_size - i - 10) {
2325 growBuffer(buf);
2326 }
2327 for (;i > 0;i--)
2328 buf[len++] = *cur++;
2329 buf[len++] = ';';
2330 }
2331 }
2332 } else {
2333 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2334 COPY_BUF(l,buf,len,0x20);
2335 if (len > buf_size - 10) {
2336 growBuffer(buf);
2337 }
2338 } else {
2339 COPY_BUF(l,buf,len,c);
2340 if (len > buf_size - 10) {
2341 growBuffer(buf);
2342 }
2343 }
2344 NEXTL(l);
2345 }
2346 GROW;
2347 c = CUR_CHAR(l);
2348 }
2349 buf[len++] = 0;
2350 if (RAW == '<') {
2351 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2352 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2353 ctxt->sax->error(ctxt->userData,
2354 "Unescaped '<' not allowed in attributes values\n");
2355 ctxt->wellFormed = 0;
2356 ctxt->disableSAX = 1;
2357 } else if (RAW != limit) {
2358 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2359 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2360 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2361 ctxt->wellFormed = 0;
2362 ctxt->disableSAX = 1;
2363 } else
2364 NEXT;
2365 return(buf);
2366}
2367
2368/**
2369 * xmlParseSystemLiteral:
2370 * @ctxt: an XML parser context
2371 *
2372 * parse an XML Literal
2373 *
2374 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2375 *
2376 * Returns the SystemLiteral parsed or NULL
2377 */
2378
2379xmlChar *
2380xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2381 xmlChar *buf = NULL;
2382 int len = 0;
2383 int size = XML_PARSER_BUFFER_SIZE;
2384 int cur, l;
2385 xmlChar stop;
2386 int state = ctxt->instate;
2387 int count = 0;
2388
2389 SHRINK;
2390 if (RAW == '"') {
2391 NEXT;
2392 stop = '"';
2393 } else if (RAW == '\'') {
2394 NEXT;
2395 stop = '\'';
2396 } else {
2397 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2398 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2399 ctxt->sax->error(ctxt->userData,
2400 "SystemLiteral \" or ' expected\n");
2401 ctxt->wellFormed = 0;
2402 ctxt->disableSAX = 1;
2403 return(NULL);
2404 }
2405
2406 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2407 if (buf == NULL) {
2408 xmlGenericError(xmlGenericErrorContext,
2409 "malloc of %d byte failed\n", size);
2410 return(NULL);
2411 }
2412 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2413 cur = CUR_CHAR(l);
2414 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2415 if (len + 5 >= size) {
2416 size *= 2;
2417 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2418 if (buf == NULL) {
2419 xmlGenericError(xmlGenericErrorContext,
2420 "realloc of %d byte failed\n", size);
2421 ctxt->instate = (xmlParserInputState) state;
2422 return(NULL);
2423 }
2424 }
2425 count++;
2426 if (count > 50) {
2427 GROW;
2428 count = 0;
2429 }
2430 COPY_BUF(l,buf,len,cur);
2431 NEXTL(l);
2432 cur = CUR_CHAR(l);
2433 if (cur == 0) {
2434 GROW;
2435 SHRINK;
2436 cur = CUR_CHAR(l);
2437 }
2438 }
2439 buf[len] = 0;
2440 ctxt->instate = (xmlParserInputState) state;
2441 if (!IS_CHAR(cur)) {
2442 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2443 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2444 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2445 ctxt->wellFormed = 0;
2446 ctxt->disableSAX = 1;
2447 } else {
2448 NEXT;
2449 }
2450 return(buf);
2451}
2452
2453/**
2454 * xmlParsePubidLiteral:
2455 * @ctxt: an XML parser context
2456 *
2457 * parse an XML public literal
2458 *
2459 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2460 *
2461 * Returns the PubidLiteral parsed or NULL.
2462 */
2463
2464xmlChar *
2465xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2466 xmlChar *buf = NULL;
2467 int len = 0;
2468 int size = XML_PARSER_BUFFER_SIZE;
2469 xmlChar cur;
2470 xmlChar stop;
2471 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002472 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00002473
2474 SHRINK;
2475 if (RAW == '"') {
2476 NEXT;
2477 stop = '"';
2478 } else if (RAW == '\'') {
2479 NEXT;
2480 stop = '\'';
2481 } else {
2482 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2483 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2484 ctxt->sax->error(ctxt->userData,
2485 "SystemLiteral \" or ' expected\n");
2486 ctxt->wellFormed = 0;
2487 ctxt->disableSAX = 1;
2488 return(NULL);
2489 }
2490 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2491 if (buf == NULL) {
2492 xmlGenericError(xmlGenericErrorContext,
2493 "malloc of %d byte failed\n", size);
2494 return(NULL);
2495 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002496 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00002497 cur = CUR;
2498 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2499 if (len + 1 >= size) {
2500 size *= 2;
2501 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2502 if (buf == NULL) {
2503 xmlGenericError(xmlGenericErrorContext,
2504 "realloc of %d byte failed\n", size);
2505 return(NULL);
2506 }
2507 }
2508 buf[len++] = cur;
2509 count++;
2510 if (count > 50) {
2511 GROW;
2512 count = 0;
2513 }
2514 NEXT;
2515 cur = CUR;
2516 if (cur == 0) {
2517 GROW;
2518 SHRINK;
2519 cur = CUR;
2520 }
2521 }
2522 buf[len] = 0;
2523 if (cur != stop) {
2524 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2525 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2526 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2527 ctxt->wellFormed = 0;
2528 ctxt->disableSAX = 1;
2529 } else {
2530 NEXT;
2531 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002532 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00002533 return(buf);
2534}
2535
Daniel Veillard48b2f892001-02-25 16:11:03 +00002536void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002537/**
2538 * xmlParseCharData:
2539 * @ctxt: an XML parser context
2540 * @cdata: int indicating whether we are within a CDATA section
2541 *
2542 * parse a CharData section.
2543 * if we are within a CDATA section ']]>' marks an end of section.
2544 *
2545 * The right angle bracket (>) may be represented using the string "&gt;",
2546 * and must, for compatibility, be escaped using "&gt;" or a character
2547 * reference when it appears in the string "]]>" in content, when that
2548 * string is not marking the end of a CDATA section.
2549 *
2550 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2551 */
2552
2553void
2554xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002555 const xmlChar *in;
2556 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002557 int line = ctxt->input->line;
2558 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002559
2560 SHRINK;
2561 GROW;
2562 /*
2563 * Accelerated common case where input don't need to be
2564 * modified before passing it to the handler.
2565 */
2566 if ((ctxt->token == 0) && (!cdata)) {
2567 in = ctxt->input->cur;
2568 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002569get_more:
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002570 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
Daniel Veillard48b2f892001-02-25 16:11:03 +00002571 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2572 in++;
2573 if (*in == 0xA) {
2574 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002575 in++;
2576 while (*in == 0xA) {
2577 ctxt->input->line++;
2578 in++;
2579 }
2580 goto get_more;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002581 }
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002582 if (*in == ']') {
2583 if ((in[1] == ']') && (in[2] == '>')) {
2584 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2585 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2586 ctxt->sax->error(ctxt->userData,
2587 "Sequence ']]>' not allowed in content\n");
2588 ctxt->input->cur = in;
2589 ctxt->wellFormed = 0;
2590 ctxt->disableSAX = 1;
2591 return;
2592 }
2593 in++;
2594 goto get_more;
2595 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002596 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002597 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002598 if (IS_BLANK(*ctxt->input->cur)) {
2599 const xmlChar *tmp = ctxt->input->cur;
2600 ctxt->input->cur = in;
2601 if (areBlanks(ctxt, tmp, nbchar)) {
2602 if (ctxt->sax->ignorableWhitespace != NULL)
2603 ctxt->sax->ignorableWhitespace(ctxt->userData,
2604 tmp, nbchar);
2605 } else {
2606 if (ctxt->sax->characters != NULL)
2607 ctxt->sax->characters(ctxt->userData,
2608 tmp, nbchar);
2609 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002610 line = ctxt->input->line;
2611 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002612 } else {
2613 if (ctxt->sax->characters != NULL)
2614 ctxt->sax->characters(ctxt->userData,
2615 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002616 line = ctxt->input->line;
2617 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002618 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002619 }
2620 ctxt->input->cur = in;
2621 if (*in == 0xD) {
2622 in++;
2623 if (*in == 0xA) {
2624 ctxt->input->cur = in;
2625 in++;
2626 ctxt->input->line++;
2627 continue; /* while */
2628 }
2629 in--;
2630 }
Daniel Veillard80f32572001-03-07 19:45:40 +00002631 if (*in == '<') {
2632 return;
2633 }
2634 if (*in == '&') {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002635 return;
2636 }
2637 SHRINK;
2638 GROW;
2639 in = ctxt->input->cur;
2640 } while ((*in >= 0x20) && (*in <= 0x7F));
2641 nbchar = 0;
2642 }
Daniel Veillard50582112001-03-26 22:52:16 +00002643 ctxt->input->line = line;
2644 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002645 xmlParseCharDataComplex(ctxt, cdata);
2646}
2647
2648void
2649xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002650 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2651 int nbchar = 0;
2652 int cur, l;
2653 int count = 0;
2654
2655 SHRINK;
2656 GROW;
2657 cur = CUR_CHAR(l);
2658 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2659 ((cur != '&') || (ctxt->token == '&')) &&
2660 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2661 if ((cur == ']') && (NXT(1) == ']') &&
2662 (NXT(2) == '>')) {
2663 if (cdata) break;
2664 else {
2665 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2666 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2667 ctxt->sax->error(ctxt->userData,
2668 "Sequence ']]>' not allowed in content\n");
2669 /* Should this be relaxed ??? I see a "must here */
2670 ctxt->wellFormed = 0;
2671 ctxt->disableSAX = 1;
2672 }
2673 }
2674 COPY_BUF(l,buf,nbchar,cur);
2675 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2676 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002677 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002678 */
2679 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2680 if (areBlanks(ctxt, buf, nbchar)) {
2681 if (ctxt->sax->ignorableWhitespace != NULL)
2682 ctxt->sax->ignorableWhitespace(ctxt->userData,
2683 buf, nbchar);
2684 } else {
2685 if (ctxt->sax->characters != NULL)
2686 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2687 }
2688 }
2689 nbchar = 0;
2690 }
2691 count++;
2692 if (count > 50) {
2693 GROW;
2694 count = 0;
2695 }
2696 NEXTL(l);
2697 cur = CUR_CHAR(l);
2698 }
2699 if (nbchar != 0) {
2700 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002701 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002702 */
2703 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2704 if (areBlanks(ctxt, buf, nbchar)) {
2705 if (ctxt->sax->ignorableWhitespace != NULL)
2706 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2707 } else {
2708 if (ctxt->sax->characters != NULL)
2709 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2710 }
2711 }
2712 }
2713}
2714
2715/**
2716 * xmlParseExternalID:
2717 * @ctxt: an XML parser context
2718 * @publicID: a xmlChar** receiving PubidLiteral
2719 * @strict: indicate whether we should restrict parsing to only
2720 * production [75], see NOTE below
2721 *
2722 * Parse an External ID or a Public ID
2723 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002724 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002725 * 'PUBLIC' S PubidLiteral S SystemLiteral
2726 *
2727 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2728 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2729 *
2730 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2731 *
2732 * Returns the function returns SystemLiteral and in the second
2733 * case publicID receives PubidLiteral, is strict is off
2734 * it is possible to return NULL and have publicID set.
2735 */
2736
2737xmlChar *
2738xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2739 xmlChar *URI = NULL;
2740
2741 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002742
2743 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002744 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2745 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2746 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2747 SKIP(6);
2748 if (!IS_BLANK(CUR)) {
2749 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2750 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2751 ctxt->sax->error(ctxt->userData,
2752 "Space required after 'SYSTEM'\n");
2753 ctxt->wellFormed = 0;
2754 ctxt->disableSAX = 1;
2755 }
2756 SKIP_BLANKS;
2757 URI = xmlParseSystemLiteral(ctxt);
2758 if (URI == NULL) {
2759 ctxt->errNo = XML_ERR_URI_REQUIRED;
2760 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2761 ctxt->sax->error(ctxt->userData,
2762 "xmlParseExternalID: SYSTEM, no URI\n");
2763 ctxt->wellFormed = 0;
2764 ctxt->disableSAX = 1;
2765 }
2766 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2767 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2768 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2769 SKIP(6);
2770 if (!IS_BLANK(CUR)) {
2771 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2772 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2773 ctxt->sax->error(ctxt->userData,
2774 "Space required after 'PUBLIC'\n");
2775 ctxt->wellFormed = 0;
2776 ctxt->disableSAX = 1;
2777 }
2778 SKIP_BLANKS;
2779 *publicID = xmlParsePubidLiteral(ctxt);
2780 if (*publicID == NULL) {
2781 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2782 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2783 ctxt->sax->error(ctxt->userData,
2784 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2785 ctxt->wellFormed = 0;
2786 ctxt->disableSAX = 1;
2787 }
2788 if (strict) {
2789 /*
2790 * We don't handle [83] so "S SystemLiteral" is required.
2791 */
2792 if (!IS_BLANK(CUR)) {
2793 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2794 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2795 ctxt->sax->error(ctxt->userData,
2796 "Space required after the Public Identifier\n");
2797 ctxt->wellFormed = 0;
2798 ctxt->disableSAX = 1;
2799 }
2800 } else {
2801 /*
2802 * We handle [83] so we return immediately, if
2803 * "S SystemLiteral" is not detected. From a purely parsing
2804 * point of view that's a nice mess.
2805 */
2806 const xmlChar *ptr;
2807 GROW;
2808
2809 ptr = CUR_PTR;
2810 if (!IS_BLANK(*ptr)) return(NULL);
2811
2812 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2813 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2814 }
2815 SKIP_BLANKS;
2816 URI = xmlParseSystemLiteral(ctxt);
2817 if (URI == NULL) {
2818 ctxt->errNo = XML_ERR_URI_REQUIRED;
2819 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2820 ctxt->sax->error(ctxt->userData,
2821 "xmlParseExternalID: PUBLIC, no URI\n");
2822 ctxt->wellFormed = 0;
2823 ctxt->disableSAX = 1;
2824 }
2825 }
2826 return(URI);
2827}
2828
2829/**
2830 * xmlParseComment:
2831 * @ctxt: an XML parser context
2832 *
2833 * Skip an XML (SGML) comment <!-- .... -->
2834 * The spec says that "For compatibility, the string "--" (double-hyphen)
2835 * must not occur within comments. "
2836 *
2837 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2838 */
2839void
2840xmlParseComment(xmlParserCtxtPtr ctxt) {
2841 xmlChar *buf = NULL;
2842 int len;
2843 int size = XML_PARSER_BUFFER_SIZE;
2844 int q, ql;
2845 int r, rl;
2846 int cur, l;
2847 xmlParserInputState state;
2848 xmlParserInputPtr input = ctxt->input;
2849 int count = 0;
2850
2851 /*
2852 * Check that there is a comment right here.
2853 */
2854 if ((RAW != '<') || (NXT(1) != '!') ||
2855 (NXT(2) != '-') || (NXT(3) != '-')) return;
2856
2857 state = ctxt->instate;
2858 ctxt->instate = XML_PARSER_COMMENT;
2859 SHRINK;
2860 SKIP(4);
2861 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2862 if (buf == NULL) {
2863 xmlGenericError(xmlGenericErrorContext,
2864 "malloc of %d byte failed\n", size);
2865 ctxt->instate = state;
2866 return;
2867 }
2868 q = CUR_CHAR(ql);
2869 NEXTL(ql);
2870 r = CUR_CHAR(rl);
2871 NEXTL(rl);
2872 cur = CUR_CHAR(l);
2873 len = 0;
2874 while (IS_CHAR(cur) && /* checked */
2875 ((cur != '>') ||
2876 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002877 if ((r == '-') && (q == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002878 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2879 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2880 ctxt->sax->error(ctxt->userData,
2881 "Comment must not contain '--' (double-hyphen)`\n");
2882 ctxt->wellFormed = 0;
2883 ctxt->disableSAX = 1;
2884 }
2885 if (len + 5 >= size) {
2886 size *= 2;
2887 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2888 if (buf == NULL) {
2889 xmlGenericError(xmlGenericErrorContext,
2890 "realloc of %d byte failed\n", size);
2891 ctxt->instate = state;
2892 return;
2893 }
2894 }
2895 COPY_BUF(ql,buf,len,q);
2896 q = r;
2897 ql = rl;
2898 r = cur;
2899 rl = l;
2900
2901 count++;
2902 if (count > 50) {
2903 GROW;
2904 count = 0;
2905 }
2906 NEXTL(l);
2907 cur = CUR_CHAR(l);
2908 if (cur == 0) {
2909 SHRINK;
2910 GROW;
2911 cur = CUR_CHAR(l);
2912 }
2913 }
2914 buf[len] = 0;
2915 if (!IS_CHAR(cur)) {
2916 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2917 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2918 ctxt->sax->error(ctxt->userData,
2919 "Comment not terminated \n<!--%.50s\n", buf);
2920 ctxt->wellFormed = 0;
2921 ctxt->disableSAX = 1;
2922 xmlFree(buf);
2923 } else {
2924 if (input != ctxt->input) {
2925 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2926 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2927 ctxt->sax->error(ctxt->userData,
2928"Comment doesn't start and stop in the same entity\n");
2929 ctxt->wellFormed = 0;
2930 ctxt->disableSAX = 1;
2931 }
2932 NEXT;
2933 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2934 (!ctxt->disableSAX))
2935 ctxt->sax->comment(ctxt->userData, buf);
2936 xmlFree(buf);
2937 }
2938 ctxt->instate = state;
2939}
2940
2941/**
2942 * xmlParsePITarget:
2943 * @ctxt: an XML parser context
2944 *
2945 * parse the name of a PI
2946 *
2947 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2948 *
2949 * Returns the PITarget name or NULL
2950 */
2951
2952xmlChar *
2953xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2954 xmlChar *name;
2955
2956 name = xmlParseName(ctxt);
2957 if ((name != NULL) &&
2958 ((name[0] == 'x') || (name[0] == 'X')) &&
2959 ((name[1] == 'm') || (name[1] == 'M')) &&
2960 ((name[2] == 'l') || (name[2] == 'L'))) {
2961 int i;
2962 if ((name[0] == 'x') && (name[1] == 'm') &&
2963 (name[2] == 'l') && (name[3] == 0)) {
2964 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2965 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2966 ctxt->sax->error(ctxt->userData,
2967 "XML declaration allowed only at the start of the document\n");
2968 ctxt->wellFormed = 0;
2969 ctxt->disableSAX = 1;
2970 return(name);
2971 } else if (name[3] == 0) {
2972 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2973 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2974 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2975 ctxt->wellFormed = 0;
2976 ctxt->disableSAX = 1;
2977 return(name);
2978 }
2979 for (i = 0;;i++) {
2980 if (xmlW3CPIs[i] == NULL) break;
2981 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2982 return(name);
2983 }
2984 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2985 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2986 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002987 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002988 }
2989 }
2990 return(name);
2991}
2992
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * A syntax error in the value raises an XML_WAR_CATALOG_PI warning,
 * except when the '=' sign is missing after "catalog", in which case
 * the PI is silently ignored.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* the pseudo-attribute name */
    while (IS_BLANK(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* the '=' sign, no warning if it is not there */
    while (IS_BLANK(*p))
        p++;
    if (*p != '=')
        return;
    p++;

    /* the quoted value */
    while (IS_BLANK(*p))
        p++;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    start = ++p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks are allowed after the closing quote */
    while (IS_BLANK(*p))
        p++;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3055
Owen Taylor3473f882001-02-23 17:55:21 +00003056/**
3057 * xmlParsePI:
3058 * @ctxt: an XML parser context
3059 *
3060 * parse an XML Processing Instruction.
3061 *
3062 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3063 *
 * The processing is transferred to SAX once parsed.
3065 */
3066
3067void
3068xmlParsePI(xmlParserCtxtPtr ctxt) {
3069 xmlChar *buf = NULL;
3070 int len = 0;
3071 int size = XML_PARSER_BUFFER_SIZE;
3072 int cur, l;
3073 xmlChar *target;
3074 xmlParserInputState state;
3075 int count = 0;
3076
3077 if ((RAW == '<') && (NXT(1) == '?')) {
3078 xmlParserInputPtr input = ctxt->input;
3079 state = ctxt->instate;
3080 ctxt->instate = XML_PARSER_PI;
3081 /*
3082 * this is a Processing Instruction.
3083 */
3084 SKIP(2);
3085 SHRINK;
3086
3087 /*
3088 * Parse the target name and check for special support like
3089 * namespace.
3090 */
3091 target = xmlParsePITarget(ctxt);
3092 if (target != NULL) {
3093 if ((RAW == '?') && (NXT(1) == '>')) {
3094 if (input != ctxt->input) {
3095 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3096 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3097 ctxt->sax->error(ctxt->userData,
3098 "PI declaration doesn't start and stop in the same entity\n");
3099 ctxt->wellFormed = 0;
3100 ctxt->disableSAX = 1;
3101 }
3102 SKIP(2);
3103
3104 /*
3105 * SAX: PI detected.
3106 */
3107 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3108 (ctxt->sax->processingInstruction != NULL))
3109 ctxt->sax->processingInstruction(ctxt->userData,
3110 target, NULL);
3111 ctxt->instate = state;
3112 xmlFree(target);
3113 return;
3114 }
3115 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3116 if (buf == NULL) {
3117 xmlGenericError(xmlGenericErrorContext,
3118 "malloc of %d byte failed\n", size);
3119 ctxt->instate = state;
3120 return;
3121 }
3122 cur = CUR;
3123 if (!IS_BLANK(cur)) {
3124 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3125 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3126 ctxt->sax->error(ctxt->userData,
3127 "xmlParsePI: PI %s space expected\n", target);
3128 ctxt->wellFormed = 0;
3129 ctxt->disableSAX = 1;
3130 }
3131 SKIP_BLANKS;
3132 cur = CUR_CHAR(l);
3133 while (IS_CHAR(cur) && /* checked */
3134 ((cur != '?') || (NXT(1) != '>'))) {
3135 if (len + 5 >= size) {
3136 size *= 2;
3137 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3138 if (buf == NULL) {
3139 xmlGenericError(xmlGenericErrorContext,
3140 "realloc of %d byte failed\n", size);
3141 ctxt->instate = state;
3142 return;
3143 }
3144 }
3145 count++;
3146 if (count > 50) {
3147 GROW;
3148 count = 0;
3149 }
3150 COPY_BUF(l,buf,len,cur);
3151 NEXTL(l);
3152 cur = CUR_CHAR(l);
3153 if (cur == 0) {
3154 SHRINK;
3155 GROW;
3156 cur = CUR_CHAR(l);
3157 }
3158 }
3159 buf[len] = 0;
3160 if (cur != '?') {
3161 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3162 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3163 ctxt->sax->error(ctxt->userData,
3164 "xmlParsePI: PI %s never end ...\n", target);
3165 ctxt->wellFormed = 0;
3166 ctxt->disableSAX = 1;
3167 } else {
3168 if (input != ctxt->input) {
3169 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3170 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3171 ctxt->sax->error(ctxt->userData,
3172 "PI declaration doesn't start and stop in the same entity\n");
3173 ctxt->wellFormed = 0;
3174 ctxt->disableSAX = 1;
3175 }
3176 SKIP(2);
3177
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003178#ifdef LIBXML_CATALOG_ENABLED
3179 if (((state == XML_PARSER_MISC) ||
3180 (state == XML_PARSER_START)) &&
3181 (xmlStrEqual(target, XML_CATALOG_PI))) {
3182 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3183 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3184 (allow == XML_CATA_ALLOW_ALL))
3185 xmlParseCatalogPI(ctxt, buf);
3186 }
3187#endif
3188
3189
Owen Taylor3473f882001-02-23 17:55:21 +00003190 /*
3191 * SAX: PI detected.
3192 */
3193 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3194 (ctxt->sax->processingInstruction != NULL))
3195 ctxt->sax->processingInstruction(ctxt->userData,
3196 target, buf);
3197 }
3198 xmlFree(buf);
3199 xmlFree(target);
3200 } else {
3201 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3202 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3203 ctxt->sax->error(ctxt->userData,
3204 "xmlParsePI : no target name\n");
3205 ctxt->wellFormed = 0;
3206 ctxt->disableSAX = 1;
3207 }
3208 ctxt->instate = state;
3209 }
3210}
3211
3212/**
3213 * xmlParseNotationDecl:
3214 * @ctxt: an XML parser context
3215 *
3216 * parse a notation declaration
3217 *
3218 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3219 *
3220 * Hence there is actually 3 choices:
3221 * 'PUBLIC' S PubidLiteral
3222 * 'PUBLIC' S PubidLiteral S SystemLiteral
3223 * and 'SYSTEM' S SystemLiteral
3224 *
3225 * See the NOTE on xmlParseExternalID().
3226 */
3227
3228void
3229xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3230 xmlChar *name;
3231 xmlChar *Pubid;
3232 xmlChar *Systemid;
3233
3234 if ((RAW == '<') && (NXT(1) == '!') &&
3235 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3236 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3237 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3238 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3239 xmlParserInputPtr input = ctxt->input;
3240 SHRINK;
3241 SKIP(10);
3242 if (!IS_BLANK(CUR)) {
3243 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3244 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3245 ctxt->sax->error(ctxt->userData,
3246 "Space required after '<!NOTATION'\n");
3247 ctxt->wellFormed = 0;
3248 ctxt->disableSAX = 1;
3249 return;
3250 }
3251 SKIP_BLANKS;
3252
Daniel Veillard76d66f42001-05-16 21:05:17 +00003253 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003254 if (name == NULL) {
3255 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3256 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3257 ctxt->sax->error(ctxt->userData,
3258 "NOTATION: Name expected here\n");
3259 ctxt->wellFormed = 0;
3260 ctxt->disableSAX = 1;
3261 return;
3262 }
3263 if (!IS_BLANK(CUR)) {
3264 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3265 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3266 ctxt->sax->error(ctxt->userData,
3267 "Space required after the NOTATION name'\n");
3268 ctxt->wellFormed = 0;
3269 ctxt->disableSAX = 1;
3270 return;
3271 }
3272 SKIP_BLANKS;
3273
3274 /*
3275 * Parse the IDs.
3276 */
3277 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3278 SKIP_BLANKS;
3279
3280 if (RAW == '>') {
3281 if (input != ctxt->input) {
3282 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3283 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3284 ctxt->sax->error(ctxt->userData,
3285"Notation declaration doesn't start and stop in the same entity\n");
3286 ctxt->wellFormed = 0;
3287 ctxt->disableSAX = 1;
3288 }
3289 NEXT;
3290 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3291 (ctxt->sax->notationDecl != NULL))
3292 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3293 } else {
3294 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3295 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3296 ctxt->sax->error(ctxt->userData,
3297 "'>' required to close NOTATION declaration\n");
3298 ctxt->wellFormed = 0;
3299 ctxt->disableSAX = 1;
3300 }
3301 xmlFree(name);
3302 if (Systemid != NULL) xmlFree(Systemid);
3303 if (Pubid != NULL) xmlFree(Pubid);
3304 }
3305}
3306
3307/**
3308 * xmlParseEntityDecl:
3309 * @ctxt: an XML parser context
3310 *
3311 * parse <!ENTITY declarations
3312 *
3313 * [70] EntityDecl ::= GEDecl | PEDecl
3314 *
3315 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3316 *
3317 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3318 *
3319 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3320 *
3321 * [74] PEDef ::= EntityValue | ExternalID
3322 *
3323 * [76] NDataDecl ::= S 'NDATA' S Name
3324 *
3325 * [ VC: Notation Declared ]
3326 * The Name must match the declared name of a notation.
3327 */
3328
3329void
3330xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3331 xmlChar *name = NULL;
3332 xmlChar *value = NULL;
3333 xmlChar *URI = NULL, *literal = NULL;
3334 xmlChar *ndata = NULL;
3335 int isParameter = 0;
3336 xmlChar *orig = NULL;
3337
3338 GROW;
3339 if ((RAW == '<') && (NXT(1) == '!') &&
3340 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3341 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3342 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3343 xmlParserInputPtr input = ctxt->input;
3344 ctxt->instate = XML_PARSER_ENTITY_DECL;
3345 SHRINK;
3346 SKIP(8);
3347 if (!IS_BLANK(CUR)) {
3348 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3350 ctxt->sax->error(ctxt->userData,
3351 "Space required after '<!ENTITY'\n");
3352 ctxt->wellFormed = 0;
3353 ctxt->disableSAX = 1;
3354 }
3355 SKIP_BLANKS;
3356
3357 if (RAW == '%') {
3358 NEXT;
3359 if (!IS_BLANK(CUR)) {
3360 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3361 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3362 ctxt->sax->error(ctxt->userData,
3363 "Space required after '%'\n");
3364 ctxt->wellFormed = 0;
3365 ctxt->disableSAX = 1;
3366 }
3367 SKIP_BLANKS;
3368 isParameter = 1;
3369 }
3370
Daniel Veillard76d66f42001-05-16 21:05:17 +00003371 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003372 if (name == NULL) {
3373 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3374 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3375 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3376 ctxt->wellFormed = 0;
3377 ctxt->disableSAX = 1;
3378 return;
3379 }
3380 if (!IS_BLANK(CUR)) {
3381 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3382 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3383 ctxt->sax->error(ctxt->userData,
3384 "Space required after the entity name\n");
3385 ctxt->wellFormed = 0;
3386 ctxt->disableSAX = 1;
3387 }
3388 SKIP_BLANKS;
3389
3390 /*
3391 * handle the various case of definitions...
3392 */
3393 if (isParameter) {
3394 if ((RAW == '"') || (RAW == '\'')) {
3395 value = xmlParseEntityValue(ctxt, &orig);
3396 if (value) {
3397 if ((ctxt->sax != NULL) &&
3398 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3399 ctxt->sax->entityDecl(ctxt->userData, name,
3400 XML_INTERNAL_PARAMETER_ENTITY,
3401 NULL, NULL, value);
3402 }
3403 } else {
3404 URI = xmlParseExternalID(ctxt, &literal, 1);
3405 if ((URI == NULL) && (literal == NULL)) {
3406 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3407 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3408 ctxt->sax->error(ctxt->userData,
3409 "Entity value required\n");
3410 ctxt->wellFormed = 0;
3411 ctxt->disableSAX = 1;
3412 }
3413 if (URI) {
3414 xmlURIPtr uri;
3415
3416 uri = xmlParseURI((const char *) URI);
3417 if (uri == NULL) {
3418 ctxt->errNo = XML_ERR_INVALID_URI;
3419 if ((ctxt->sax != NULL) &&
3420 (!ctxt->disableSAX) &&
3421 (ctxt->sax->error != NULL))
3422 ctxt->sax->error(ctxt->userData,
3423 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003424 /*
3425 * This really ought to be a well formedness error
3426 * but the XML Core WG decided otherwise c.f. issue
3427 * E26 of the XML erratas.
3428 */
Owen Taylor3473f882001-02-23 17:55:21 +00003429 } else {
3430 if (uri->fragment != NULL) {
3431 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3432 if ((ctxt->sax != NULL) &&
3433 (!ctxt->disableSAX) &&
3434 (ctxt->sax->error != NULL))
3435 ctxt->sax->error(ctxt->userData,
3436 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003437 /*
3438 * Okay this is foolish to block those but not
3439 * invalid URIs.
3440 */
Owen Taylor3473f882001-02-23 17:55:21 +00003441 ctxt->wellFormed = 0;
3442 } else {
3443 if ((ctxt->sax != NULL) &&
3444 (!ctxt->disableSAX) &&
3445 (ctxt->sax->entityDecl != NULL))
3446 ctxt->sax->entityDecl(ctxt->userData, name,
3447 XML_EXTERNAL_PARAMETER_ENTITY,
3448 literal, URI, NULL);
3449 }
3450 xmlFreeURI(uri);
3451 }
3452 }
3453 }
3454 } else {
3455 if ((RAW == '"') || (RAW == '\'')) {
3456 value = xmlParseEntityValue(ctxt, &orig);
3457 if ((ctxt->sax != NULL) &&
3458 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3459 ctxt->sax->entityDecl(ctxt->userData, name,
3460 XML_INTERNAL_GENERAL_ENTITY,
3461 NULL, NULL, value);
3462 } else {
3463 URI = xmlParseExternalID(ctxt, &literal, 1);
3464 if ((URI == NULL) && (literal == NULL)) {
3465 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3467 ctxt->sax->error(ctxt->userData,
3468 "Entity value required\n");
3469 ctxt->wellFormed = 0;
3470 ctxt->disableSAX = 1;
3471 }
3472 if (URI) {
3473 xmlURIPtr uri;
3474
3475 uri = xmlParseURI((const char *)URI);
3476 if (uri == NULL) {
3477 ctxt->errNo = XML_ERR_INVALID_URI;
3478 if ((ctxt->sax != NULL) &&
3479 (!ctxt->disableSAX) &&
3480 (ctxt->sax->error != NULL))
3481 ctxt->sax->error(ctxt->userData,
3482 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003483 /*
3484 * This really ought to be a well formedness error
3485 * but the XML Core WG decided otherwise c.f. issue
3486 * E26 of the XML erratas.
3487 */
Owen Taylor3473f882001-02-23 17:55:21 +00003488 } else {
3489 if (uri->fragment != NULL) {
3490 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3491 if ((ctxt->sax != NULL) &&
3492 (!ctxt->disableSAX) &&
3493 (ctxt->sax->error != NULL))
3494 ctxt->sax->error(ctxt->userData,
3495 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003496 /*
3497 * Okay this is foolish to block those but not
3498 * invalid URIs.
3499 */
Owen Taylor3473f882001-02-23 17:55:21 +00003500 ctxt->wellFormed = 0;
3501 }
3502 xmlFreeURI(uri);
3503 }
3504 }
3505 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3506 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3507 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3508 ctxt->sax->error(ctxt->userData,
3509 "Space required before 'NDATA'\n");
3510 ctxt->wellFormed = 0;
3511 ctxt->disableSAX = 1;
3512 }
3513 SKIP_BLANKS;
3514 if ((RAW == 'N') && (NXT(1) == 'D') &&
3515 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3516 (NXT(4) == 'A')) {
3517 SKIP(5);
3518 if (!IS_BLANK(CUR)) {
3519 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3520 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3521 ctxt->sax->error(ctxt->userData,
3522 "Space required after 'NDATA'\n");
3523 ctxt->wellFormed = 0;
3524 ctxt->disableSAX = 1;
3525 }
3526 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003527 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003528 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3529 (ctxt->sax->unparsedEntityDecl != NULL))
3530 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3531 literal, URI, ndata);
3532 } else {
3533 if ((ctxt->sax != NULL) &&
3534 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3535 ctxt->sax->entityDecl(ctxt->userData, name,
3536 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3537 literal, URI, NULL);
3538 }
3539 }
3540 }
3541 SKIP_BLANKS;
3542 if (RAW != '>') {
3543 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3544 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3545 ctxt->sax->error(ctxt->userData,
3546 "xmlParseEntityDecl: entity %s not terminated\n", name);
3547 ctxt->wellFormed = 0;
3548 ctxt->disableSAX = 1;
3549 } else {
3550 if (input != ctxt->input) {
3551 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3552 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3553 ctxt->sax->error(ctxt->userData,
3554"Entity declaration doesn't start and stop in the same entity\n");
3555 ctxt->wellFormed = 0;
3556 ctxt->disableSAX = 1;
3557 }
3558 NEXT;
3559 }
3560 if (orig != NULL) {
3561 /*
3562 * Ugly mechanism to save the raw entity value.
3563 */
3564 xmlEntityPtr cur = NULL;
3565
3566 if (isParameter) {
3567 if ((ctxt->sax != NULL) &&
3568 (ctxt->sax->getParameterEntity != NULL))
3569 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3570 } else {
3571 if ((ctxt->sax != NULL) &&
3572 (ctxt->sax->getEntity != NULL))
3573 cur = ctxt->sax->getEntity(ctxt->userData, name);
3574 }
3575 if (cur != NULL) {
3576 if (cur->orig != NULL)
3577 xmlFree(orig);
3578 else
3579 cur->orig = orig;
3580 } else
3581 xmlFree(orig);
3582 }
3583 if (name != NULL) xmlFree(name);
3584 if (value != NULL) xmlFree(value);
3585 if (URI != NULL) xmlFree(URI);
3586 if (literal != NULL) xmlFree(literal);
3587 if (ndata != NULL) xmlFree(ndata);
3588 }
3589}
3590
3591/**
3592 * xmlParseDefaultDecl:
3593 * @ctxt: an XML parser context
3594 * @value: Receive a possible fixed default value for the attribute
3595 *
3596 * Parse an attribute default declaration
3597 *
3598 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3599 *
3600 * [ VC: Required Attribute ]
3601 * if the default declaration is the keyword #REQUIRED, then the
3602 * attribute must be specified for all elements of the type in the
3603 * attribute-list declaration.
3604 *
3605 * [ VC: Attribute Default Legal ]
3606 * The declared default value must meet the lexical constraints of
3607 * the declared attribute type c.f. xmlValidateAttributeDecl()
3608 *
3609 * [ VC: Fixed Attribute Default ]
3610 * if an attribute has a default value declared with the #FIXED
3611 * keyword, instances of that attribute must match the default value.
3612 *
3613 * [ WFC: No < in Attribute Values ]
3614 * handled in xmlParseAttValue()
3615 *
3616 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3617 * or XML_ATTRIBUTE_FIXED.
3618 */
3619
3620int
3621xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3622 int val;
3623 xmlChar *ret;
3624
3625 *value = NULL;
3626 if ((RAW == '#') && (NXT(1) == 'R') &&
3627 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3628 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3629 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3630 (NXT(8) == 'D')) {
3631 SKIP(9);
3632 return(XML_ATTRIBUTE_REQUIRED);
3633 }
3634 if ((RAW == '#') && (NXT(1) == 'I') &&
3635 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3636 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3637 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3638 SKIP(8);
3639 return(XML_ATTRIBUTE_IMPLIED);
3640 }
3641 val = XML_ATTRIBUTE_NONE;
3642 if ((RAW == '#') && (NXT(1) == 'F') &&
3643 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3644 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3645 SKIP(6);
3646 val = XML_ATTRIBUTE_FIXED;
3647 if (!IS_BLANK(CUR)) {
3648 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3649 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3650 ctxt->sax->error(ctxt->userData,
3651 "Space required after '#FIXED'\n");
3652 ctxt->wellFormed = 0;
3653 ctxt->disableSAX = 1;
3654 }
3655 SKIP_BLANKS;
3656 }
3657 ret = xmlParseAttValue(ctxt);
3658 ctxt->instate = XML_PARSER_DTD;
3659 if (ret == NULL) {
3660 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3661 ctxt->sax->error(ctxt->userData,
3662 "Attribute default value declaration error\n");
3663 ctxt->wellFormed = 0;
3664 ctxt->disableSAX = 1;
3665 } else
3666 *value = ret;
3667 return(val);
3668}
3669
3670/**
3671 * xmlParseNotationType:
3672 * @ctxt: an XML parser context
3673 *
3674 * parse an Notation attribute type.
3675 *
3676 * Note: the leading 'NOTATION' S part has already being parsed...
3677 *
3678 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3679 *
3680 * [ VC: Notation Attributes ]
3681 * Values of this type must match one of the notation names included
3682 * in the declaration; all notation names in the declaration must be declared.
3683 *
3684 * Returns: the notation attribute tree built while parsing
3685 */
3686
3687xmlEnumerationPtr
3688xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3689 xmlChar *name;
3690 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3691
3692 if (RAW != '(') {
3693 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3694 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3695 ctxt->sax->error(ctxt->userData,
3696 "'(' required to start 'NOTATION'\n");
3697 ctxt->wellFormed = 0;
3698 ctxt->disableSAX = 1;
3699 return(NULL);
3700 }
3701 SHRINK;
3702 do {
3703 NEXT;
3704 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003705 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003706 if (name == NULL) {
3707 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3708 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3709 ctxt->sax->error(ctxt->userData,
3710 "Name expected in NOTATION declaration\n");
3711 ctxt->wellFormed = 0;
3712 ctxt->disableSAX = 1;
3713 return(ret);
3714 }
3715 cur = xmlCreateEnumeration(name);
3716 xmlFree(name);
3717 if (cur == NULL) return(ret);
3718 if (last == NULL) ret = last = cur;
3719 else {
3720 last->next = cur;
3721 last = cur;
3722 }
3723 SKIP_BLANKS;
3724 } while (RAW == '|');
3725 if (RAW != ')') {
3726 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3727 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3728 ctxt->sax->error(ctxt->userData,
3729 "')' required to finish NOTATION declaration\n");
3730 ctxt->wellFormed = 0;
3731 ctxt->disableSAX = 1;
3732 if ((last != NULL) && (last != ret))
3733 xmlFreeEnumeration(last);
3734 return(ret);
3735 }
3736 NEXT;
3737 return(ret);
3738}
3739
3740/**
3741 * xmlParseEnumerationType:
3742 * @ctxt: an XML parser context
3743 *
3744 * parse an Enumeration attribute type.
3745 *
3746 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3747 *
3748 * [ VC: Enumeration ]
3749 * Values of this type must match one of the Nmtoken tokens in
3750 * the declaration
3751 *
3752 * Returns: the enumeration attribute tree built while parsing
3753 */
3754
3755xmlEnumerationPtr
3756xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3757 xmlChar *name;
3758 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3759
3760 if (RAW != '(') {
3761 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3762 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3763 ctxt->sax->error(ctxt->userData,
3764 "'(' required to start ATTLIST enumeration\n");
3765 ctxt->wellFormed = 0;
3766 ctxt->disableSAX = 1;
3767 return(NULL);
3768 }
3769 SHRINK;
3770 do {
3771 NEXT;
3772 SKIP_BLANKS;
3773 name = xmlParseNmtoken(ctxt);
3774 if (name == NULL) {
3775 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3776 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3777 ctxt->sax->error(ctxt->userData,
3778 "NmToken expected in ATTLIST enumeration\n");
3779 ctxt->wellFormed = 0;
3780 ctxt->disableSAX = 1;
3781 return(ret);
3782 }
3783 cur = xmlCreateEnumeration(name);
3784 xmlFree(name);
3785 if (cur == NULL) return(ret);
3786 if (last == NULL) ret = last = cur;
3787 else {
3788 last->next = cur;
3789 last = cur;
3790 }
3791 SKIP_BLANKS;
3792 } while (RAW == '|');
3793 if (RAW != ')') {
3794 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3795 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3796 ctxt->sax->error(ctxt->userData,
3797 "')' required to finish ATTLIST enumeration\n");
3798 ctxt->wellFormed = 0;
3799 ctxt->disableSAX = 1;
3800 return(ret);
3801 }
3802 NEXT;
3803 return(ret);
3804}
3805
3806/**
3807 * xmlParseEnumeratedType:
3808 * @ctxt: an XML parser context
3809 * @tree: the enumeration tree built while parsing
3810 *
3811 * parse an Enumerated attribute type.
3812 *
3813 * [57] EnumeratedType ::= NotationType | Enumeration
3814 *
3815 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3816 *
3817 *
3818 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3819 */
3820
3821int
3822xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3823 if ((RAW == 'N') && (NXT(1) == 'O') &&
3824 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3825 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3826 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3827 SKIP(8);
3828 if (!IS_BLANK(CUR)) {
3829 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3830 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3831 ctxt->sax->error(ctxt->userData,
3832 "Space required after 'NOTATION'\n");
3833 ctxt->wellFormed = 0;
3834 ctxt->disableSAX = 1;
3835 return(0);
3836 }
3837 SKIP_BLANKS;
3838 *tree = xmlParseNotationType(ctxt);
3839 if (*tree == NULL) return(0);
3840 return(XML_ATTRIBUTE_NOTATION);
3841 }
3842 *tree = xmlParseEnumerationType(ctxt);
3843 if (*tree == NULL) return(0);
3844 return(XML_ATTRIBUTE_ENUMERATION);
3845}
3846
3847/**
3848 * xmlParseAttributeType:
3849 * @ctxt: an XML parser context
3850 * @tree: the enumeration tree built while parsing
3851 *
3852 * parse the Attribute list def for an element
3853 *
3854 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3855 *
3856 * [55] StringType ::= 'CDATA'
3857 *
3858 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3859 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3860 *
3861 * Validity constraints for attribute values syntax are checked in
3862 * xmlValidateAttributeValue()
3863 *
3864 * [ VC: ID ]
3865 * Values of type ID must match the Name production. A name must not
3866 * appear more than once in an XML document as a value of this type;
3867 * i.e., ID values must uniquely identify the elements which bear them.
3868 *
3869 * [ VC: One ID per Element Type ]
3870 * No element type may have more than one ID attribute specified.
3871 *
3872 * [ VC: ID Attribute Default ]
3873 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3874 *
3875 * [ VC: IDREF ]
3876 * Values of type IDREF must match the Name production, and values
3877 * of type IDREFS must match Names; each IDREF Name must match the value
3878 * of an ID attribute on some element in the XML document; i.e. IDREF
3879 * values must match the value of some ID attribute.
3880 *
3881 * [ VC: Entity Name ]
3882 * Values of type ENTITY must match the Name production, values
3883 * of type ENTITIES must match Names; each Entity Name must match the
3884 * name of an unparsed entity declared in the DTD.
3885 *
3886 * [ VC: Name Token ]
3887 * Values of type NMTOKEN must match the Nmtoken production; values
3888 * of type NMTOKENS must match Nmtokens.
3889 *
3890 * Returns the attribute type
3891 */
3892int
3893xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3894 SHRINK;
3895 if ((RAW == 'C') && (NXT(1) == 'D') &&
3896 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3897 (NXT(4) == 'A')) {
3898 SKIP(5);
3899 return(XML_ATTRIBUTE_CDATA);
3900 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3901 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3902 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3903 SKIP(6);
3904 return(XML_ATTRIBUTE_IDREFS);
3905 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3906 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3907 (NXT(4) == 'F')) {
3908 SKIP(5);
3909 return(XML_ATTRIBUTE_IDREF);
3910 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3911 SKIP(2);
3912 return(XML_ATTRIBUTE_ID);
3913 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3914 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3915 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3916 SKIP(6);
3917 return(XML_ATTRIBUTE_ENTITY);
3918 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3919 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3920 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3921 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3922 SKIP(8);
3923 return(XML_ATTRIBUTE_ENTITIES);
3924 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3925 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3926 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3927 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3928 SKIP(8);
3929 return(XML_ATTRIBUTE_NMTOKENS);
3930 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3931 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3932 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3933 (NXT(6) == 'N')) {
3934 SKIP(7);
3935 return(XML_ATTRIBUTE_NMTOKEN);
3936 }
3937 return(xmlParseEnumeratedType(ctxt, tree));
3938}
3939
3940/**
3941 * xmlParseAttributeListDecl:
3942 * @ctxt: an XML parser context
3943 *
3944 * : parse the Attribute list def for an element
3945 *
3946 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3947 *
3948 * [53] AttDef ::= S Name S AttType S DefaultDecl
3949 *
3950 */
3951void
3952xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3953 xmlChar *elemName;
3954 xmlChar *attrName;
3955 xmlEnumerationPtr tree;
3956
3957 if ((RAW == '<') && (NXT(1) == '!') &&
3958 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3959 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3960 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3961 (NXT(8) == 'T')) {
3962 xmlParserInputPtr input = ctxt->input;
3963
3964 SKIP(9);
3965 if (!IS_BLANK(CUR)) {
3966 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3967 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3968 ctxt->sax->error(ctxt->userData,
3969 "Space required after '<!ATTLIST'\n");
3970 ctxt->wellFormed = 0;
3971 ctxt->disableSAX = 1;
3972 }
3973 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003974 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003975 if (elemName == NULL) {
3976 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3977 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3978 ctxt->sax->error(ctxt->userData,
3979 "ATTLIST: no name for Element\n");
3980 ctxt->wellFormed = 0;
3981 ctxt->disableSAX = 1;
3982 return;
3983 }
3984 SKIP_BLANKS;
3985 GROW;
3986 while (RAW != '>') {
3987 const xmlChar *check = CUR_PTR;
3988 int type;
3989 int def;
3990 xmlChar *defaultValue = NULL;
3991
3992 GROW;
3993 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003994 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003995 if (attrName == NULL) {
3996 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3997 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3998 ctxt->sax->error(ctxt->userData,
3999 "ATTLIST: no name for Attribute\n");
4000 ctxt->wellFormed = 0;
4001 ctxt->disableSAX = 1;
4002 break;
4003 }
4004 GROW;
4005 if (!IS_BLANK(CUR)) {
4006 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4007 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4008 ctxt->sax->error(ctxt->userData,
4009 "Space required after the attribute name\n");
4010 ctxt->wellFormed = 0;
4011 ctxt->disableSAX = 1;
4012 if (attrName != NULL)
4013 xmlFree(attrName);
4014 if (defaultValue != NULL)
4015 xmlFree(defaultValue);
4016 break;
4017 }
4018 SKIP_BLANKS;
4019
4020 type = xmlParseAttributeType(ctxt, &tree);
4021 if (type <= 0) {
4022 if (attrName != NULL)
4023 xmlFree(attrName);
4024 if (defaultValue != NULL)
4025 xmlFree(defaultValue);
4026 break;
4027 }
4028
4029 GROW;
4030 if (!IS_BLANK(CUR)) {
4031 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4032 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4033 ctxt->sax->error(ctxt->userData,
4034 "Space required after the attribute type\n");
4035 ctxt->wellFormed = 0;
4036 ctxt->disableSAX = 1;
4037 if (attrName != NULL)
4038 xmlFree(attrName);
4039 if (defaultValue != NULL)
4040 xmlFree(defaultValue);
4041 if (tree != NULL)
4042 xmlFreeEnumeration(tree);
4043 break;
4044 }
4045 SKIP_BLANKS;
4046
4047 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4048 if (def <= 0) {
4049 if (attrName != NULL)
4050 xmlFree(attrName);
4051 if (defaultValue != NULL)
4052 xmlFree(defaultValue);
4053 if (tree != NULL)
4054 xmlFreeEnumeration(tree);
4055 break;
4056 }
4057
4058 GROW;
4059 if (RAW != '>') {
4060 if (!IS_BLANK(CUR)) {
4061 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4062 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4063 ctxt->sax->error(ctxt->userData,
4064 "Space required after the attribute default value\n");
4065 ctxt->wellFormed = 0;
4066 ctxt->disableSAX = 1;
4067 if (attrName != NULL)
4068 xmlFree(attrName);
4069 if (defaultValue != NULL)
4070 xmlFree(defaultValue);
4071 if (tree != NULL)
4072 xmlFreeEnumeration(tree);
4073 break;
4074 }
4075 SKIP_BLANKS;
4076 }
4077 if (check == CUR_PTR) {
4078 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4079 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4080 ctxt->sax->error(ctxt->userData,
4081 "xmlParseAttributeListDecl: detected internal error\n");
4082 if (attrName != NULL)
4083 xmlFree(attrName);
4084 if (defaultValue != NULL)
4085 xmlFree(defaultValue);
4086 if (tree != NULL)
4087 xmlFreeEnumeration(tree);
4088 break;
4089 }
4090 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4091 (ctxt->sax->attributeDecl != NULL))
4092 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4093 type, def, defaultValue, tree);
4094 if (attrName != NULL)
4095 xmlFree(attrName);
4096 if (defaultValue != NULL)
4097 xmlFree(defaultValue);
4098 GROW;
4099 }
4100 if (RAW == '>') {
4101 if (input != ctxt->input) {
4102 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4103 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4104 ctxt->sax->error(ctxt->userData,
4105"Attribute list declaration doesn't start and stop in the same entity\n");
4106 ctxt->wellFormed = 0;
4107 ctxt->disableSAX = 1;
4108 }
4109 NEXT;
4110 }
4111
4112 xmlFree(elemName);
4113 }
4114}
4115
4116/**
4117 * xmlParseElementMixedContentDecl:
4118 * @ctxt: an XML parser context
4119 *
4120 * parse the declaration for a Mixed Element content
4121 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4122 *
4123 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4124 * '(' S? '#PCDATA' S? ')'
4125 *
4126 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4127 *
4128 * [ VC: No Duplicate Types ]
4129 * The same name must not appear more than once in a single
4130 * mixed-content declaration.
4131 *
4132 * returns: the list of the xmlElementContentPtr describing the element choices
4133 */
4134xmlElementContentPtr
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004135xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004136 xmlElementContentPtr ret = NULL, cur = NULL, n;
4137 xmlChar *elem = NULL;
4138
4139 GROW;
4140 if ((RAW == '#') && (NXT(1) == 'P') &&
4141 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4142 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4143 (NXT(6) == 'A')) {
4144 SKIP(7);
4145 SKIP_BLANKS;
4146 SHRINK;
4147 if (RAW == ')') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004148 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4149 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4150 if (ctxt->vctxt.error != NULL)
4151 ctxt->vctxt.error(ctxt->vctxt.userData,
4152"Element content declaration doesn't start and stop in the same entity\n");
4153 ctxt->valid = 0;
4154 }
Owen Taylor3473f882001-02-23 17:55:21 +00004155 NEXT;
4156 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4157 if (RAW == '*') {
4158 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4159 NEXT;
4160 }
4161 return(ret);
4162 }
4163 if ((RAW == '(') || (RAW == '|')) {
4164 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4165 if (ret == NULL) return(NULL);
4166 }
4167 while (RAW == '|') {
4168 NEXT;
4169 if (elem == NULL) {
4170 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4171 if (ret == NULL) return(NULL);
4172 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004173 if (cur != NULL)
4174 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004175 cur = ret;
4176 } else {
4177 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4178 if (n == NULL) return(NULL);
4179 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004180 if (n->c1 != NULL)
4181 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004182 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004183 if (n != NULL)
4184 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004185 cur = n;
4186 xmlFree(elem);
4187 }
4188 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004189 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004190 if (elem == NULL) {
4191 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4192 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4193 ctxt->sax->error(ctxt->userData,
4194 "xmlParseElementMixedContentDecl : Name expected\n");
4195 ctxt->wellFormed = 0;
4196 ctxt->disableSAX = 1;
4197 xmlFreeElementContent(cur);
4198 return(NULL);
4199 }
4200 SKIP_BLANKS;
4201 GROW;
4202 }
4203 if ((RAW == ')') && (NXT(1) == '*')) {
4204 if (elem != NULL) {
4205 cur->c2 = xmlNewElementContent(elem,
4206 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004207 if (cur->c2 != NULL)
4208 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004209 xmlFree(elem);
4210 }
4211 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004212 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4213 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4214 if (ctxt->vctxt.error != NULL)
4215 ctxt->vctxt.error(ctxt->vctxt.userData,
4216"Element content declaration doesn't start and stop in the same entity\n");
4217 ctxt->valid = 0;
4218 }
Owen Taylor3473f882001-02-23 17:55:21 +00004219 SKIP(2);
4220 } else {
4221 if (elem != NULL) xmlFree(elem);
4222 xmlFreeElementContent(ret);
4223 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4224 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4225 ctxt->sax->error(ctxt->userData,
4226 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4227 ctxt->wellFormed = 0;
4228 ctxt->disableSAX = 1;
4229 return(NULL);
4230 }
4231
4232 } else {
4233 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4234 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4235 ctxt->sax->error(ctxt->userData,
4236 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4237 ctxt->wellFormed = 0;
4238 ctxt->disableSAX = 1;
4239 }
4240 return(ret);
4241}
4242
4243/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004244 * xmlParseElementChildrenContentD:
4245 * @ctxt: an XML parser context
4246 *
4247 * VMS version of xmlParseElementChildrenContentDecl()
4248 *
4249 * Returns the tree of xmlElementContentPtr describing the element
4250 * hierarchy.
4251 */
4252/**
Owen Taylor3473f882001-02-23 17:55:21 +00004253 * xmlParseElementChildrenContentDecl:
4254 * @ctxt: an XML parser context
4255 *
4256 * parse the declaration for a Mixed Element content
4257 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4258 *
4259 *
4260 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4261 *
4262 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4263 *
4264 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4265 *
4266 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4267 *
4268 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4269 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004270 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004271 * opening or closing parentheses in a choice, seq, or Mixed
4272 * construct is contained in the replacement text for a parameter
4273 * entity, both must be contained in the same replacement text. For
4274 * interoperability, if a parameter-entity reference appears in a
4275 * choice, seq, or Mixed construct, its replacement text should not
4276 * be empty, and neither the first nor last non-blank character of
4277 * the replacement text should be a connector (| or ,).
4278 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004279 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004280 * hierarchy.
4281 */
4282xmlElementContentPtr
4283#ifdef VMS
4284xmlParseElementChildrenContentD
4285#else
4286xmlParseElementChildrenContentDecl
4287#endif
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004288(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004289 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4290 xmlChar *elem;
4291 xmlChar type = 0;
4292
4293 SKIP_BLANKS;
4294 GROW;
4295 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004296 xmlParserInputPtr input = ctxt->input;
4297
Owen Taylor3473f882001-02-23 17:55:21 +00004298 /* Recurse on first child */
4299 NEXT;
4300 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004301 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004302 SKIP_BLANKS;
4303 GROW;
4304 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004305 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004306 if (elem == NULL) {
4307 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4308 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4309 ctxt->sax->error(ctxt->userData,
4310 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4311 ctxt->wellFormed = 0;
4312 ctxt->disableSAX = 1;
4313 return(NULL);
4314 }
4315 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4316 GROW;
4317 if (RAW == '?') {
4318 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4319 NEXT;
4320 } else if (RAW == '*') {
4321 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4322 NEXT;
4323 } else if (RAW == '+') {
4324 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4325 NEXT;
4326 } else {
4327 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4328 }
4329 xmlFree(elem);
4330 GROW;
4331 }
4332 SKIP_BLANKS;
4333 SHRINK;
4334 while (RAW != ')') {
4335 /*
4336 * Each loop we parse one separator and one element.
4337 */
4338 if (RAW == ',') {
4339 if (type == 0) type = CUR;
4340
4341 /*
4342 * Detect "Name | Name , Name" error
4343 */
4344 else if (type != CUR) {
4345 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4346 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4347 ctxt->sax->error(ctxt->userData,
4348 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4349 type);
4350 ctxt->wellFormed = 0;
4351 ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004352 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004353 xmlFreeElementContent(last);
4354 if (ret != NULL)
4355 xmlFreeElementContent(ret);
4356 return(NULL);
4357 }
4358 NEXT;
4359
4360 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4361 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004362 if ((last != NULL) && (last != ret))
4363 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004364 xmlFreeElementContent(ret);
4365 return(NULL);
4366 }
4367 if (last == NULL) {
4368 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004369 if (ret != NULL)
4370 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004371 ret = cur = op;
4372 } else {
4373 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004374 if (op != NULL)
4375 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004376 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004377 if (last != NULL)
4378 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004379 cur =op;
4380 last = NULL;
4381 }
4382 } else if (RAW == '|') {
4383 if (type == 0) type = CUR;
4384
4385 /*
4386 * Detect "Name , Name | Name" error
4387 */
4388 else if (type != CUR) {
4389 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4390 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4391 ctxt->sax->error(ctxt->userData,
4392 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4393 type);
4394 ctxt->wellFormed = 0;
4395 ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004396 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004397 xmlFreeElementContent(last);
4398 if (ret != NULL)
4399 xmlFreeElementContent(ret);
4400 return(NULL);
4401 }
4402 NEXT;
4403
4404 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4405 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004406 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004407 xmlFreeElementContent(last);
4408 if (ret != NULL)
4409 xmlFreeElementContent(ret);
4410 return(NULL);
4411 }
4412 if (last == NULL) {
4413 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004414 if (ret != NULL)
4415 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004416 ret = cur = op;
4417 } else {
4418 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004419 if (op != NULL)
4420 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004421 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004422 if (last != NULL)
4423 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004424 cur =op;
4425 last = NULL;
4426 }
4427 } else {
4428 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4429 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4430 ctxt->sax->error(ctxt->userData,
4431 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4432 ctxt->wellFormed = 0;
4433 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004434 if (ret != NULL)
4435 xmlFreeElementContent(ret);
4436 return(NULL);
4437 }
4438 GROW;
4439 SKIP_BLANKS;
4440 GROW;
4441 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004442 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004443 /* Recurse on second child */
4444 NEXT;
4445 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004446 last = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004447 SKIP_BLANKS;
4448 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004449 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004450 if (elem == NULL) {
4451 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4452 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4453 ctxt->sax->error(ctxt->userData,
4454 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4455 ctxt->wellFormed = 0;
4456 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004457 if (ret != NULL)
4458 xmlFreeElementContent(ret);
4459 return(NULL);
4460 }
4461 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4462 xmlFree(elem);
4463 if (RAW == '?') {
4464 last->ocur = XML_ELEMENT_CONTENT_OPT;
4465 NEXT;
4466 } else if (RAW == '*') {
4467 last->ocur = XML_ELEMENT_CONTENT_MULT;
4468 NEXT;
4469 } else if (RAW == '+') {
4470 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4471 NEXT;
4472 } else {
4473 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4474 }
4475 }
4476 SKIP_BLANKS;
4477 GROW;
4478 }
4479 if ((cur != NULL) && (last != NULL)) {
4480 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004481 if (last != NULL)
4482 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004483 }
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004484 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4485 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4486 if (ctxt->vctxt.error != NULL)
4487 ctxt->vctxt.error(ctxt->vctxt.userData,
4488"Element content declaration doesn't start and stop in the same entity\n");
4489 ctxt->valid = 0;
4490 }
Owen Taylor3473f882001-02-23 17:55:21 +00004491 NEXT;
4492 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004493 if (ret != NULL)
4494 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004495 NEXT;
4496 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004497 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004498 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004499 cur = ret;
4500 /*
4501 * Some normalization:
4502 * (a | b* | c?)* == (a | b | c)*
4503 */
4504 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4505 if ((cur->c1 != NULL) &&
4506 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4507 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4508 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4509 if ((cur->c2 != NULL) &&
4510 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4511 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4512 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4513 cur = cur->c2;
4514 }
4515 }
Owen Taylor3473f882001-02-23 17:55:21 +00004516 NEXT;
4517 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004518 if (ret != NULL) {
4519 int found = 0;
4520
Daniel Veillarde470df72001-04-18 21:41:07 +00004521 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004522 /*
4523 * Some normalization:
4524 * (a | b*)+ == (a | b)*
4525 * (a | b?)+ == (a | b)*
4526 */
4527 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4528 if ((cur->c1 != NULL) &&
4529 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4530 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4531 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4532 found = 1;
4533 }
4534 if ((cur->c2 != NULL) &&
4535 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4536 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4537 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4538 found = 1;
4539 }
4540 cur = cur->c2;
4541 }
4542 if (found)
4543 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4544 }
Owen Taylor3473f882001-02-23 17:55:21 +00004545 NEXT;
4546 }
4547 return(ret);
4548}
4549
4550/**
4551 * xmlParseElementContentDecl:
4552 * @ctxt: an XML parser context
4553 * @name: the name of the element being defined.
4554 * @result: the Element Content pointer will be stored here if any
4555 *
4556 * parse the declaration for an Element content either Mixed or Children,
4557 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4558 *
4559 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4560 *
4561 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4562 */
4563
4564int
4565xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4566 xmlElementContentPtr *result) {
4567
4568 xmlElementContentPtr tree = NULL;
4569 xmlParserInputPtr input = ctxt->input;
4570 int res;
4571
4572 *result = NULL;
4573
4574 if (RAW != '(') {
4575 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4576 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4577 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004578 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004579 ctxt->wellFormed = 0;
4580 ctxt->disableSAX = 1;
4581 return(-1);
4582 }
4583 NEXT;
4584 GROW;
4585 SKIP_BLANKS;
4586 if ((RAW == '#') && (NXT(1) == 'P') &&
4587 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4588 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4589 (NXT(6) == 'A')) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004590 tree = xmlParseElementMixedContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004591 res = XML_ELEMENT_TYPE_MIXED;
4592 } else {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004593 tree = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004594 res = XML_ELEMENT_TYPE_ELEMENT;
4595 }
Owen Taylor3473f882001-02-23 17:55:21 +00004596 SKIP_BLANKS;
4597 *result = tree;
4598 return(res);
4599}
4600
4601/**
4602 * xmlParseElementDecl:
4603 * @ctxt: an XML parser context
4604 *
4605 * parse an Element declaration.
4606 *
4607 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4608 *
4609 * [ VC: Unique Element Type Declaration ]
4610 * No element type may be declared more than once
4611 *
4612 * Returns the type of the element, or -1 in case of error
4613 */
4614int
4615xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4616 xmlChar *name;
4617 int ret = -1;
4618 xmlElementContentPtr content = NULL;
4619
4620 GROW;
4621 if ((RAW == '<') && (NXT(1) == '!') &&
4622 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4623 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4624 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4625 (NXT(8) == 'T')) {
4626 xmlParserInputPtr input = ctxt->input;
4627
4628 SKIP(9);
4629 if (!IS_BLANK(CUR)) {
4630 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4631 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4632 ctxt->sax->error(ctxt->userData,
4633 "Space required after 'ELEMENT'\n");
4634 ctxt->wellFormed = 0;
4635 ctxt->disableSAX = 1;
4636 }
4637 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004638 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004639 if (name == NULL) {
4640 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4641 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4642 ctxt->sax->error(ctxt->userData,
4643 "xmlParseElementDecl: no name for Element\n");
4644 ctxt->wellFormed = 0;
4645 ctxt->disableSAX = 1;
4646 return(-1);
4647 }
4648 while ((RAW == 0) && (ctxt->inputNr > 1))
4649 xmlPopInput(ctxt);
4650 if (!IS_BLANK(CUR)) {
4651 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4652 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4653 ctxt->sax->error(ctxt->userData,
4654 "Space required after the element name\n");
4655 ctxt->wellFormed = 0;
4656 ctxt->disableSAX = 1;
4657 }
4658 SKIP_BLANKS;
4659 if ((RAW == 'E') && (NXT(1) == 'M') &&
4660 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4661 (NXT(4) == 'Y')) {
4662 SKIP(5);
4663 /*
4664 * Element must always be empty.
4665 */
4666 ret = XML_ELEMENT_TYPE_EMPTY;
4667 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4668 (NXT(2) == 'Y')) {
4669 SKIP(3);
4670 /*
4671 * Element is a generic container.
4672 */
4673 ret = XML_ELEMENT_TYPE_ANY;
4674 } else if (RAW == '(') {
4675 ret = xmlParseElementContentDecl(ctxt, name, &content);
4676 } else {
4677 /*
4678 * [ WFC: PEs in Internal Subset ] error handling.
4679 */
4680 if ((RAW == '%') && (ctxt->external == 0) &&
4681 (ctxt->inputNr == 1)) {
4682 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4683 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4684 ctxt->sax->error(ctxt->userData,
4685 "PEReference: forbidden within markup decl in internal subset\n");
4686 } else {
4687 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4688 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4689 ctxt->sax->error(ctxt->userData,
4690 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4691 }
4692 ctxt->wellFormed = 0;
4693 ctxt->disableSAX = 1;
4694 if (name != NULL) xmlFree(name);
4695 return(-1);
4696 }
4697
4698 SKIP_BLANKS;
4699 /*
4700 * Pop-up of finished entities.
4701 */
4702 while ((RAW == 0) && (ctxt->inputNr > 1))
4703 xmlPopInput(ctxt);
4704 SKIP_BLANKS;
4705
4706 if (RAW != '>') {
4707 ctxt->errNo = XML_ERR_GT_REQUIRED;
4708 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4709 ctxt->sax->error(ctxt->userData,
4710 "xmlParseElementDecl: expected '>' at the end\n");
4711 ctxt->wellFormed = 0;
4712 ctxt->disableSAX = 1;
4713 } else {
4714 if (input != ctxt->input) {
4715 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4716 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4717 ctxt->sax->error(ctxt->userData,
4718"Element declaration doesn't start and stop in the same entity\n");
4719 ctxt->wellFormed = 0;
4720 ctxt->disableSAX = 1;
4721 }
4722
4723 NEXT;
4724 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4725 (ctxt->sax->elementDecl != NULL))
4726 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4727 content);
4728 }
4729 if (content != NULL) {
4730 xmlFreeElementContent(content);
4731 }
4732 if (name != NULL) {
4733 xmlFree(name);
4734 }
4735 }
4736 return(ret);
4737}
4738
4739/**
Owen Taylor3473f882001-02-23 17:55:21 +00004740 * xmlParseConditionalSections
4741 * @ctxt: an XML parser context
4742 *
4743 * [61] conditionalSect ::= includeSect | ignoreSect
4744 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4745 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4746 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4747 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4748 */
4749
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004750static void
Owen Taylor3473f882001-02-23 17:55:21 +00004751xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4752 SKIP(3);
4753 SKIP_BLANKS;
4754 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4755 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4756 (NXT(6) == 'E')) {
4757 SKIP(7);
4758 SKIP_BLANKS;
4759 if (RAW != '[') {
4760 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4761 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4762 ctxt->sax->error(ctxt->userData,
4763 "XML conditional section '[' expected\n");
4764 ctxt->wellFormed = 0;
4765 ctxt->disableSAX = 1;
4766 } else {
4767 NEXT;
4768 }
4769 if (xmlParserDebugEntities) {
4770 if ((ctxt->input != NULL) && (ctxt->input->filename))
4771 xmlGenericError(xmlGenericErrorContext,
4772 "%s(%d): ", ctxt->input->filename,
4773 ctxt->input->line);
4774 xmlGenericError(xmlGenericErrorContext,
4775 "Entering INCLUDE Conditional Section\n");
4776 }
4777
4778 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4779 (NXT(2) != '>'))) {
4780 const xmlChar *check = CUR_PTR;
4781 int cons = ctxt->input->consumed;
4782 int tok = ctxt->token;
4783
4784 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4785 xmlParseConditionalSections(ctxt);
4786 } else if (IS_BLANK(CUR)) {
4787 NEXT;
4788 } else if (RAW == '%') {
4789 xmlParsePEReference(ctxt);
4790 } else
4791 xmlParseMarkupDecl(ctxt);
4792
4793 /*
4794 * Pop-up of finished entities.
4795 */
4796 while ((RAW == 0) && (ctxt->inputNr > 1))
4797 xmlPopInput(ctxt);
4798
4799 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4800 (tok == ctxt->token)) {
4801 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4802 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4803 ctxt->sax->error(ctxt->userData,
4804 "Content error in the external subset\n");
4805 ctxt->wellFormed = 0;
4806 ctxt->disableSAX = 1;
4807 break;
4808 }
4809 }
4810 if (xmlParserDebugEntities) {
4811 if ((ctxt->input != NULL) && (ctxt->input->filename))
4812 xmlGenericError(xmlGenericErrorContext,
4813 "%s(%d): ", ctxt->input->filename,
4814 ctxt->input->line);
4815 xmlGenericError(xmlGenericErrorContext,
4816 "Leaving INCLUDE Conditional Section\n");
4817 }
4818
4819 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4820 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4821 int state;
4822 int instate;
4823 int depth = 0;
4824
4825 SKIP(6);
4826 SKIP_BLANKS;
4827 if (RAW != '[') {
4828 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4829 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4830 ctxt->sax->error(ctxt->userData,
4831 "XML conditional section '[' expected\n");
4832 ctxt->wellFormed = 0;
4833 ctxt->disableSAX = 1;
4834 } else {
4835 NEXT;
4836 }
4837 if (xmlParserDebugEntities) {
4838 if ((ctxt->input != NULL) && (ctxt->input->filename))
4839 xmlGenericError(xmlGenericErrorContext,
4840 "%s(%d): ", ctxt->input->filename,
4841 ctxt->input->line);
4842 xmlGenericError(xmlGenericErrorContext,
4843 "Entering IGNORE Conditional Section\n");
4844 }
4845
4846 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004847 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00004848 * But disable SAX event generating DTD building in the meantime
4849 */
4850 state = ctxt->disableSAX;
4851 instate = ctxt->instate;
4852 ctxt->disableSAX = 1;
4853 ctxt->instate = XML_PARSER_IGNORE;
4854
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004855 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004856 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4857 depth++;
4858 SKIP(3);
4859 continue;
4860 }
4861 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4862 if (--depth >= 0) SKIP(3);
4863 continue;
4864 }
4865 NEXT;
4866 continue;
4867 }
4868
4869 ctxt->disableSAX = state;
4870 ctxt->instate = instate;
4871
4872 if (xmlParserDebugEntities) {
4873 if ((ctxt->input != NULL) && (ctxt->input->filename))
4874 xmlGenericError(xmlGenericErrorContext,
4875 "%s(%d): ", ctxt->input->filename,
4876 ctxt->input->line);
4877 xmlGenericError(xmlGenericErrorContext,
4878 "Leaving IGNORE Conditional Section\n");
4879 }
4880
4881 } else {
4882 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4883 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4884 ctxt->sax->error(ctxt->userData,
4885 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4886 ctxt->wellFormed = 0;
4887 ctxt->disableSAX = 1;
4888 }
4889
4890 if (RAW == 0)
4891 SHRINK;
4892
4893 if (RAW == 0) {
4894 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4895 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4896 ctxt->sax->error(ctxt->userData,
4897 "XML conditional section not closed\n");
4898 ctxt->wellFormed = 0;
4899 ctxt->disableSAX = 1;
4900 } else {
4901 SKIP(3);
4902 }
4903}
4904
4905/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00004906 * xmlParseMarkupDecl:
4907 * @ctxt: an XML parser context
4908 *
4909 * parse Markup declarations
4910 *
4911 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4912 * NotationDecl | PI | Comment
4913 *
4914 * [ VC: Proper Declaration/PE Nesting ]
4915 * Parameter-entity replacement text must be properly nested with
4916 * markup declarations. That is to say, if either the first character
4917 * or the last character of a markup declaration (markupdecl above) is
4918 * contained in the replacement text for a parameter-entity reference,
4919 * both must be contained in the same replacement text.
4920 *
4921 * [ WFC: PEs in Internal Subset ]
4922 * In the internal DTD subset, parameter-entity references can occur
4923 * only where markup declarations can occur, not within markup declarations.
4924 * (This does not apply to references that occur in external parameter
4925 * entities or to the external subset.)
4926 */
4927void
4928xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4929 GROW;
4930 xmlParseElementDecl(ctxt);
4931 xmlParseAttributeListDecl(ctxt);
4932 xmlParseEntityDecl(ctxt);
4933 xmlParseNotationDecl(ctxt);
4934 xmlParsePI(ctxt);
4935 xmlParseComment(ctxt);
4936 /*
4937 * This is only for internal subset. On external entities,
4938 * the replacement is done before parsing stage
4939 */
4940 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4941 xmlParsePEReference(ctxt);
4942
4943 /*
4944 * Conditional sections are allowed from entities included
4945 * by PE References in the internal subset.
4946 */
4947 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
4948 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4949 xmlParseConditionalSections(ctxt);
4950 }
4951 }
4952
4953 ctxt->instate = XML_PARSER_DTD;
4954}
4955
4956/**
4957 * xmlParseTextDecl:
4958 * @ctxt: an XML parser context
4959 *
4960 * parse an XML declaration header for external entities
4961 *
4962 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4963 *
4964 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4965 */
4966
4967void
4968xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4969 xmlChar *version;
4970
4971 /*
4972 * We know that '<?xml' is here.
4973 */
4974 if ((RAW == '<') && (NXT(1) == '?') &&
4975 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4976 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4977 SKIP(5);
4978 } else {
4979 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4980 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4981 ctxt->sax->error(ctxt->userData,
4982 "Text declaration '<?xml' required\n");
4983 ctxt->wellFormed = 0;
4984 ctxt->disableSAX = 1;
4985
4986 return;
4987 }
4988
4989 if (!IS_BLANK(CUR)) {
4990 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4991 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4992 ctxt->sax->error(ctxt->userData,
4993 "Space needed after '<?xml'\n");
4994 ctxt->wellFormed = 0;
4995 ctxt->disableSAX = 1;
4996 }
4997 SKIP_BLANKS;
4998
4999 /*
5000 * We may have the VersionInfo here.
5001 */
5002 version = xmlParseVersionInfo(ctxt);
5003 if (version == NULL)
5004 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005005 else {
5006 if (!IS_BLANK(CUR)) {
5007 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5008 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5009 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5010 ctxt->wellFormed = 0;
5011 ctxt->disableSAX = 1;
5012 }
5013 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005014 ctxt->input->version = version;
5015
5016 /*
5017 * We must have the encoding declaration
5018 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005019 xmlParseEncodingDecl(ctxt);
5020 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5021 /*
5022 * The XML REC instructs us to stop parsing right here
5023 */
5024 return;
5025 }
5026
5027 SKIP_BLANKS;
5028 if ((RAW == '?') && (NXT(1) == '>')) {
5029 SKIP(2);
5030 } else if (RAW == '>') {
5031 /* Deprecated old WD ... */
5032 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5033 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5034 ctxt->sax->error(ctxt->userData,
5035 "XML declaration must end-up with '?>'\n");
5036 ctxt->wellFormed = 0;
5037 ctxt->disableSAX = 1;
5038 NEXT;
5039 } else {
5040 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5041 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5042 ctxt->sax->error(ctxt->userData,
5043 "parsing XML declaration: '?>' expected\n");
5044 ctxt->wellFormed = 0;
5045 ctxt->disableSAX = 1;
5046 MOVETO_ENDTAG(CUR_PTR);
5047 NEXT;
5048 }
5049}
5050
5051/**
Owen Taylor3473f882001-02-23 17:55:21 +00005052 * xmlParseExternalSubset:
5053 * @ctxt: an XML parser context
5054 * @ExternalID: the external identifier
5055 * @SystemID: the system identifier (or URL)
5056 *
5057 * parse Markup declarations from an external subset
5058 *
5059 * [30] extSubset ::= textDecl? extSubsetDecl
5060 *
5061 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5062 */
5063void
5064xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5065 const xmlChar *SystemID) {
5066 GROW;
5067 if ((RAW == '<') && (NXT(1) == '?') &&
5068 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5069 (NXT(4) == 'l')) {
5070 xmlParseTextDecl(ctxt);
5071 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5072 /*
5073 * The XML REC instructs us to stop parsing right here
5074 */
5075 ctxt->instate = XML_PARSER_EOF;
5076 return;
5077 }
5078 }
5079 if (ctxt->myDoc == NULL) {
5080 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5081 }
5082 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5083 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5084
5085 ctxt->instate = XML_PARSER_DTD;
5086 ctxt->external = 1;
5087 while (((RAW == '<') && (NXT(1) == '?')) ||
5088 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005089 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005090 const xmlChar *check = CUR_PTR;
5091 int cons = ctxt->input->consumed;
5092 int tok = ctxt->token;
5093
5094 GROW;
5095 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5096 xmlParseConditionalSections(ctxt);
5097 } else if (IS_BLANK(CUR)) {
5098 NEXT;
5099 } else if (RAW == '%') {
5100 xmlParsePEReference(ctxt);
5101 } else
5102 xmlParseMarkupDecl(ctxt);
5103
5104 /*
5105 * Pop-up of finished entities.
5106 */
5107 while ((RAW == 0) && (ctxt->inputNr > 1))
5108 xmlPopInput(ctxt);
5109
5110 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
5111 (tok == ctxt->token)) {
5112 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5113 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5114 ctxt->sax->error(ctxt->userData,
5115 "Content error in the external subset\n");
5116 ctxt->wellFormed = 0;
5117 ctxt->disableSAX = 1;
5118 break;
5119 }
5120 }
5121
5122 if (RAW != 0) {
5123 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5124 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5125 ctxt->sax->error(ctxt->userData,
5126 "Extra content at the end of the document\n");
5127 ctxt->wellFormed = 0;
5128 ctxt->disableSAX = 1;
5129 }
5130
5131}
5132
5133/**
5134 * xmlParseReference:
5135 * @ctxt: an XML parser context
5136 *
5137 * parse and handle entity references in content, depending on the SAX
5138 * interface, this may end-up in a call to character() if this is a
5139 * CharRef, a predefined entity, if there is no reference() callback.
5140 * or if the parser was asked to switch to that mode.
5141 *
5142 * [67] Reference ::= EntityRef | CharRef
5143 */
5144void
5145xmlParseReference(xmlParserCtxtPtr ctxt) {
5146 xmlEntityPtr ent;
5147 xmlChar *val;
5148 if (RAW != '&') return;
5149
5150 if (NXT(1) == '#') {
5151 int i = 0;
5152 xmlChar out[10];
5153 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005154 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005155
5156 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5157 /*
5158 * So we are using non-UTF-8 buffers
5159 * Check that the char fit on 8bits, if not
5160 * generate a CharRef.
5161 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005162 if (value <= 0xFF) {
5163 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005164 out[1] = 0;
5165 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5166 (!ctxt->disableSAX))
5167 ctxt->sax->characters(ctxt->userData, out, 1);
5168 } else {
5169 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005170 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005171 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005172 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005173 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5174 (!ctxt->disableSAX))
5175 ctxt->sax->reference(ctxt->userData, out);
5176 }
5177 } else {
5178 /*
5179 * Just encode the value in UTF-8
5180 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005181 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005182 out[i] = 0;
5183 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5184 (!ctxt->disableSAX))
5185 ctxt->sax->characters(ctxt->userData, out, i);
5186 }
5187 } else {
5188 ent = xmlParseEntityRef(ctxt);
5189 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005190 if (!ctxt->wellFormed)
5191 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005192 if ((ent->name != NULL) &&
5193 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5194 xmlNodePtr list = NULL;
5195 int ret;
5196
5197
5198 /*
5199 * The first reference to the entity trigger a parsing phase
5200 * where the ent->children is filled with the result from
5201 * the parsing.
5202 */
5203 if (ent->children == NULL) {
5204 xmlChar *value;
5205 value = ent->content;
5206
5207 /*
5208 * Check that this entity is well formed
5209 */
5210 if ((value != NULL) &&
5211 (value[1] == 0) && (value[0] == '<') &&
5212 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5213 /*
5214 * DONE: get definite answer on this !!!
5215 * Lots of entity decls are used to declare a single
5216 * char
5217 * <!ENTITY lt "<">
5218 * Which seems to be valid since
5219 * 2.4: The ampersand character (&) and the left angle
5220 * bracket (<) may appear in their literal form only
5221 * when used ... They are also legal within the literal
5222 * entity value of an internal entity declaration;i
5223 * see "4.3.2 Well-Formed Parsed Entities".
5224 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5225 * Looking at the OASIS test suite and James Clark
5226 * tests, this is broken. However the XML REC uses
5227 * it. Is the XML REC not well-formed ????
5228 * This is a hack to avoid this problem
5229 *
5230 * ANSWER: since lt gt amp .. are already defined,
5231 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005232 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005233 * is lousy but acceptable.
5234 */
5235 list = xmlNewDocText(ctxt->myDoc, value);
5236 if (list != NULL) {
5237 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5238 (ent->children == NULL)) {
5239 ent->children = list;
5240 ent->last = list;
5241 list->parent = (xmlNodePtr) ent;
5242 } else {
5243 xmlFreeNodeList(list);
5244 }
5245 } else if (list != NULL) {
5246 xmlFreeNodeList(list);
5247 }
5248 } else {
5249 /*
5250 * 4.3.2: An internal general parsed entity is well-formed
5251 * if its replacement text matches the production labeled
5252 * content.
5253 */
5254 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5255 ctxt->depth++;
5256 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
5257 ctxt->sax, NULL, ctxt->depth,
5258 value, &list);
5259 ctxt->depth--;
5260 } else if (ent->etype ==
5261 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5262 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005263 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Owen Taylor3473f882001-02-23 17:55:21 +00005264 ctxt->sax, NULL, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005265 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005266 ctxt->depth--;
5267 } else {
5268 ret = -1;
5269 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5270 ctxt->sax->error(ctxt->userData,
5271 "Internal: invalid entity type\n");
5272 }
5273 if (ret == XML_ERR_ENTITY_LOOP) {
5274 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5275 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5276 ctxt->sax->error(ctxt->userData,
5277 "Detected entity reference loop\n");
5278 ctxt->wellFormed = 0;
5279 ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005280 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005281 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005282 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5283 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005284 (ent->children == NULL)) {
5285 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005286 if (ctxt->replaceEntities) {
5287 /*
5288 * Prune it directly in the generated document
5289 * except for single text nodes.
5290 */
5291 if ((list->type == XML_TEXT_NODE) &&
5292 (list->next == NULL)) {
5293 list->parent = (xmlNodePtr) ent;
5294 list = NULL;
5295 } else {
5296 while (list != NULL) {
5297 list->parent = (xmlNodePtr) ctxt->node;
5298 if (list->next == NULL)
5299 ent->last = list;
5300 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005301 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005302 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005303 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5304 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005305 }
5306 } else {
5307 while (list != NULL) {
5308 list->parent = (xmlNodePtr) ent;
5309 if (list->next == NULL)
5310 ent->last = list;
5311 list = list->next;
5312 }
Owen Taylor3473f882001-02-23 17:55:21 +00005313 }
5314 } else {
5315 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005316 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005317 }
5318 } else if (ret > 0) {
5319 ctxt->errNo = ret;
5320 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5321 ctxt->sax->error(ctxt->userData,
5322 "Entity value required\n");
5323 ctxt->wellFormed = 0;
5324 ctxt->disableSAX = 1;
5325 } else if (list != NULL) {
5326 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005327 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005328 }
5329 }
5330 }
5331 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5332 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5333 /*
5334 * Create a node.
5335 */
5336 ctxt->sax->reference(ctxt->userData, ent->name);
5337 return;
5338 } else if (ctxt->replaceEntities) {
5339 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5340 /*
5341 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005342 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005343 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005344 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005345 if (list == NULL) {
Daniel Veillard8107a222002-01-13 14:10:10 +00005346 xmlNodePtr new = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005347 cur = ent->children;
5348 while (cur != NULL) {
5349 new = xmlCopyNode(cur, 1);
Daniel Veillard8107a222002-01-13 14:10:10 +00005350 if (firstChild == NULL){
5351 firstChild = new;
5352 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005353 xmlAddChild(ctxt->node, new);
5354 if (cur == ent->last)
5355 break;
5356 cur = cur->next;
5357 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005358 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5359 xmlAddEntityReference(ent, firstChild, new);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005360 } else {
5361 /*
5362 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005363 * node with a possible previous text one which
5364 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005365 */
5366 if (ent->children->type == XML_TEXT_NODE)
5367 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5368 if ((ent->last != ent->children) &&
5369 (ent->last->type == XML_TEXT_NODE))
5370 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5371 xmlAddChildList(ctxt->node, ent->children);
5372 }
5373
Owen Taylor3473f882001-02-23 17:55:21 +00005374 /*
5375 * This is to avoid a nasty side effect, see
5376 * characters() in SAX.c
5377 */
5378 ctxt->nodemem = 0;
5379 ctxt->nodelen = 0;
5380 return;
5381 } else {
5382 /*
5383 * Probably running in SAX mode
5384 */
5385 xmlParserInputPtr input;
5386
5387 input = xmlNewEntityInputStream(ctxt, ent);
5388 xmlPushInput(ctxt, input);
5389 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5390 (RAW == '<') && (NXT(1) == '?') &&
5391 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5392 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5393 xmlParseTextDecl(ctxt);
5394 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5395 /*
5396 * The XML REC instructs us to stop parsing right here
5397 */
5398 ctxt->instate = XML_PARSER_EOF;
5399 return;
5400 }
5401 if (input->standalone == 1) {
5402 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5403 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5404 ctxt->sax->error(ctxt->userData,
5405 "external parsed entities cannot be standalone\n");
5406 ctxt->wellFormed = 0;
5407 ctxt->disableSAX = 1;
5408 }
5409 }
5410 return;
5411 }
5412 }
5413 } else {
5414 val = ent->content;
5415 if (val == NULL) return;
5416 /*
5417 * inline the entity.
5418 */
5419 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5420 (!ctxt->disableSAX))
5421 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5422 }
5423 }
5424}
5425
5426/**
5427 * xmlParseEntityRef:
5428 * @ctxt: an XML parser context
5429 *
5430 * parse ENTITY references declarations
5431 *
5432 * [68] EntityRef ::= '&' Name ';'
5433 *
5434 * [ WFC: Entity Declared ]
5435 * In a document without any DTD, a document with only an internal DTD
5436 * subset which contains no parameter entity references, or a document
5437 * with "standalone='yes'", the Name given in the entity reference
5438 * must match that in an entity declaration, except that well-formed
5439 * documents need not declare any of the following entities: amp, lt,
5440 * gt, apos, quot. The declaration of a parameter entity must precede
5441 * any reference to it. Similarly, the declaration of a general entity
5442 * must precede any reference to it which appears in a default value in an
5443 * attribute-list declaration. Note that if entities are declared in the
5444 * external subset or in external parameter entities, a non-validating
5445 * processor is not obligated to read and process their declarations;
5446 * for such documents, the rule that an entity must be declared is a
5447 * well-formedness constraint only if standalone='yes'.
5448 *
5449 * [ WFC: Parsed Entity ]
5450 * An entity reference must not contain the name of an unparsed entity
5451 *
5452 * Returns the xmlEntityPtr if found, or NULL otherwise.
5453 */
5454xmlEntityPtr
5455xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5456 xmlChar *name;
5457 xmlEntityPtr ent = NULL;
5458
5459 GROW;
5460
5461 if (RAW == '&') {
5462 NEXT;
5463 name = xmlParseName(ctxt);
5464 if (name == NULL) {
5465 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5467 ctxt->sax->error(ctxt->userData,
5468 "xmlParseEntityRef: no name\n");
5469 ctxt->wellFormed = 0;
5470 ctxt->disableSAX = 1;
5471 } else {
5472 if (RAW == ';') {
5473 NEXT;
5474 /*
5475 * Ask first SAX for entity resolution, otherwise try the
5476 * predefined set.
5477 */
5478 if (ctxt->sax != NULL) {
5479 if (ctxt->sax->getEntity != NULL)
5480 ent = ctxt->sax->getEntity(ctxt->userData, name);
5481 if (ent == NULL)
5482 ent = xmlGetPredefinedEntity(name);
5483 }
5484 /*
5485 * [ WFC: Entity Declared ]
5486 * In a document without any DTD, a document with only an
5487 * internal DTD subset which contains no parameter entity
5488 * references, or a document with "standalone='yes'", the
5489 * Name given in the entity reference must match that in an
5490 * entity declaration, except that well-formed documents
5491 * need not declare any of the following entities: amp, lt,
5492 * gt, apos, quot.
5493 * The declaration of a parameter entity must precede any
5494 * reference to it.
5495 * Similarly, the declaration of a general entity must
5496 * precede any reference to it which appears in a default
5497 * value in an attribute-list declaration. Note that if
5498 * entities are declared in the external subset or in
5499 * external parameter entities, a non-validating processor
5500 * is not obligated to read and process their declarations;
5501 * for such documents, the rule that an entity must be
5502 * declared is a well-formedness constraint only if
5503 * standalone='yes'.
5504 */
5505 if (ent == NULL) {
5506 if ((ctxt->standalone == 1) ||
5507 ((ctxt->hasExternalSubset == 0) &&
5508 (ctxt->hasPErefs == 0))) {
5509 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5510 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5511 ctxt->sax->error(ctxt->userData,
5512 "Entity '%s' not defined\n", name);
5513 ctxt->wellFormed = 0;
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005514 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005515 ctxt->disableSAX = 1;
5516 } else {
5517 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005518 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005519 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005520 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005521 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005522 }
5523 }
5524
5525 /*
5526 * [ WFC: Parsed Entity ]
5527 * An entity reference must not contain the name of an
5528 * unparsed entity
5529 */
5530 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5531 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5532 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5533 ctxt->sax->error(ctxt->userData,
5534 "Entity reference to unparsed entity %s\n", name);
5535 ctxt->wellFormed = 0;
5536 ctxt->disableSAX = 1;
5537 }
5538
5539 /*
5540 * [ WFC: No External Entity References ]
5541 * Attribute values cannot contain direct or indirect
5542 * entity references to external entities.
5543 */
5544 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5545 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5546 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5547 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5548 ctxt->sax->error(ctxt->userData,
5549 "Attribute references external entity '%s'\n", name);
5550 ctxt->wellFormed = 0;
5551 ctxt->disableSAX = 1;
5552 }
5553 /*
5554 * [ WFC: No < in Attribute Values ]
5555 * The replacement text of any entity referred to directly or
5556 * indirectly in an attribute value (other than "&lt;") must
5557 * not contain a <.
5558 */
5559 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5560 (ent != NULL) &&
5561 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5562 (ent->content != NULL) &&
5563 (xmlStrchr(ent->content, '<'))) {
5564 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5565 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5566 ctxt->sax->error(ctxt->userData,
5567 "'<' in entity '%s' is not allowed in attributes values\n", name);
5568 ctxt->wellFormed = 0;
5569 ctxt->disableSAX = 1;
5570 }
5571
5572 /*
5573 * Internal check, no parameter entities here ...
5574 */
5575 else {
5576 switch (ent->etype) {
5577 case XML_INTERNAL_PARAMETER_ENTITY:
5578 case XML_EXTERNAL_PARAMETER_ENTITY:
5579 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5580 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5581 ctxt->sax->error(ctxt->userData,
5582 "Attempt to reference the parameter entity '%s'\n", name);
5583 ctxt->wellFormed = 0;
5584 ctxt->disableSAX = 1;
5585 break;
5586 default:
5587 break;
5588 }
5589 }
5590
5591 /*
5592 * [ WFC: No Recursion ]
5593 * A parsed entity must not contain a recursive reference
5594 * to itself, either directly or indirectly.
5595 * Done somewhere else
5596 */
5597
5598 } else {
5599 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5600 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5601 ctxt->sax->error(ctxt->userData,
5602 "xmlParseEntityRef: expecting ';'\n");
5603 ctxt->wellFormed = 0;
5604 ctxt->disableSAX = 1;
5605 }
5606 xmlFree(name);
5607 }
5608 }
5609 return(ent);
5610}
5611
5612/**
5613 * xmlParseStringEntityRef:
5614 * @ctxt: an XML parser context
5615 * @str: a pointer to an index in the string
5616 *
5617 * parse ENTITY references declarations, but this version parses it from
5618 * a string value.
5619 *
5620 * [68] EntityRef ::= '&' Name ';'
5621 *
5622 * [ WFC: Entity Declared ]
5623 * In a document without any DTD, a document with only an internal DTD
5624 * subset which contains no parameter entity references, or a document
5625 * with "standalone='yes'", the Name given in the entity reference
5626 * must match that in an entity declaration, except that well-formed
5627 * documents need not declare any of the following entities: amp, lt,
5628 * gt, apos, quot. The declaration of a parameter entity must precede
5629 * any reference to it. Similarly, the declaration of a general entity
5630 * must precede any reference to it which appears in a default value in an
5631 * attribute-list declaration. Note that if entities are declared in the
5632 * external subset or in external parameter entities, a non-validating
5633 * processor is not obligated to read and process their declarations;
5634 * for such documents, the rule that an entity must be declared is a
5635 * well-formedness constraint only if standalone='yes'.
5636 *
5637 * [ WFC: Parsed Entity ]
5638 * An entity reference must not contain the name of an unparsed entity
5639 *
5640 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5641 * is updated to the current location in the string.
5642 */
5643xmlEntityPtr
5644xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5645 xmlChar *name;
5646 const xmlChar *ptr;
5647 xmlChar cur;
5648 xmlEntityPtr ent = NULL;
5649
5650 if ((str == NULL) || (*str == NULL))
5651 return(NULL);
5652 ptr = *str;
5653 cur = *ptr;
5654 if (cur == '&') {
5655 ptr++;
5656 cur = *ptr;
5657 name = xmlParseStringName(ctxt, &ptr);
5658 if (name == NULL) {
5659 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5660 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5661 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005662 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005663 ctxt->wellFormed = 0;
5664 ctxt->disableSAX = 1;
5665 } else {
5666 if (*ptr == ';') {
5667 ptr++;
5668 /*
5669 * Ask first SAX for entity resolution, otherwise try the
5670 * predefined set.
5671 */
5672 if (ctxt->sax != NULL) {
5673 if (ctxt->sax->getEntity != NULL)
5674 ent = ctxt->sax->getEntity(ctxt->userData, name);
5675 if (ent == NULL)
5676 ent = xmlGetPredefinedEntity(name);
5677 }
5678 /*
5679 * [ WFC: Entity Declared ]
5680 * In a document without any DTD, a document with only an
5681 * internal DTD subset which contains no parameter entity
5682 * references, or a document with "standalone='yes'", the
5683 * Name given in the entity reference must match that in an
5684 * entity declaration, except that well-formed documents
5685 * need not declare any of the following entities: amp, lt,
5686 * gt, apos, quot.
5687 * The declaration of a parameter entity must precede any
5688 * reference to it.
5689 * Similarly, the declaration of a general entity must
5690 * precede any reference to it which appears in a default
5691 * value in an attribute-list declaration. Note that if
5692 * entities are declared in the external subset or in
5693 * external parameter entities, a non-validating processor
5694 * is not obligated to read and process their declarations;
5695 * for such documents, the rule that an entity must be
5696 * declared is a well-formedness constraint only if
5697 * standalone='yes'.
5698 */
5699 if (ent == NULL) {
5700 if ((ctxt->standalone == 1) ||
5701 ((ctxt->hasExternalSubset == 0) &&
5702 (ctxt->hasPErefs == 0))) {
5703 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5704 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5705 ctxt->sax->error(ctxt->userData,
5706 "Entity '%s' not defined\n", name);
5707 ctxt->wellFormed = 0;
5708 ctxt->disableSAX = 1;
5709 } else {
5710 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5711 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5712 ctxt->sax->warning(ctxt->userData,
5713 "Entity '%s' not defined\n", name);
5714 }
5715 }
5716
5717 /*
5718 * [ WFC: Parsed Entity ]
5719 * An entity reference must not contain the name of an
5720 * unparsed entity
5721 */
5722 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5723 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5724 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5725 ctxt->sax->error(ctxt->userData,
5726 "Entity reference to unparsed entity %s\n", name);
5727 ctxt->wellFormed = 0;
5728 ctxt->disableSAX = 1;
5729 }
5730
5731 /*
5732 * [ WFC: No External Entity References ]
5733 * Attribute values cannot contain direct or indirect
5734 * entity references to external entities.
5735 */
5736 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5737 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5738 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5739 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5740 ctxt->sax->error(ctxt->userData,
5741 "Attribute references external entity '%s'\n", name);
5742 ctxt->wellFormed = 0;
5743 ctxt->disableSAX = 1;
5744 }
5745 /*
5746 * [ WFC: No < in Attribute Values ]
5747 * The replacement text of any entity referred to directly or
5748 * indirectly in an attribute value (other than "&lt;") must
5749 * not contain a <.
5750 */
5751 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5752 (ent != NULL) &&
5753 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5754 (ent->content != NULL) &&
5755 (xmlStrchr(ent->content, '<'))) {
5756 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5757 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5758 ctxt->sax->error(ctxt->userData,
5759 "'<' in entity '%s' is not allowed in attributes values\n", name);
5760 ctxt->wellFormed = 0;
5761 ctxt->disableSAX = 1;
5762 }
5763
5764 /*
5765 * Internal check, no parameter entities here ...
5766 */
5767 else {
5768 switch (ent->etype) {
5769 case XML_INTERNAL_PARAMETER_ENTITY:
5770 case XML_EXTERNAL_PARAMETER_ENTITY:
5771 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5772 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5773 ctxt->sax->error(ctxt->userData,
5774 "Attempt to reference the parameter entity '%s'\n", name);
5775 ctxt->wellFormed = 0;
5776 ctxt->disableSAX = 1;
5777 break;
5778 default:
5779 break;
5780 }
5781 }
5782
5783 /*
5784 * [ WFC: No Recursion ]
5785 * A parsed entity must not contain a recursive reference
5786 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005787 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00005788 */
5789
5790 } else {
5791 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5792 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5793 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005794 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005795 ctxt->wellFormed = 0;
5796 ctxt->disableSAX = 1;
5797 }
5798 xmlFree(name);
5799 }
5800 }
5801 *str = ptr;
5802 return(ent);
5803}
5804
5805/**
5806 * xmlParsePEReference:
5807 * @ctxt: an XML parser context
5808 *
5809 * parse PEReference declarations
5810 * The entity content is handled directly by pushing it's content as
5811 * a new input stream.
5812 *
5813 * [69] PEReference ::= '%' Name ';'
5814 *
5815 * [ WFC: No Recursion ]
5816 * A parsed entity must not contain a recursive
5817 * reference to itself, either directly or indirectly.
5818 *
5819 * [ WFC: Entity Declared ]
5820 * In a document without any DTD, a document with only an internal DTD
5821 * subset which contains no parameter entity references, or a document
5822 * with "standalone='yes'", ... ... The declaration of a parameter
5823 * entity must precede any reference to it...
5824 *
5825 * [ VC: Entity Declared ]
5826 * In a document with an external subset or external parameter entities
5827 * with "standalone='no'", ... ... The declaration of a parameter entity
5828 * must precede any reference to it...
5829 *
5830 * [ WFC: In DTD ]
5831 * Parameter-entity references may only appear in the DTD.
5832 * NOTE: misleading but this is handled.
5833 */
5834void
5835xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5836 xmlChar *name;
5837 xmlEntityPtr entity = NULL;
5838 xmlParserInputPtr input;
5839
5840 if (RAW == '%') {
5841 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005842 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005843 if (name == NULL) {
5844 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5845 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5846 ctxt->sax->error(ctxt->userData,
5847 "xmlParsePEReference: no name\n");
5848 ctxt->wellFormed = 0;
5849 ctxt->disableSAX = 1;
5850 } else {
5851 if (RAW == ';') {
5852 NEXT;
5853 if ((ctxt->sax != NULL) &&
5854 (ctxt->sax->getParameterEntity != NULL))
5855 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5856 name);
5857 if (entity == NULL) {
5858 /*
5859 * [ WFC: Entity Declared ]
5860 * In a document without any DTD, a document with only an
5861 * internal DTD subset which contains no parameter entity
5862 * references, or a document with "standalone='yes'", ...
5863 * ... The declaration of a parameter entity must precede
5864 * any reference to it...
5865 */
5866 if ((ctxt->standalone == 1) ||
5867 ((ctxt->hasExternalSubset == 0) &&
5868 (ctxt->hasPErefs == 0))) {
5869 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5870 if ((!ctxt->disableSAX) &&
5871 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5872 ctxt->sax->error(ctxt->userData,
5873 "PEReference: %%%s; not found\n", name);
5874 ctxt->wellFormed = 0;
5875 ctxt->disableSAX = 1;
5876 } else {
5877 /*
5878 * [ VC: Entity Declared ]
5879 * In a document with an external subset or external
5880 * parameter entities with "standalone='no'", ...
5881 * ... The declaration of a parameter entity must precede
5882 * any reference to it...
5883 */
5884 if ((!ctxt->disableSAX) &&
5885 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5886 ctxt->sax->warning(ctxt->userData,
5887 "PEReference: %%%s; not found\n", name);
5888 ctxt->valid = 0;
5889 }
5890 } else {
5891 /*
5892 * Internal checking in case the entity quest barfed
5893 */
5894 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5895 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5896 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5897 ctxt->sax->warning(ctxt->userData,
5898 "Internal: %%%s; is not a parameter entity\n", name);
5899 } else {
5900 /*
5901 * TODO !!!
5902 * handle the extra spaces added before and after
5903 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5904 */
5905 input = xmlNewEntityInputStream(ctxt, entity);
5906 xmlPushInput(ctxt, input);
5907 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5908 (RAW == '<') && (NXT(1) == '?') &&
5909 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5910 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5911 xmlParseTextDecl(ctxt);
5912 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5913 /*
5914 * The XML REC instructs us to stop parsing
5915 * right here
5916 */
5917 ctxt->instate = XML_PARSER_EOF;
5918 xmlFree(name);
5919 return;
5920 }
5921 }
5922 if (ctxt->token == 0)
5923 ctxt->token = ' ';
5924 }
5925 }
5926 ctxt->hasPErefs = 1;
5927 } else {
5928 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5929 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5930 ctxt->sax->error(ctxt->userData,
5931 "xmlParsePEReference: expecting ';'\n");
5932 ctxt->wellFormed = 0;
5933 ctxt->disableSAX = 1;
5934 }
5935 xmlFree(name);
5936 }
5937 }
5938}
5939
5940/**
5941 * xmlParseStringPEReference:
5942 * @ctxt: an XML parser context
5943 * @str: a pointer to an index in the string
5944 *
5945 * parse PEReference declarations
5946 *
5947 * [69] PEReference ::= '%' Name ';'
5948 *
5949 * [ WFC: No Recursion ]
5950 * A parsed entity must not contain a recursive
5951 * reference to itself, either directly or indirectly.
5952 *
5953 * [ WFC: Entity Declared ]
5954 * In a document without any DTD, a document with only an internal DTD
5955 * subset which contains no parameter entity references, or a document
5956 * with "standalone='yes'", ... ... The declaration of a parameter
5957 * entity must precede any reference to it...
5958 *
5959 * [ VC: Entity Declared ]
5960 * In a document with an external subset or external parameter entities
5961 * with "standalone='no'", ... ... The declaration of a parameter entity
5962 * must precede any reference to it...
5963 *
5964 * [ WFC: In DTD ]
5965 * Parameter-entity references may only appear in the DTD.
5966 * NOTE: misleading but this is handled.
5967 *
5968 * Returns the string of the entity content.
5969 * str is updated to the current value of the index
5970 */
5971xmlEntityPtr
5972xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5973 const xmlChar *ptr;
5974 xmlChar cur;
5975 xmlChar *name;
5976 xmlEntityPtr entity = NULL;
5977
5978 if ((str == NULL) || (*str == NULL)) return(NULL);
5979 ptr = *str;
5980 cur = *ptr;
5981 if (cur == '%') {
5982 ptr++;
5983 cur = *ptr;
5984 name = xmlParseStringName(ctxt, &ptr);
5985 if (name == NULL) {
5986 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5987 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5988 ctxt->sax->error(ctxt->userData,
5989 "xmlParseStringPEReference: no name\n");
5990 ctxt->wellFormed = 0;
5991 ctxt->disableSAX = 1;
5992 } else {
5993 cur = *ptr;
5994 if (cur == ';') {
5995 ptr++;
5996 cur = *ptr;
5997 if ((ctxt->sax != NULL) &&
5998 (ctxt->sax->getParameterEntity != NULL))
5999 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6000 name);
6001 if (entity == NULL) {
6002 /*
6003 * [ WFC: Entity Declared ]
6004 * In a document without any DTD, a document with only an
6005 * internal DTD subset which contains no parameter entity
6006 * references, or a document with "standalone='yes'", ...
6007 * ... The declaration of a parameter entity must precede
6008 * any reference to it...
6009 */
6010 if ((ctxt->standalone == 1) ||
6011 ((ctxt->hasExternalSubset == 0) &&
6012 (ctxt->hasPErefs == 0))) {
6013 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6014 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6015 ctxt->sax->error(ctxt->userData,
6016 "PEReference: %%%s; not found\n", name);
6017 ctxt->wellFormed = 0;
6018 ctxt->disableSAX = 1;
6019 } else {
6020 /*
6021 * [ VC: Entity Declared ]
6022 * In a document with an external subset or external
6023 * parameter entities with "standalone='no'", ...
6024 * ... The declaration of a parameter entity must
6025 * precede any reference to it...
6026 */
6027 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6028 ctxt->sax->warning(ctxt->userData,
6029 "PEReference: %%%s; not found\n", name);
6030 ctxt->valid = 0;
6031 }
6032 } else {
6033 /*
6034 * Internal checking in case the entity quest barfed
6035 */
6036 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6037 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6038 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6039 ctxt->sax->warning(ctxt->userData,
6040 "Internal: %%%s; is not a parameter entity\n", name);
6041 }
6042 }
6043 ctxt->hasPErefs = 1;
6044 } else {
6045 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6046 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6047 ctxt->sax->error(ctxt->userData,
6048 "xmlParseStringPEReference: expecting ';'\n");
6049 ctxt->wellFormed = 0;
6050 ctxt->disableSAX = 1;
6051 }
6052 xmlFree(name);
6053 }
6054 }
6055 *str = ptr;
6056 return(entity);
6057}
6058
6059/**
6060 * xmlParseDocTypeDecl:
6061 * @ctxt: an XML parser context
6062 *
6063 * parse a DOCTYPE declaration
6064 *
6065 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6066 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6067 *
6068 * [ VC: Root Element Type ]
6069 * The Name in the document type declaration must match the element
6070 * type of the root element.
6071 */
6072
6073void
6074xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6075 xmlChar *name = NULL;
6076 xmlChar *ExternalID = NULL;
6077 xmlChar *URI = NULL;
6078
6079 /*
6080 * We know that '<!DOCTYPE' has been detected.
6081 */
6082 SKIP(9);
6083
6084 SKIP_BLANKS;
6085
6086 /*
6087 * Parse the DOCTYPE name.
6088 */
6089 name = xmlParseName(ctxt);
6090 if (name == NULL) {
6091 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6092 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6093 ctxt->sax->error(ctxt->userData,
6094 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6095 ctxt->wellFormed = 0;
6096 ctxt->disableSAX = 1;
6097 }
6098 ctxt->intSubName = name;
6099
6100 SKIP_BLANKS;
6101
6102 /*
6103 * Check for SystemID and ExternalID
6104 */
6105 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6106
6107 if ((URI != NULL) || (ExternalID != NULL)) {
6108 ctxt->hasExternalSubset = 1;
6109 }
6110 ctxt->extSubURI = URI;
6111 ctxt->extSubSystem = ExternalID;
6112
6113 SKIP_BLANKS;
6114
6115 /*
6116 * Create and update the internal subset.
6117 */
6118 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6119 (!ctxt->disableSAX))
6120 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6121
6122 /*
6123 * Is there any internal subset declarations ?
6124 * they are handled separately in xmlParseInternalSubset()
6125 */
6126 if (RAW == '[')
6127 return;
6128
6129 /*
6130 * We should be at the end of the DOCTYPE declaration.
6131 */
6132 if (RAW != '>') {
6133 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6134 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006135 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006136 ctxt->wellFormed = 0;
6137 ctxt->disableSAX = 1;
6138 }
6139 NEXT;
6140}
6141
6142/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006143 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006144 * @ctxt: an XML parser context
6145 *
6146 * parse the internal subset declaration
6147 *
6148 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6149 */
6150
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006151static void
Owen Taylor3473f882001-02-23 17:55:21 +00006152xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6153 /*
6154 * Is there any DTD definition ?
6155 */
6156 if (RAW == '[') {
6157 ctxt->instate = XML_PARSER_DTD;
6158 NEXT;
6159 /*
6160 * Parse the succession of Markup declarations and
6161 * PEReferences.
6162 * Subsequence (markupdecl | PEReference | S)*
6163 */
6164 while (RAW != ']') {
6165 const xmlChar *check = CUR_PTR;
6166 int cons = ctxt->input->consumed;
6167
6168 SKIP_BLANKS;
6169 xmlParseMarkupDecl(ctxt);
6170 xmlParsePEReference(ctxt);
6171
6172 /*
6173 * Pop-up of finished entities.
6174 */
6175 while ((RAW == 0) && (ctxt->inputNr > 1))
6176 xmlPopInput(ctxt);
6177
6178 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6179 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6180 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6181 ctxt->sax->error(ctxt->userData,
6182 "xmlParseInternalSubset: error detected in Markup declaration\n");
6183 ctxt->wellFormed = 0;
6184 ctxt->disableSAX = 1;
6185 break;
6186 }
6187 }
6188 if (RAW == ']') {
6189 NEXT;
6190 SKIP_BLANKS;
6191 }
6192 }
6193
6194 /*
6195 * We should be at the end of the DOCTYPE declaration.
6196 */
6197 if (RAW != '>') {
6198 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6199 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006200 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006201 ctxt->wellFormed = 0;
6202 ctxt->disableSAX = 1;
6203 }
6204 NEXT;
6205}
6206
6207/**
6208 * xmlParseAttribute:
6209 * @ctxt: an XML parser context
6210 * @value: a xmlChar ** used to store the value of the attribute
6211 *
6212 * parse an attribute
6213 *
6214 * [41] Attribute ::= Name Eq AttValue
6215 *
6216 * [ WFC: No External Entity References ]
6217 * Attribute values cannot contain direct or indirect entity references
6218 * to external entities.
6219 *
6220 * [ WFC: No < in Attribute Values ]
6221 * The replacement text of any entity referred to directly or indirectly in
6222 * an attribute value (other than "&lt;") must not contain a <.
6223 *
6224 * [ VC: Attribute Value Type ]
6225 * The attribute must have been declared; the value must be of the type
6226 * declared for it.
6227 *
6228 * [25] Eq ::= S? '=' S?
6229 *
6230 * With namespace:
6231 *
6232 * [NS 11] Attribute ::= QName Eq AttValue
6233 *
6234 * Also the case QName == xmlns:??? is handled independently as a namespace
6235 * definition.
6236 *
6237 * Returns the attribute name, and the value in *value.
6238 */
6239
6240xmlChar *
6241xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6242 xmlChar *name, *val;
6243
6244 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006245 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006246 name = xmlParseName(ctxt);
6247 if (name == NULL) {
6248 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6249 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6250 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6251 ctxt->wellFormed = 0;
6252 ctxt->disableSAX = 1;
6253 return(NULL);
6254 }
6255
6256 /*
6257 * read the value
6258 */
6259 SKIP_BLANKS;
6260 if (RAW == '=') {
6261 NEXT;
6262 SKIP_BLANKS;
6263 val = xmlParseAttValue(ctxt);
6264 ctxt->instate = XML_PARSER_CONTENT;
6265 } else {
6266 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6267 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6268 ctxt->sax->error(ctxt->userData,
6269 "Specification mandate value for attribute %s\n", name);
6270 ctxt->wellFormed = 0;
6271 ctxt->disableSAX = 1;
6272 xmlFree(name);
6273 return(NULL);
6274 }
6275
6276 /*
6277 * Check that xml:lang conforms to the specification
6278 * No more registered as an error, just generate a warning now
6279 * since this was deprecated in XML second edition
6280 */
6281 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6282 if (!xmlCheckLanguageID(val)) {
6283 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6284 ctxt->sax->warning(ctxt->userData,
6285 "Malformed value for xml:lang : %s\n", val);
6286 }
6287 }
6288
6289 /*
6290 * Check that xml:space conforms to the specification
6291 */
6292 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6293 if (xmlStrEqual(val, BAD_CAST "default"))
6294 *(ctxt->space) = 0;
6295 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6296 *(ctxt->space) = 1;
6297 else {
6298 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6299 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6300 ctxt->sax->error(ctxt->userData,
6301"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6302 val);
6303 ctxt->wellFormed = 0;
6304 ctxt->disableSAX = 1;
6305 }
6306 }
6307
6308 *value = val;
6309 return(name);
6310}
6311
6312/**
6313 * xmlParseStartTag:
6314 * @ctxt: an XML parser context
6315 *
6316 * parse a start of tag either for rule element or
6317 * EmptyElement. In both case we don't parse the tag closing chars.
6318 *
6319 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6320 *
6321 * [ WFC: Unique Att Spec ]
6322 * No attribute name may appear more than once in the same start-tag or
6323 * empty-element tag.
6324 *
6325 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6326 *
6327 * [ WFC: Unique Att Spec ]
6328 * No attribute name may appear more than once in the same start-tag or
6329 * empty-element tag.
6330 *
6331 * With namespace:
6332 *
6333 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6334 *
6335 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6336 *
6337 * Returns the element name parsed
6338 */
6339
6340xmlChar *
6341xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6342 xmlChar *name;
6343 xmlChar *attname;
6344 xmlChar *attvalue;
6345 const xmlChar **atts = NULL;
6346 int nbatts = 0;
6347 int maxatts = 0;
6348 int i;
6349
6350 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006351 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006352
6353 name = xmlParseName(ctxt);
6354 if (name == NULL) {
6355 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6356 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6357 ctxt->sax->error(ctxt->userData,
6358 "xmlParseStartTag: invalid element name\n");
6359 ctxt->wellFormed = 0;
6360 ctxt->disableSAX = 1;
6361 return(NULL);
6362 }
6363
6364 /*
6365 * Now parse the attributes, it ends up with the ending
6366 *
6367 * (S Attribute)* S?
6368 */
6369 SKIP_BLANKS;
6370 GROW;
6371
Daniel Veillard21a0f912001-02-25 19:54:14 +00006372 while ((RAW != '>') &&
6373 ((RAW != '/') || (NXT(1) != '>')) &&
6374 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006375 const xmlChar *q = CUR_PTR;
6376 int cons = ctxt->input->consumed;
6377
6378 attname = xmlParseAttribute(ctxt, &attvalue);
6379 if ((attname != NULL) && (attvalue != NULL)) {
6380 /*
6381 * [ WFC: Unique Att Spec ]
6382 * No attribute name may appear more than once in the same
6383 * start-tag or empty-element tag.
6384 */
6385 for (i = 0; i < nbatts;i += 2) {
6386 if (xmlStrEqual(atts[i], attname)) {
6387 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6388 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6389 ctxt->sax->error(ctxt->userData,
6390 "Attribute %s redefined\n",
6391 attname);
6392 ctxt->wellFormed = 0;
6393 ctxt->disableSAX = 1;
6394 xmlFree(attname);
6395 xmlFree(attvalue);
6396 goto failed;
6397 }
6398 }
6399
6400 /*
6401 * Add the pair to atts
6402 */
6403 if (atts == NULL) {
6404 maxatts = 10;
6405 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6406 if (atts == NULL) {
6407 xmlGenericError(xmlGenericErrorContext,
6408 "malloc of %ld byte failed\n",
6409 maxatts * (long)sizeof(xmlChar *));
6410 return(NULL);
6411 }
6412 } else if (nbatts + 4 > maxatts) {
6413 maxatts *= 2;
6414 atts = (const xmlChar **) xmlRealloc((void *) atts,
6415 maxatts * sizeof(xmlChar *));
6416 if (atts == NULL) {
6417 xmlGenericError(xmlGenericErrorContext,
6418 "realloc of %ld byte failed\n",
6419 maxatts * (long)sizeof(xmlChar *));
6420 return(NULL);
6421 }
6422 }
6423 atts[nbatts++] = attname;
6424 atts[nbatts++] = attvalue;
6425 atts[nbatts] = NULL;
6426 atts[nbatts + 1] = NULL;
6427 } else {
6428 if (attname != NULL)
6429 xmlFree(attname);
6430 if (attvalue != NULL)
6431 xmlFree(attvalue);
6432 }
6433
6434failed:
6435
6436 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6437 break;
6438 if (!IS_BLANK(RAW)) {
6439 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6440 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6441 ctxt->sax->error(ctxt->userData,
6442 "attributes construct error\n");
6443 ctxt->wellFormed = 0;
6444 ctxt->disableSAX = 1;
6445 }
6446 SKIP_BLANKS;
6447 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6448 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6449 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6450 ctxt->sax->error(ctxt->userData,
6451 "xmlParseStartTag: problem parsing attributes\n");
6452 ctxt->wellFormed = 0;
6453 ctxt->disableSAX = 1;
6454 break;
6455 }
6456 GROW;
6457 }
6458
6459 /*
6460 * SAX: Start of Element !
6461 */
6462 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6463 (!ctxt->disableSAX))
6464 ctxt->sax->startElement(ctxt->userData, name, atts);
6465
6466 if (atts != NULL) {
6467 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6468 xmlFree((void *) atts);
6469 }
6470 return(name);
6471}
6472
6473/**
6474 * xmlParseEndTag:
6475 * @ctxt: an XML parser context
6476 *
6477 * parse an end of tag
6478 *
6479 * [42] ETag ::= '</' Name S? '>'
6480 *
6481 * With namespace
6482 *
6483 * [NS 9] ETag ::= '</' QName S? '>'
6484 */
6485
6486void
6487xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6488 xmlChar *name;
6489 xmlChar *oldname;
6490
6491 GROW;
6492 if ((RAW != '<') || (NXT(1) != '/')) {
6493 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6494 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6495 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6496 ctxt->wellFormed = 0;
6497 ctxt->disableSAX = 1;
6498 return;
6499 }
6500 SKIP(2);
6501
6502 name = xmlParseName(ctxt);
6503
6504 /*
6505 * We should definitely be at the ending "S? '>'" part
6506 */
6507 GROW;
6508 SKIP_BLANKS;
6509 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6510 ctxt->errNo = XML_ERR_GT_REQUIRED;
6511 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6512 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6513 ctxt->wellFormed = 0;
6514 ctxt->disableSAX = 1;
6515 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006516 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006517
6518 /*
6519 * [ WFC: Element Type Match ]
6520 * The Name in an element's end-tag must match the element type in the
6521 * start-tag.
6522 *
6523 */
6524 if ((name == NULL) || (ctxt->name == NULL) ||
6525 (!xmlStrEqual(name, ctxt->name))) {
6526 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6527 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6528 if ((name != NULL) && (ctxt->name != NULL)) {
6529 ctxt->sax->error(ctxt->userData,
6530 "Opening and ending tag mismatch: %s and %s\n",
6531 ctxt->name, name);
6532 } else if (ctxt->name != NULL) {
6533 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006534 "Ending tag error for: %s\n", ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006535 } else {
6536 ctxt->sax->error(ctxt->userData,
6537 "Ending tag error: internal error ???\n");
6538 }
6539
6540 }
6541 ctxt->wellFormed = 0;
6542 ctxt->disableSAX = 1;
6543 }
6544
6545 /*
6546 * SAX: End of Tag
6547 */
6548 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6549 (!ctxt->disableSAX))
6550 ctxt->sax->endElement(ctxt->userData, name);
6551
6552 if (name != NULL)
6553 xmlFree(name);
6554 oldname = namePop(ctxt);
6555 spacePop(ctxt);
6556 if (oldname != NULL) {
6557#ifdef DEBUG_STACK
6558 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6559#endif
6560 xmlFree(oldname);
6561 }
6562 return;
6563}
6564
6565/**
6566 * xmlParseCDSect:
6567 * @ctxt: an XML parser context
6568 *
6569 * Parse escaped pure raw content.
6570 *
6571 * [18] CDSect ::= CDStart CData CDEnd
6572 *
6573 * [19] CDStart ::= '<![CDATA['
6574 *
6575 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6576 *
6577 * [21] CDEnd ::= ']]>'
6578 */
6579void
6580xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6581 xmlChar *buf = NULL;
6582 int len = 0;
6583 int size = XML_PARSER_BUFFER_SIZE;
6584 int r, rl;
6585 int s, sl;
6586 int cur, l;
6587 int count = 0;
6588
6589 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6590 (NXT(2) == '[') && (NXT(3) == 'C') &&
6591 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6592 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6593 (NXT(8) == '[')) {
6594 SKIP(9);
6595 } else
6596 return;
6597
6598 ctxt->instate = XML_PARSER_CDATA_SECTION;
6599 r = CUR_CHAR(rl);
6600 if (!IS_CHAR(r)) {
6601 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6603 ctxt->sax->error(ctxt->userData,
6604 "CData section not finished\n");
6605 ctxt->wellFormed = 0;
6606 ctxt->disableSAX = 1;
6607 ctxt->instate = XML_PARSER_CONTENT;
6608 return;
6609 }
6610 NEXTL(rl);
6611 s = CUR_CHAR(sl);
6612 if (!IS_CHAR(s)) {
6613 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6614 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6615 ctxt->sax->error(ctxt->userData,
6616 "CData section not finished\n");
6617 ctxt->wellFormed = 0;
6618 ctxt->disableSAX = 1;
6619 ctxt->instate = XML_PARSER_CONTENT;
6620 return;
6621 }
6622 NEXTL(sl);
6623 cur = CUR_CHAR(l);
6624 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6625 if (buf == NULL) {
6626 xmlGenericError(xmlGenericErrorContext,
6627 "malloc of %d byte failed\n", size);
6628 return;
6629 }
6630 while (IS_CHAR(cur) &&
6631 ((r != ']') || (s != ']') || (cur != '>'))) {
6632 if (len + 5 >= size) {
6633 size *= 2;
6634 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6635 if (buf == NULL) {
6636 xmlGenericError(xmlGenericErrorContext,
6637 "realloc of %d byte failed\n", size);
6638 return;
6639 }
6640 }
6641 COPY_BUF(rl,buf,len,r);
6642 r = s;
6643 rl = sl;
6644 s = cur;
6645 sl = l;
6646 count++;
6647 if (count > 50) {
6648 GROW;
6649 count = 0;
6650 }
6651 NEXTL(l);
6652 cur = CUR_CHAR(l);
6653 }
6654 buf[len] = 0;
6655 ctxt->instate = XML_PARSER_CONTENT;
6656 if (cur != '>') {
6657 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6658 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6659 ctxt->sax->error(ctxt->userData,
6660 "CData section not finished\n%.50s\n", buf);
6661 ctxt->wellFormed = 0;
6662 ctxt->disableSAX = 1;
6663 xmlFree(buf);
6664 return;
6665 }
6666 NEXTL(l);
6667
6668 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006669 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00006670 */
6671 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6672 if (ctxt->sax->cdataBlock != NULL)
6673 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006674 else if (ctxt->sax->characters != NULL)
6675 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006676 }
6677 xmlFree(buf);
6678}
6679
6680/**
6681 * xmlParseContent:
6682 * @ctxt: an XML parser context
6683 *
6684 * Parse a content:
6685 *
6686 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6687 */
6688
6689void
6690xmlParseContent(xmlParserCtxtPtr ctxt) {
6691 GROW;
6692 while (((RAW != 0) || (ctxt->token != 0)) &&
6693 ((RAW != '<') || (NXT(1) != '/'))) {
6694 const xmlChar *test = CUR_PTR;
6695 int cons = ctxt->input->consumed;
Daniel Veillard04be4f52001-03-26 21:23:53 +00006696 int tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006697 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006698
6699 /*
6700 * Handle possible processed charrefs.
6701 */
6702 if (ctxt->token != 0) {
6703 xmlParseCharData(ctxt, 0);
6704 }
6705 /*
6706 * First case : a Processing Instruction.
6707 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006708 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006709 xmlParsePI(ctxt);
6710 }
6711
6712 /*
6713 * Second case : a CDSection
6714 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006715 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006716 (NXT(2) == '[') && (NXT(3) == 'C') &&
6717 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6718 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6719 (NXT(8) == '[')) {
6720 xmlParseCDSect(ctxt);
6721 }
6722
6723 /*
6724 * Third case : a comment
6725 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006726 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006727 (NXT(2) == '-') && (NXT(3) == '-')) {
6728 xmlParseComment(ctxt);
6729 ctxt->instate = XML_PARSER_CONTENT;
6730 }
6731
6732 /*
6733 * Fourth case : a sub-element.
6734 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006735 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006736 xmlParseElement(ctxt);
6737 }
6738
6739 /*
6740 * Fifth case : a reference. If if has not been resolved,
6741 * parsing returns it's Name, create the node
6742 */
6743
Daniel Veillard21a0f912001-02-25 19:54:14 +00006744 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006745 xmlParseReference(ctxt);
6746 }
6747
6748 /*
6749 * Last case, text. Note that References are handled directly.
6750 */
6751 else {
6752 xmlParseCharData(ctxt, 0);
6753 }
6754
6755 GROW;
6756 /*
6757 * Pop-up of finished entities.
6758 */
6759 while ((RAW == 0) && (ctxt->inputNr > 1))
6760 xmlPopInput(ctxt);
6761 SHRINK;
6762
6763 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6764 (tok == ctxt->token)) {
6765 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6766 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6767 ctxt->sax->error(ctxt->userData,
6768 "detected an error in element content\n");
6769 ctxt->wellFormed = 0;
6770 ctxt->disableSAX = 1;
6771 ctxt->instate = XML_PARSER_EOF;
6772 break;
6773 }
6774 }
6775}
6776
6777/**
6778 * xmlParseElement:
6779 * @ctxt: an XML parser context
6780 *
6781 * parse an XML element, this is highly recursive
6782 *
6783 * [39] element ::= EmptyElemTag | STag content ETag
6784 *
6785 * [ WFC: Element Type Match ]
6786 * The Name in an element's end-tag must match the element type in the
6787 * start-tag.
6788 *
6789 * [ VC: Element Valid ]
6790 * An element is valid if there is a declaration matching elementdecl
6791 * where the Name matches the element type and one of the following holds:
6792 * - The declaration matches EMPTY and the element has no content.
6793 * - The declaration matches children and the sequence of child elements
6794 * belongs to the language generated by the regular expression in the
6795 * content model, with optional white space (characters matching the
6796 * nonterminal S) between each pair of child elements.
6797 * - The declaration matches Mixed and the content consists of character
6798 * data and child elements whose types match names in the content model.
6799 * - The declaration matches ANY, and the types of any child elements have
6800 * been declared.
6801 */
6802
6803void
6804xmlParseElement(xmlParserCtxtPtr ctxt) {
6805 const xmlChar *openTag = CUR_PTR;
6806 xmlChar *name;
6807 xmlChar *oldname;
6808 xmlParserNodeInfo node_info;
6809 xmlNodePtr ret;
6810
6811 /* Capture start position */
6812 if (ctxt->record_info) {
6813 node_info.begin_pos = ctxt->input->consumed +
6814 (CUR_PTR - ctxt->input->base);
6815 node_info.begin_line = ctxt->input->line;
6816 }
6817
6818 if (ctxt->spaceNr == 0)
6819 spacePush(ctxt, -1);
6820 else
6821 spacePush(ctxt, *ctxt->space);
6822
6823 name = xmlParseStartTag(ctxt);
6824 if (name == NULL) {
6825 spacePop(ctxt);
6826 return;
6827 }
6828 namePush(ctxt, name);
6829 ret = ctxt->node;
6830
6831 /*
6832 * [ VC: Root Element Type ]
6833 * The Name in the document type declaration must match the element
6834 * type of the root element.
6835 */
6836 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6837 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6838 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6839
6840 /*
6841 * Check for an Empty Element.
6842 */
6843 if ((RAW == '/') && (NXT(1) == '>')) {
6844 SKIP(2);
6845 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6846 (!ctxt->disableSAX))
6847 ctxt->sax->endElement(ctxt->userData, name);
6848 oldname = namePop(ctxt);
6849 spacePop(ctxt);
6850 if (oldname != NULL) {
6851#ifdef DEBUG_STACK
6852 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6853#endif
6854 xmlFree(oldname);
6855 }
6856 if ( ret != NULL && ctxt->record_info ) {
6857 node_info.end_pos = ctxt->input->consumed +
6858 (CUR_PTR - ctxt->input->base);
6859 node_info.end_line = ctxt->input->line;
6860 node_info.node = ret;
6861 xmlParserAddNodeInfo(ctxt, &node_info);
6862 }
6863 return;
6864 }
6865 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006866 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006867 } else {
6868 ctxt->errNo = XML_ERR_GT_REQUIRED;
6869 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6870 ctxt->sax->error(ctxt->userData,
6871 "Couldn't find end of Start Tag\n%.30s\n",
6872 openTag);
6873 ctxt->wellFormed = 0;
6874 ctxt->disableSAX = 1;
6875
6876 /*
6877 * end of parsing of this node.
6878 */
6879 nodePop(ctxt);
6880 oldname = namePop(ctxt);
6881 spacePop(ctxt);
6882 if (oldname != NULL) {
6883#ifdef DEBUG_STACK
6884 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6885#endif
6886 xmlFree(oldname);
6887 }
6888
6889 /*
6890 * Capture end position and add node
6891 */
6892 if ( ret != NULL && ctxt->record_info ) {
6893 node_info.end_pos = ctxt->input->consumed +
6894 (CUR_PTR - ctxt->input->base);
6895 node_info.end_line = ctxt->input->line;
6896 node_info.node = ret;
6897 xmlParserAddNodeInfo(ctxt, &node_info);
6898 }
6899 return;
6900 }
6901
6902 /*
6903 * Parse the content of the element:
6904 */
6905 xmlParseContent(ctxt);
6906 if (!IS_CHAR(RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00006907 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00006908 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6909 ctxt->sax->error(ctxt->userData,
6910 "Premature end of data in tag %.30s\n", openTag);
6911 ctxt->wellFormed = 0;
6912 ctxt->disableSAX = 1;
6913
6914 /*
6915 * end of parsing of this node.
6916 */
6917 nodePop(ctxt);
6918 oldname = namePop(ctxt);
6919 spacePop(ctxt);
6920 if (oldname != NULL) {
6921#ifdef DEBUG_STACK
6922 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6923#endif
6924 xmlFree(oldname);
6925 }
6926 return;
6927 }
6928
6929 /*
6930 * parse the end of tag: '</' should be here.
6931 */
6932 xmlParseEndTag(ctxt);
6933
6934 /*
6935 * Capture end position and add node
6936 */
6937 if ( ret != NULL && ctxt->record_info ) {
6938 node_info.end_pos = ctxt->input->consumed +
6939 (CUR_PTR - ctxt->input->base);
6940 node_info.end_line = ctxt->input->line;
6941 node_info.node = ret;
6942 xmlParserAddNodeInfo(ctxt, &node_info);
6943 }
6944}
6945
6946/**
6947 * xmlParseVersionNum:
6948 * @ctxt: an XML parser context
6949 *
6950 * parse the XML version value.
6951 *
6952 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6953 *
6954 * Returns the string giving the XML version number, or NULL
6955 */
6956xmlChar *
6957xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6958 xmlChar *buf = NULL;
6959 int len = 0;
6960 int size = 10;
6961 xmlChar cur;
6962
6963 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6964 if (buf == NULL) {
6965 xmlGenericError(xmlGenericErrorContext,
6966 "malloc of %d byte failed\n", size);
6967 return(NULL);
6968 }
6969 cur = CUR;
6970 while (((cur >= 'a') && (cur <= 'z')) ||
6971 ((cur >= 'A') && (cur <= 'Z')) ||
6972 ((cur >= '0') && (cur <= '9')) ||
6973 (cur == '_') || (cur == '.') ||
6974 (cur == ':') || (cur == '-')) {
6975 if (len + 1 >= size) {
6976 size *= 2;
6977 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6978 if (buf == NULL) {
6979 xmlGenericError(xmlGenericErrorContext,
6980 "realloc of %d byte failed\n", size);
6981 return(NULL);
6982 }
6983 }
6984 buf[len++] = cur;
6985 NEXT;
6986 cur=CUR;
6987 }
6988 buf[len] = 0;
6989 return(buf);
6990}
6991
6992/**
6993 * xmlParseVersionInfo:
6994 * @ctxt: an XML parser context
6995 *
6996 * parse the XML version.
6997 *
6998 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6999 *
7000 * [25] Eq ::= S? '=' S?
7001 *
7002 * Returns the version string, e.g. "1.0"
7003 */
7004
7005xmlChar *
7006xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7007 xmlChar *version = NULL;
7008 const xmlChar *q;
7009
7010 if ((RAW == 'v') && (NXT(1) == 'e') &&
7011 (NXT(2) == 'r') && (NXT(3) == 's') &&
7012 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7013 (NXT(6) == 'n')) {
7014 SKIP(7);
7015 SKIP_BLANKS;
7016 if (RAW != '=') {
7017 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7018 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7019 ctxt->sax->error(ctxt->userData,
7020 "xmlParseVersionInfo : expected '='\n");
7021 ctxt->wellFormed = 0;
7022 ctxt->disableSAX = 1;
7023 return(NULL);
7024 }
7025 NEXT;
7026 SKIP_BLANKS;
7027 if (RAW == '"') {
7028 NEXT;
7029 q = CUR_PTR;
7030 version = xmlParseVersionNum(ctxt);
7031 if (RAW != '"') {
7032 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7033 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7034 ctxt->sax->error(ctxt->userData,
7035 "String not closed\n%.50s\n", q);
7036 ctxt->wellFormed = 0;
7037 ctxt->disableSAX = 1;
7038 } else
7039 NEXT;
7040 } else if (RAW == '\''){
7041 NEXT;
7042 q = CUR_PTR;
7043 version = xmlParseVersionNum(ctxt);
7044 if (RAW != '\'') {
7045 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7046 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7047 ctxt->sax->error(ctxt->userData,
7048 "String not closed\n%.50s\n", q);
7049 ctxt->wellFormed = 0;
7050 ctxt->disableSAX = 1;
7051 } else
7052 NEXT;
7053 } else {
7054 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7055 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7056 ctxt->sax->error(ctxt->userData,
7057 "xmlParseVersionInfo : expected ' or \"\n");
7058 ctxt->wellFormed = 0;
7059 ctxt->disableSAX = 1;
7060 }
7061 }
7062 return(version);
7063}
7064
7065/**
7066 * xmlParseEncName:
7067 * @ctxt: an XML parser context
7068 *
7069 * parse the XML encoding name
7070 *
7071 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7072 *
7073 * Returns the encoding name value or NULL
7074 */
7075xmlChar *
7076xmlParseEncName(xmlParserCtxtPtr ctxt) {
7077 xmlChar *buf = NULL;
7078 int len = 0;
7079 int size = 10;
7080 xmlChar cur;
7081
7082 cur = CUR;
7083 if (((cur >= 'a') && (cur <= 'z')) ||
7084 ((cur >= 'A') && (cur <= 'Z'))) {
7085 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7086 if (buf == NULL) {
7087 xmlGenericError(xmlGenericErrorContext,
7088 "malloc of %d byte failed\n", size);
7089 return(NULL);
7090 }
7091
7092 buf[len++] = cur;
7093 NEXT;
7094 cur = CUR;
7095 while (((cur >= 'a') && (cur <= 'z')) ||
7096 ((cur >= 'A') && (cur <= 'Z')) ||
7097 ((cur >= '0') && (cur <= '9')) ||
7098 (cur == '.') || (cur == '_') ||
7099 (cur == '-')) {
7100 if (len + 1 >= size) {
7101 size *= 2;
7102 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7103 if (buf == NULL) {
7104 xmlGenericError(xmlGenericErrorContext,
7105 "realloc of %d byte failed\n", size);
7106 return(NULL);
7107 }
7108 }
7109 buf[len++] = cur;
7110 NEXT;
7111 cur = CUR;
7112 if (cur == 0) {
7113 SHRINK;
7114 GROW;
7115 cur = CUR;
7116 }
7117 }
7118 buf[len] = 0;
7119 } else {
7120 ctxt->errNo = XML_ERR_ENCODING_NAME;
7121 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7122 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7123 ctxt->wellFormed = 0;
7124 ctxt->disableSAX = 1;
7125 }
7126 return(buf);
7127}
7128
7129/**
7130 * xmlParseEncodingDecl:
7131 * @ctxt: an XML parser context
7132 *
7133 * parse the XML encoding declaration
7134 *
7135 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7136 *
7137 * this setups the conversion filters.
7138 *
7139 * Returns the encoding value or NULL
7140 */
7141
7142xmlChar *
7143xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7144 xmlChar *encoding = NULL;
7145 const xmlChar *q;
7146
7147 SKIP_BLANKS;
7148 if ((RAW == 'e') && (NXT(1) == 'n') &&
7149 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7150 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7151 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7152 SKIP(8);
7153 SKIP_BLANKS;
7154 if (RAW != '=') {
7155 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7156 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7157 ctxt->sax->error(ctxt->userData,
7158 "xmlParseEncodingDecl : expected '='\n");
7159 ctxt->wellFormed = 0;
7160 ctxt->disableSAX = 1;
7161 return(NULL);
7162 }
7163 NEXT;
7164 SKIP_BLANKS;
7165 if (RAW == '"') {
7166 NEXT;
7167 q = CUR_PTR;
7168 encoding = xmlParseEncName(ctxt);
7169 if (RAW != '"') {
7170 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7171 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7172 ctxt->sax->error(ctxt->userData,
7173 "String not closed\n%.50s\n", q);
7174 ctxt->wellFormed = 0;
7175 ctxt->disableSAX = 1;
7176 } else
7177 NEXT;
7178 } else if (RAW == '\''){
7179 NEXT;
7180 q = CUR_PTR;
7181 encoding = xmlParseEncName(ctxt);
7182 if (RAW != '\'') {
7183 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7184 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7185 ctxt->sax->error(ctxt->userData,
7186 "String not closed\n%.50s\n", q);
7187 ctxt->wellFormed = 0;
7188 ctxt->disableSAX = 1;
7189 } else
7190 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00007191 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007192 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7193 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7194 ctxt->sax->error(ctxt->userData,
7195 "xmlParseEncodingDecl : expected ' or \"\n");
7196 ctxt->wellFormed = 0;
7197 ctxt->disableSAX = 1;
7198 }
7199 if (encoding != NULL) {
7200 xmlCharEncoding enc;
7201 xmlCharEncodingHandlerPtr handler;
7202
7203 if (ctxt->input->encoding != NULL)
7204 xmlFree((xmlChar *) ctxt->input->encoding);
7205 ctxt->input->encoding = encoding;
7206
7207 enc = xmlParseCharEncoding((const char *) encoding);
7208 /*
7209 * registered set of known encodings
7210 */
7211 if (enc != XML_CHAR_ENCODING_ERROR) {
7212 xmlSwitchEncoding(ctxt, enc);
7213 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7214 xmlFree(encoding);
7215 return(NULL);
7216 }
7217 } else {
7218 /*
7219 * fallback for unknown encodings
7220 */
7221 handler = xmlFindCharEncodingHandler((const char *) encoding);
7222 if (handler != NULL) {
7223 xmlSwitchToEncoding(ctxt, handler);
7224 } else {
7225 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7226 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7227 ctxt->sax->error(ctxt->userData,
7228 "Unsupported encoding %s\n", encoding);
7229 return(NULL);
7230 }
7231 }
7232 }
7233 }
7234 return(encoding);
7235}
7236
7237/**
7238 * xmlParseSDDecl:
7239 * @ctxt: an XML parser context
7240 *
7241 * parse the XML standalone declaration
7242 *
7243 * [32] SDDecl ::= S 'standalone' Eq
7244 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7245 *
7246 * [ VC: Standalone Document Declaration ]
7247 * TODO The standalone document declaration must have the value "no"
7248 * if any external markup declarations contain declarations of:
7249 * - attributes with default values, if elements to which these
7250 * attributes apply appear in the document without specifications
7251 * of values for these attributes, or
7252 * - entities (other than amp, lt, gt, apos, quot), if references
7253 * to those entities appear in the document, or
7254 * - attributes with values subject to normalization, where the
7255 * attribute appears in the document with a value which will change
7256 * as a result of normalization, or
7257 * - element types with element content, if white space occurs directly
7258 * within any instance of those types.
7259 *
7260 * Returns 1 if standalone, 0 otherwise
7261 */
7262
7263int
7264xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7265 int standalone = -1;
7266
7267 SKIP_BLANKS;
7268 if ((RAW == 's') && (NXT(1) == 't') &&
7269 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7270 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7271 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7272 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7273 SKIP(10);
7274 SKIP_BLANKS;
7275 if (RAW != '=') {
7276 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7277 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7278 ctxt->sax->error(ctxt->userData,
7279 "XML standalone declaration : expected '='\n");
7280 ctxt->wellFormed = 0;
7281 ctxt->disableSAX = 1;
7282 return(standalone);
7283 }
7284 NEXT;
7285 SKIP_BLANKS;
7286 if (RAW == '\''){
7287 NEXT;
7288 if ((RAW == 'n') && (NXT(1) == 'o')) {
7289 standalone = 0;
7290 SKIP(2);
7291 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7292 (NXT(2) == 's')) {
7293 standalone = 1;
7294 SKIP(3);
7295 } else {
7296 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7297 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7298 ctxt->sax->error(ctxt->userData,
7299 "standalone accepts only 'yes' or 'no'\n");
7300 ctxt->wellFormed = 0;
7301 ctxt->disableSAX = 1;
7302 }
7303 if (RAW != '\'') {
7304 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7305 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7306 ctxt->sax->error(ctxt->userData, "String not closed\n");
7307 ctxt->wellFormed = 0;
7308 ctxt->disableSAX = 1;
7309 } else
7310 NEXT;
7311 } else if (RAW == '"'){
7312 NEXT;
7313 if ((RAW == 'n') && (NXT(1) == 'o')) {
7314 standalone = 0;
7315 SKIP(2);
7316 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7317 (NXT(2) == 's')) {
7318 standalone = 1;
7319 SKIP(3);
7320 } else {
7321 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7322 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7323 ctxt->sax->error(ctxt->userData,
7324 "standalone accepts only 'yes' or 'no'\n");
7325 ctxt->wellFormed = 0;
7326 ctxt->disableSAX = 1;
7327 }
7328 if (RAW != '"') {
7329 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7331 ctxt->sax->error(ctxt->userData, "String not closed\n");
7332 ctxt->wellFormed = 0;
7333 ctxt->disableSAX = 1;
7334 } else
7335 NEXT;
7336 } else {
7337 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7338 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7339 ctxt->sax->error(ctxt->userData,
7340 "Standalone value not found\n");
7341 ctxt->wellFormed = 0;
7342 ctxt->disableSAX = 1;
7343 }
7344 }
7345 return(standalone);
7346}
7347
7348/**
7349 * xmlParseXMLDecl:
7350 * @ctxt: an XML parser context
7351 *
7352 * parse an XML declaration header
7353 *
7354 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7355 */
7356
7357void
7358xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7359 xmlChar *version;
7360
7361 /*
7362 * We know that '<?xml' is here.
7363 */
7364 SKIP(5);
7365
7366 if (!IS_BLANK(RAW)) {
7367 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7368 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7369 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7370 ctxt->wellFormed = 0;
7371 ctxt->disableSAX = 1;
7372 }
7373 SKIP_BLANKS;
7374
7375 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007376 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007377 */
7378 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007379 if (version == NULL) {
7380 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7381 ctxt->sax->error(ctxt->userData,
7382 "Malformed declaration expecting version\n");
7383 ctxt->wellFormed = 0;
7384 ctxt->disableSAX = 1;
7385 } else {
7386 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7387 /*
7388 * TODO: Blueberry should be detected here
7389 */
7390 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7391 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7392 version);
7393 }
7394 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007395 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007396 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007397 }
Owen Taylor3473f882001-02-23 17:55:21 +00007398
7399 /*
7400 * We may have the encoding declaration
7401 */
7402 if (!IS_BLANK(RAW)) {
7403 if ((RAW == '?') && (NXT(1) == '>')) {
7404 SKIP(2);
7405 return;
7406 }
7407 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7408 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7409 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7410 ctxt->wellFormed = 0;
7411 ctxt->disableSAX = 1;
7412 }
7413 xmlParseEncodingDecl(ctxt);
7414 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7415 /*
7416 * The XML REC instructs us to stop parsing right here
7417 */
7418 return;
7419 }
7420
7421 /*
7422 * We may have the standalone status.
7423 */
7424 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7425 if ((RAW == '?') && (NXT(1) == '>')) {
7426 SKIP(2);
7427 return;
7428 }
7429 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7430 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7431 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7432 ctxt->wellFormed = 0;
7433 ctxt->disableSAX = 1;
7434 }
7435 SKIP_BLANKS;
7436 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7437
7438 SKIP_BLANKS;
7439 if ((RAW == '?') && (NXT(1) == '>')) {
7440 SKIP(2);
7441 } else if (RAW == '>') {
7442 /* Deprecated old WD ... */
7443 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7444 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7445 ctxt->sax->error(ctxt->userData,
7446 "XML declaration must end-up with '?>'\n");
7447 ctxt->wellFormed = 0;
7448 ctxt->disableSAX = 1;
7449 NEXT;
7450 } else {
7451 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7452 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7453 ctxt->sax->error(ctxt->userData,
7454 "parsing XML declaration: '?>' expected\n");
7455 ctxt->wellFormed = 0;
7456 ctxt->disableSAX = 1;
7457 MOVETO_ENDTAG(CUR_PTR);
7458 NEXT;
7459 }
7460}
7461
7462/**
7463 * xmlParseMisc:
7464 * @ctxt: an XML parser context
7465 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007466 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007467 *
7468 * [27] Misc ::= Comment | PI | S
7469 */
7470
7471void
7472xmlParseMisc(xmlParserCtxtPtr ctxt) {
7473 while (((RAW == '<') && (NXT(1) == '?')) ||
7474 ((RAW == '<') && (NXT(1) == '!') &&
7475 (NXT(2) == '-') && (NXT(3) == '-')) ||
7476 IS_BLANK(CUR)) {
7477 if ((RAW == '<') && (NXT(1) == '?')) {
7478 xmlParsePI(ctxt);
7479 } else if (IS_BLANK(CUR)) {
7480 NEXT;
7481 } else
7482 xmlParseComment(ctxt);
7483 }
7484}
7485
7486/**
7487 * xmlParseDocument:
7488 * @ctxt: an XML parser context
7489 *
7490 * parse an XML document (and build a tree if using the standard SAX
7491 * interface).
7492 *
7493 * [1] document ::= prolog element Misc*
7494 *
7495 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7496 *
7497 * Returns 0, -1 in case of error. the parser context is augmented
7498 * as a result of the parsing.
7499 */
7500
7501int
7502xmlParseDocument(xmlParserCtxtPtr ctxt) {
7503 xmlChar start[4];
7504 xmlCharEncoding enc;
7505
7506 xmlInitParser();
7507
7508 GROW;
7509
7510 /*
7511 * SAX: beginning of the document processing.
7512 */
7513 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7514 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7515
Daniel Veillard50f34372001-08-03 12:06:36 +00007516 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007517 /*
7518 * Get the 4 first bytes and decode the charset
7519 * if enc != XML_CHAR_ENCODING_NONE
7520 * plug some encoding conversion routines.
7521 */
7522 start[0] = RAW;
7523 start[1] = NXT(1);
7524 start[2] = NXT(2);
7525 start[3] = NXT(3);
7526 enc = xmlDetectCharEncoding(start, 4);
7527 if (enc != XML_CHAR_ENCODING_NONE) {
7528 xmlSwitchEncoding(ctxt, enc);
7529 }
Owen Taylor3473f882001-02-23 17:55:21 +00007530 }
7531
7532
7533 if (CUR == 0) {
7534 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7535 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7536 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7537 ctxt->wellFormed = 0;
7538 ctxt->disableSAX = 1;
7539 }
7540
7541 /*
7542 * Check for the XMLDecl in the Prolog.
7543 */
7544 GROW;
7545 if ((RAW == '<') && (NXT(1) == '?') &&
7546 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7547 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7548
7549 /*
7550 * Note that we will switch encoding on the fly.
7551 */
7552 xmlParseXMLDecl(ctxt);
7553 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7554 /*
7555 * The XML REC instructs us to stop parsing right here
7556 */
7557 return(-1);
7558 }
7559 ctxt->standalone = ctxt->input->standalone;
7560 SKIP_BLANKS;
7561 } else {
7562 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7563 }
7564 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7565 ctxt->sax->startDocument(ctxt->userData);
7566
7567 /*
7568 * The Misc part of the Prolog
7569 */
7570 GROW;
7571 xmlParseMisc(ctxt);
7572
7573 /*
7574 * Then possibly doc type declaration(s) and more Misc
7575 * (doctypedecl Misc*)?
7576 */
7577 GROW;
7578 if ((RAW == '<') && (NXT(1) == '!') &&
7579 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7580 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7581 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7582 (NXT(8) == 'E')) {
7583
7584 ctxt->inSubset = 1;
7585 xmlParseDocTypeDecl(ctxt);
7586 if (RAW == '[') {
7587 ctxt->instate = XML_PARSER_DTD;
7588 xmlParseInternalSubset(ctxt);
7589 }
7590
7591 /*
7592 * Create and update the external subset.
7593 */
7594 ctxt->inSubset = 2;
7595 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7596 (!ctxt->disableSAX))
7597 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7598 ctxt->extSubSystem, ctxt->extSubURI);
7599 ctxt->inSubset = 0;
7600
7601
7602 ctxt->instate = XML_PARSER_PROLOG;
7603 xmlParseMisc(ctxt);
7604 }
7605
7606 /*
7607 * Time to start parsing the tree itself
7608 */
7609 GROW;
7610 if (RAW != '<') {
7611 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7612 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7613 ctxt->sax->error(ctxt->userData,
7614 "Start tag expected, '<' not found\n");
7615 ctxt->wellFormed = 0;
7616 ctxt->disableSAX = 1;
7617 ctxt->instate = XML_PARSER_EOF;
7618 } else {
7619 ctxt->instate = XML_PARSER_CONTENT;
7620 xmlParseElement(ctxt);
7621 ctxt->instate = XML_PARSER_EPILOG;
7622
7623
7624 /*
7625 * The Misc part at the end
7626 */
7627 xmlParseMisc(ctxt);
7628
7629 if (RAW != 0) {
7630 ctxt->errNo = XML_ERR_DOCUMENT_END;
7631 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7632 ctxt->sax->error(ctxt->userData,
7633 "Extra content at the end of the document\n");
7634 ctxt->wellFormed = 0;
7635 ctxt->disableSAX = 1;
7636 }
7637 ctxt->instate = XML_PARSER_EOF;
7638 }
7639
7640 /*
7641 * SAX: end of the document processing.
7642 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007643 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007644 ctxt->sax->endDocument(ctxt->userData);
7645
Daniel Veillardc7612992002-02-17 22:47:37 +00007646 if (! ctxt->wellFormed) {
7647 ctxt->valid = 0;
7648 return(-1);
7649 }
Owen Taylor3473f882001-02-23 17:55:21 +00007650 return(0);
7651}
7652
7653/**
7654 * xmlParseExtParsedEnt:
7655 * @ctxt: an XML parser context
7656 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007657 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00007658 * An external general parsed entity is well-formed if it matches the
7659 * production labeled extParsedEnt.
7660 *
7661 * [78] extParsedEnt ::= TextDecl? content
7662 *
7663 * Returns 0, -1 in case of error. the parser context is augmented
7664 * as a result of the parsing.
7665 */
7666
7667int
7668xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7669 xmlChar start[4];
7670 xmlCharEncoding enc;
7671
7672 xmlDefaultSAXHandlerInit();
7673
7674 GROW;
7675
7676 /*
7677 * SAX: beginning of the document processing.
7678 */
7679 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7680 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7681
7682 /*
7683 * Get the 4 first bytes and decode the charset
7684 * if enc != XML_CHAR_ENCODING_NONE
7685 * plug some encoding conversion routines.
7686 */
7687 start[0] = RAW;
7688 start[1] = NXT(1);
7689 start[2] = NXT(2);
7690 start[3] = NXT(3);
7691 enc = xmlDetectCharEncoding(start, 4);
7692 if (enc != XML_CHAR_ENCODING_NONE) {
7693 xmlSwitchEncoding(ctxt, enc);
7694 }
7695
7696
7697 if (CUR == 0) {
7698 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7699 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7700 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7701 ctxt->wellFormed = 0;
7702 ctxt->disableSAX = 1;
7703 }
7704
7705 /*
7706 * Check for the XMLDecl in the Prolog.
7707 */
7708 GROW;
7709 if ((RAW == '<') && (NXT(1) == '?') &&
7710 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7711 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7712
7713 /*
7714 * Note that we will switch encoding on the fly.
7715 */
7716 xmlParseXMLDecl(ctxt);
7717 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7718 /*
7719 * The XML REC instructs us to stop parsing right here
7720 */
7721 return(-1);
7722 }
7723 SKIP_BLANKS;
7724 } else {
7725 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7726 }
7727 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7728 ctxt->sax->startDocument(ctxt->userData);
7729
7730 /*
7731 * Doing validity checking on chunk doesn't make sense
7732 */
7733 ctxt->instate = XML_PARSER_CONTENT;
7734 ctxt->validate = 0;
7735 ctxt->loadsubset = 0;
7736 ctxt->depth = 0;
7737
7738 xmlParseContent(ctxt);
7739
7740 if ((RAW == '<') && (NXT(1) == '/')) {
7741 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7742 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7743 ctxt->sax->error(ctxt->userData,
7744 "chunk is not well balanced\n");
7745 ctxt->wellFormed = 0;
7746 ctxt->disableSAX = 1;
7747 } else if (RAW != 0) {
7748 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7749 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7750 ctxt->sax->error(ctxt->userData,
7751 "extra content at the end of well balanced chunk\n");
7752 ctxt->wellFormed = 0;
7753 ctxt->disableSAX = 1;
7754 }
7755
7756 /*
7757 * SAX: end of the document processing.
7758 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007759 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007760 ctxt->sax->endDocument(ctxt->userData);
7761
7762 if (! ctxt->wellFormed) return(-1);
7763 return(0);
7764}
7765
7766/************************************************************************
7767 * *
7768 * Progressive parsing interfaces *
7769 * *
7770 ************************************************************************/
7771
7772/**
7773 * xmlParseLookupSequence:
7774 * @ctxt: an XML parser context
7775 * @first: the first char to lookup
7776 * @next: the next char to lookup or zero
7777 * @third: the next char to lookup or zero
7778 *
7779 * Try to find if a sequence (first, next, third) or just (first next) or
7780 * (first) is available in the input stream.
7781 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7782 * to avoid rescanning sequences of bytes, it DOES change the state of the
7783 * parser, do not use liberally.
7784 *
7785 * Returns the index to the current parsing point if the full sequence
7786 * is available, -1 otherwise.
7787 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007788static int
Owen Taylor3473f882001-02-23 17:55:21 +00007789xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7790 xmlChar next, xmlChar third) {
7791 int base, len;
7792 xmlParserInputPtr in;
7793 const xmlChar *buf;
7794
7795 in = ctxt->input;
7796 if (in == NULL) return(-1);
7797 base = in->cur - in->base;
7798 if (base < 0) return(-1);
7799 if (ctxt->checkIndex > base)
7800 base = ctxt->checkIndex;
7801 if (in->buf == NULL) {
7802 buf = in->base;
7803 len = in->length;
7804 } else {
7805 buf = in->buf->buffer->content;
7806 len = in->buf->buffer->use;
7807 }
7808 /* take into account the sequence length */
7809 if (third) len -= 2;
7810 else if (next) len --;
7811 for (;base < len;base++) {
7812 if (buf[base] == first) {
7813 if (third != 0) {
7814 if ((buf[base + 1] != next) ||
7815 (buf[base + 2] != third)) continue;
7816 } else if (next != 0) {
7817 if (buf[base + 1] != next) continue;
7818 }
7819 ctxt->checkIndex = 0;
7820#ifdef DEBUG_PUSH
7821 if (next == 0)
7822 xmlGenericError(xmlGenericErrorContext,
7823 "PP: lookup '%c' found at %d\n",
7824 first, base);
7825 else if (third == 0)
7826 xmlGenericError(xmlGenericErrorContext,
7827 "PP: lookup '%c%c' found at %d\n",
7828 first, next, base);
7829 else
7830 xmlGenericError(xmlGenericErrorContext,
7831 "PP: lookup '%c%c%c' found at %d\n",
7832 first, next, third, base);
7833#endif
7834 return(base - (in->cur - in->base));
7835 }
7836 }
7837 ctxt->checkIndex = base;
7838#ifdef DEBUG_PUSH
7839 if (next == 0)
7840 xmlGenericError(xmlGenericErrorContext,
7841 "PP: lookup '%c' failed\n", first);
7842 else if (third == 0)
7843 xmlGenericError(xmlGenericErrorContext,
7844 "PP: lookup '%c%c' failed\n", first, next);
7845 else
7846 xmlGenericError(xmlGenericErrorContext,
7847 "PP: lookup '%c%c%c' failed\n", first, next, third);
7848#endif
7849 return(-1);
7850}
7851
7852/**
7853 * xmlParseTryOrFinish:
7854 * @ctxt: an XML parser context
7855 * @terminate: last chunk indicator
7856 *
7857 * Try to progress on parsing
7858 *
7859 * Returns zero if no parsing was possible
7860 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007861static int
Owen Taylor3473f882001-02-23 17:55:21 +00007862xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7863 int ret = 0;
7864 int avail;
7865 xmlChar cur, next;
7866
7867#ifdef DEBUG_PUSH
7868 switch (ctxt->instate) {
7869 case XML_PARSER_EOF:
7870 xmlGenericError(xmlGenericErrorContext,
7871 "PP: try EOF\n"); break;
7872 case XML_PARSER_START:
7873 xmlGenericError(xmlGenericErrorContext,
7874 "PP: try START\n"); break;
7875 case XML_PARSER_MISC:
7876 xmlGenericError(xmlGenericErrorContext,
7877 "PP: try MISC\n");break;
7878 case XML_PARSER_COMMENT:
7879 xmlGenericError(xmlGenericErrorContext,
7880 "PP: try COMMENT\n");break;
7881 case XML_PARSER_PROLOG:
7882 xmlGenericError(xmlGenericErrorContext,
7883 "PP: try PROLOG\n");break;
7884 case XML_PARSER_START_TAG:
7885 xmlGenericError(xmlGenericErrorContext,
7886 "PP: try START_TAG\n");break;
7887 case XML_PARSER_CONTENT:
7888 xmlGenericError(xmlGenericErrorContext,
7889 "PP: try CONTENT\n");break;
7890 case XML_PARSER_CDATA_SECTION:
7891 xmlGenericError(xmlGenericErrorContext,
7892 "PP: try CDATA_SECTION\n");break;
7893 case XML_PARSER_END_TAG:
7894 xmlGenericError(xmlGenericErrorContext,
7895 "PP: try END_TAG\n");break;
7896 case XML_PARSER_ENTITY_DECL:
7897 xmlGenericError(xmlGenericErrorContext,
7898 "PP: try ENTITY_DECL\n");break;
7899 case XML_PARSER_ENTITY_VALUE:
7900 xmlGenericError(xmlGenericErrorContext,
7901 "PP: try ENTITY_VALUE\n");break;
7902 case XML_PARSER_ATTRIBUTE_VALUE:
7903 xmlGenericError(xmlGenericErrorContext,
7904 "PP: try ATTRIBUTE_VALUE\n");break;
7905 case XML_PARSER_DTD:
7906 xmlGenericError(xmlGenericErrorContext,
7907 "PP: try DTD\n");break;
7908 case XML_PARSER_EPILOG:
7909 xmlGenericError(xmlGenericErrorContext,
7910 "PP: try EPILOG\n");break;
7911 case XML_PARSER_PI:
7912 xmlGenericError(xmlGenericErrorContext,
7913 "PP: try PI\n");break;
7914 case XML_PARSER_IGNORE:
7915 xmlGenericError(xmlGenericErrorContext,
7916 "PP: try IGNORE\n");break;
7917 }
7918#endif
7919
7920 while (1) {
7921 /*
7922 * Pop-up of finished entities.
7923 */
7924 while ((RAW == 0) && (ctxt->inputNr > 1))
7925 xmlPopInput(ctxt);
7926
7927 if (ctxt->input ==NULL) break;
7928 if (ctxt->input->buf == NULL)
7929 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00007930 else {
7931 /*
7932 * If we are operating on converted input, try to flush
7933 * remainng chars to avoid them stalling in the non-converted
7934 * buffer.
7935 */
7936 if ((ctxt->input->buf->raw != NULL) &&
7937 (ctxt->input->buf->raw->use > 0)) {
7938 int base = ctxt->input->base -
7939 ctxt->input->buf->buffer->content;
7940 int current = ctxt->input->cur - ctxt->input->base;
7941
7942 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
7943 ctxt->input->base = ctxt->input->buf->buffer->content + base;
7944 ctxt->input->cur = ctxt->input->base + current;
7945 ctxt->input->end =
7946 &ctxt->input->buf->buffer->content[
7947 ctxt->input->buf->buffer->use];
7948 }
7949 avail = ctxt->input->buf->buffer->use -
7950 (ctxt->input->cur - ctxt->input->base);
7951 }
Owen Taylor3473f882001-02-23 17:55:21 +00007952 if (avail < 1)
7953 goto done;
7954 switch (ctxt->instate) {
7955 case XML_PARSER_EOF:
7956 /*
7957 * Document parsing is done !
7958 */
7959 goto done;
7960 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00007961 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
7962 xmlChar start[4];
7963 xmlCharEncoding enc;
7964
7965 /*
7966 * Very first chars read from the document flow.
7967 */
7968 if (avail < 4)
7969 goto done;
7970
7971 /*
7972 * Get the 4 first bytes and decode the charset
7973 * if enc != XML_CHAR_ENCODING_NONE
7974 * plug some encoding conversion routines.
7975 */
7976 start[0] = RAW;
7977 start[1] = NXT(1);
7978 start[2] = NXT(2);
7979 start[3] = NXT(3);
7980 enc = xmlDetectCharEncoding(start, 4);
7981 if (enc != XML_CHAR_ENCODING_NONE) {
7982 xmlSwitchEncoding(ctxt, enc);
7983 }
7984 break;
7985 }
Owen Taylor3473f882001-02-23 17:55:21 +00007986
7987 cur = ctxt->input->cur[0];
7988 next = ctxt->input->cur[1];
7989 if (cur == 0) {
7990 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7991 ctxt->sax->setDocumentLocator(ctxt->userData,
7992 &xmlDefaultSAXLocator);
7993 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7994 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7995 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7996 ctxt->wellFormed = 0;
7997 ctxt->disableSAX = 1;
7998 ctxt->instate = XML_PARSER_EOF;
7999#ifdef DEBUG_PUSH
8000 xmlGenericError(xmlGenericErrorContext,
8001 "PP: entering EOF\n");
8002#endif
8003 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8004 ctxt->sax->endDocument(ctxt->userData);
8005 goto done;
8006 }
8007 if ((cur == '<') && (next == '?')) {
8008 /* PI or XML decl */
8009 if (avail < 5) return(ret);
8010 if ((!terminate) &&
8011 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8012 return(ret);
8013 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8014 ctxt->sax->setDocumentLocator(ctxt->userData,
8015 &xmlDefaultSAXLocator);
8016 if ((ctxt->input->cur[2] == 'x') &&
8017 (ctxt->input->cur[3] == 'm') &&
8018 (ctxt->input->cur[4] == 'l') &&
8019 (IS_BLANK(ctxt->input->cur[5]))) {
8020 ret += 5;
8021#ifdef DEBUG_PUSH
8022 xmlGenericError(xmlGenericErrorContext,
8023 "PP: Parsing XML Decl\n");
8024#endif
8025 xmlParseXMLDecl(ctxt);
8026 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8027 /*
8028 * The XML REC instructs us to stop parsing right
8029 * here
8030 */
8031 ctxt->instate = XML_PARSER_EOF;
8032 return(0);
8033 }
8034 ctxt->standalone = ctxt->input->standalone;
8035 if ((ctxt->encoding == NULL) &&
8036 (ctxt->input->encoding != NULL))
8037 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8038 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8039 (!ctxt->disableSAX))
8040 ctxt->sax->startDocument(ctxt->userData);
8041 ctxt->instate = XML_PARSER_MISC;
8042#ifdef DEBUG_PUSH
8043 xmlGenericError(xmlGenericErrorContext,
8044 "PP: entering MISC\n");
8045#endif
8046 } else {
8047 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8048 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8049 (!ctxt->disableSAX))
8050 ctxt->sax->startDocument(ctxt->userData);
8051 ctxt->instate = XML_PARSER_MISC;
8052#ifdef DEBUG_PUSH
8053 xmlGenericError(xmlGenericErrorContext,
8054 "PP: entering MISC\n");
8055#endif
8056 }
8057 } else {
8058 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8059 ctxt->sax->setDocumentLocator(ctxt->userData,
8060 &xmlDefaultSAXLocator);
8061 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8062 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8063 (!ctxt->disableSAX))
8064 ctxt->sax->startDocument(ctxt->userData);
8065 ctxt->instate = XML_PARSER_MISC;
8066#ifdef DEBUG_PUSH
8067 xmlGenericError(xmlGenericErrorContext,
8068 "PP: entering MISC\n");
8069#endif
8070 }
8071 break;
8072 case XML_PARSER_MISC:
8073 SKIP_BLANKS;
8074 if (ctxt->input->buf == NULL)
8075 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8076 else
8077 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8078 if (avail < 2)
8079 goto done;
8080 cur = ctxt->input->cur[0];
8081 next = ctxt->input->cur[1];
8082 if ((cur == '<') && (next == '?')) {
8083 if ((!terminate) &&
8084 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8085 goto done;
8086#ifdef DEBUG_PUSH
8087 xmlGenericError(xmlGenericErrorContext,
8088 "PP: Parsing PI\n");
8089#endif
8090 xmlParsePI(ctxt);
8091 } else if ((cur == '<') && (next == '!') &&
8092 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8093 if ((!terminate) &&
8094 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8095 goto done;
8096#ifdef DEBUG_PUSH
8097 xmlGenericError(xmlGenericErrorContext,
8098 "PP: Parsing Comment\n");
8099#endif
8100 xmlParseComment(ctxt);
8101 ctxt->instate = XML_PARSER_MISC;
8102 } else if ((cur == '<') && (next == '!') &&
8103 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8104 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8105 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8106 (ctxt->input->cur[8] == 'E')) {
8107 if ((!terminate) &&
8108 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8109 goto done;
8110#ifdef DEBUG_PUSH
8111 xmlGenericError(xmlGenericErrorContext,
8112 "PP: Parsing internal subset\n");
8113#endif
8114 ctxt->inSubset = 1;
8115 xmlParseDocTypeDecl(ctxt);
8116 if (RAW == '[') {
8117 ctxt->instate = XML_PARSER_DTD;
8118#ifdef DEBUG_PUSH
8119 xmlGenericError(xmlGenericErrorContext,
8120 "PP: entering DTD\n");
8121#endif
8122 } else {
8123 /*
8124 * Create and update the external subset.
8125 */
8126 ctxt->inSubset = 2;
8127 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8128 (ctxt->sax->externalSubset != NULL))
8129 ctxt->sax->externalSubset(ctxt->userData,
8130 ctxt->intSubName, ctxt->extSubSystem,
8131 ctxt->extSubURI);
8132 ctxt->inSubset = 0;
8133 ctxt->instate = XML_PARSER_PROLOG;
8134#ifdef DEBUG_PUSH
8135 xmlGenericError(xmlGenericErrorContext,
8136 "PP: entering PROLOG\n");
8137#endif
8138 }
8139 } else if ((cur == '<') && (next == '!') &&
8140 (avail < 9)) {
8141 goto done;
8142 } else {
8143 ctxt->instate = XML_PARSER_START_TAG;
8144#ifdef DEBUG_PUSH
8145 xmlGenericError(xmlGenericErrorContext,
8146 "PP: entering START_TAG\n");
8147#endif
8148 }
8149 break;
8150 case XML_PARSER_IGNORE:
8151 xmlGenericError(xmlGenericErrorContext,
8152 "PP: internal error, state == IGNORE");
8153 ctxt->instate = XML_PARSER_DTD;
8154#ifdef DEBUG_PUSH
8155 xmlGenericError(xmlGenericErrorContext,
8156 "PP: entering DTD\n");
8157#endif
8158 break;
8159 case XML_PARSER_PROLOG:
8160 SKIP_BLANKS;
8161 if (ctxt->input->buf == NULL)
8162 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8163 else
8164 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8165 if (avail < 2)
8166 goto done;
8167 cur = ctxt->input->cur[0];
8168 next = ctxt->input->cur[1];
8169 if ((cur == '<') && (next == '?')) {
8170 if ((!terminate) &&
8171 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8172 goto done;
8173#ifdef DEBUG_PUSH
8174 xmlGenericError(xmlGenericErrorContext,
8175 "PP: Parsing PI\n");
8176#endif
8177 xmlParsePI(ctxt);
8178 } else if ((cur == '<') && (next == '!') &&
8179 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8180 if ((!terminate) &&
8181 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8182 goto done;
8183#ifdef DEBUG_PUSH
8184 xmlGenericError(xmlGenericErrorContext,
8185 "PP: Parsing Comment\n");
8186#endif
8187 xmlParseComment(ctxt);
8188 ctxt->instate = XML_PARSER_PROLOG;
8189 } else if ((cur == '<') && (next == '!') &&
8190 (avail < 4)) {
8191 goto done;
8192 } else {
8193 ctxt->instate = XML_PARSER_START_TAG;
8194#ifdef DEBUG_PUSH
8195 xmlGenericError(xmlGenericErrorContext,
8196 "PP: entering START_TAG\n");
8197#endif
8198 }
8199 break;
8200 case XML_PARSER_EPILOG:
8201 SKIP_BLANKS;
8202 if (ctxt->input->buf == NULL)
8203 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8204 else
8205 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8206 if (avail < 2)
8207 goto done;
8208 cur = ctxt->input->cur[0];
8209 next = ctxt->input->cur[1];
8210 if ((cur == '<') && (next == '?')) {
8211 if ((!terminate) &&
8212 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8213 goto done;
8214#ifdef DEBUG_PUSH
8215 xmlGenericError(xmlGenericErrorContext,
8216 "PP: Parsing PI\n");
8217#endif
8218 xmlParsePI(ctxt);
8219 ctxt->instate = XML_PARSER_EPILOG;
8220 } else if ((cur == '<') && (next == '!') &&
8221 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8222 if ((!terminate) &&
8223 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8224 goto done;
8225#ifdef DEBUG_PUSH
8226 xmlGenericError(xmlGenericErrorContext,
8227 "PP: Parsing Comment\n");
8228#endif
8229 xmlParseComment(ctxt);
8230 ctxt->instate = XML_PARSER_EPILOG;
8231 } else if ((cur == '<') && (next == '!') &&
8232 (avail < 4)) {
8233 goto done;
8234 } else {
8235 ctxt->errNo = XML_ERR_DOCUMENT_END;
8236 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8237 ctxt->sax->error(ctxt->userData,
8238 "Extra content at the end of the document\n");
8239 ctxt->wellFormed = 0;
8240 ctxt->disableSAX = 1;
8241 ctxt->instate = XML_PARSER_EOF;
8242#ifdef DEBUG_PUSH
8243 xmlGenericError(xmlGenericErrorContext,
8244 "PP: entering EOF\n");
8245#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008246 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008247 ctxt->sax->endDocument(ctxt->userData);
8248 goto done;
8249 }
8250 break;
8251 case XML_PARSER_START_TAG: {
8252 xmlChar *name, *oldname;
8253
8254 if ((avail < 2) && (ctxt->inputNr == 1))
8255 goto done;
8256 cur = ctxt->input->cur[0];
8257 if (cur != '<') {
8258 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8259 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8260 ctxt->sax->error(ctxt->userData,
8261 "Start tag expect, '<' not found\n");
8262 ctxt->wellFormed = 0;
8263 ctxt->disableSAX = 1;
8264 ctxt->instate = XML_PARSER_EOF;
8265#ifdef DEBUG_PUSH
8266 xmlGenericError(xmlGenericErrorContext,
8267 "PP: entering EOF\n");
8268#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008269 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008270 ctxt->sax->endDocument(ctxt->userData);
8271 goto done;
8272 }
8273 if ((!terminate) &&
8274 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8275 goto done;
8276 if (ctxt->spaceNr == 0)
8277 spacePush(ctxt, -1);
8278 else
8279 spacePush(ctxt, *ctxt->space);
8280 name = xmlParseStartTag(ctxt);
8281 if (name == NULL) {
8282 spacePop(ctxt);
8283 ctxt->instate = XML_PARSER_EOF;
8284#ifdef DEBUG_PUSH
8285 xmlGenericError(xmlGenericErrorContext,
8286 "PP: entering EOF\n");
8287#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008288 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008289 ctxt->sax->endDocument(ctxt->userData);
8290 goto done;
8291 }
8292 namePush(ctxt, xmlStrdup(name));
8293
8294 /*
8295 * [ VC: Root Element Type ]
8296 * The Name in the document type declaration must match
8297 * the element type of the root element.
8298 */
8299 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8300 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8301 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8302
8303 /*
8304 * Check for an Empty Element.
8305 */
8306 if ((RAW == '/') && (NXT(1) == '>')) {
8307 SKIP(2);
8308 if ((ctxt->sax != NULL) &&
8309 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8310 ctxt->sax->endElement(ctxt->userData, name);
8311 xmlFree(name);
8312 oldname = namePop(ctxt);
8313 spacePop(ctxt);
8314 if (oldname != NULL) {
8315#ifdef DEBUG_STACK
8316 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8317#endif
8318 xmlFree(oldname);
8319 }
8320 if (ctxt->name == NULL) {
8321 ctxt->instate = XML_PARSER_EPILOG;
8322#ifdef DEBUG_PUSH
8323 xmlGenericError(xmlGenericErrorContext,
8324 "PP: entering EPILOG\n");
8325#endif
8326 } else {
8327 ctxt->instate = XML_PARSER_CONTENT;
8328#ifdef DEBUG_PUSH
8329 xmlGenericError(xmlGenericErrorContext,
8330 "PP: entering CONTENT\n");
8331#endif
8332 }
8333 break;
8334 }
8335 if (RAW == '>') {
8336 NEXT;
8337 } else {
8338 ctxt->errNo = XML_ERR_GT_REQUIRED;
8339 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8340 ctxt->sax->error(ctxt->userData,
8341 "Couldn't find end of Start Tag %s\n",
8342 name);
8343 ctxt->wellFormed = 0;
8344 ctxt->disableSAX = 1;
8345
8346 /*
8347 * end of parsing of this node.
8348 */
8349 nodePop(ctxt);
8350 oldname = namePop(ctxt);
8351 spacePop(ctxt);
8352 if (oldname != NULL) {
8353#ifdef DEBUG_STACK
8354 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8355#endif
8356 xmlFree(oldname);
8357 }
8358 }
8359 xmlFree(name);
8360 ctxt->instate = XML_PARSER_CONTENT;
8361#ifdef DEBUG_PUSH
8362 xmlGenericError(xmlGenericErrorContext,
8363 "PP: entering CONTENT\n");
8364#endif
8365 break;
8366 }
8367 case XML_PARSER_CONTENT: {
8368 const xmlChar *test;
8369 int cons;
Daniel Veillard04be4f52001-03-26 21:23:53 +00008370 int tok;
Owen Taylor3473f882001-02-23 17:55:21 +00008371
8372 /*
8373 * Handle preparsed entities and charRef
8374 */
8375 if (ctxt->token != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008376 xmlChar current[2] = { 0 , 0 } ;
Owen Taylor3473f882001-02-23 17:55:21 +00008377
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008378 current[0] = (xmlChar) ctxt->token;
Owen Taylor3473f882001-02-23 17:55:21 +00008379 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8380 (ctxt->sax->characters != NULL))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008381 ctxt->sax->characters(ctxt->userData, current, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00008382 ctxt->token = 0;
8383 }
8384 if ((avail < 2) && (ctxt->inputNr == 1))
8385 goto done;
8386 cur = ctxt->input->cur[0];
8387 next = ctxt->input->cur[1];
8388
8389 test = CUR_PTR;
8390 cons = ctxt->input->consumed;
8391 tok = ctxt->token;
8392 if ((cur == '<') && (next == '?')) {
8393 if ((!terminate) &&
8394 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8395 goto done;
8396#ifdef DEBUG_PUSH
8397 xmlGenericError(xmlGenericErrorContext,
8398 "PP: Parsing PI\n");
8399#endif
8400 xmlParsePI(ctxt);
8401 } else if ((cur == '<') && (next == '!') &&
8402 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8403 if ((!terminate) &&
8404 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8405 goto done;
8406#ifdef DEBUG_PUSH
8407 xmlGenericError(xmlGenericErrorContext,
8408 "PP: Parsing Comment\n");
8409#endif
8410 xmlParseComment(ctxt);
8411 ctxt->instate = XML_PARSER_CONTENT;
8412 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8413 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8414 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8415 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8416 (ctxt->input->cur[8] == '[')) {
8417 SKIP(9);
8418 ctxt->instate = XML_PARSER_CDATA_SECTION;
8419#ifdef DEBUG_PUSH
8420 xmlGenericError(xmlGenericErrorContext,
8421 "PP: entering CDATA_SECTION\n");
8422#endif
8423 break;
8424 } else if ((cur == '<') && (next == '!') &&
8425 (avail < 9)) {
8426 goto done;
8427 } else if ((cur == '<') && (next == '/')) {
8428 ctxt->instate = XML_PARSER_END_TAG;
8429#ifdef DEBUG_PUSH
8430 xmlGenericError(xmlGenericErrorContext,
8431 "PP: entering END_TAG\n");
8432#endif
8433 break;
8434 } else if (cur == '<') {
8435 ctxt->instate = XML_PARSER_START_TAG;
8436#ifdef DEBUG_PUSH
8437 xmlGenericError(xmlGenericErrorContext,
8438 "PP: entering START_TAG\n");
8439#endif
8440 break;
8441 } else if (cur == '&') {
8442 if ((!terminate) &&
8443 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8444 goto done;
8445#ifdef DEBUG_PUSH
8446 xmlGenericError(xmlGenericErrorContext,
8447 "PP: Parsing Reference\n");
8448#endif
8449 xmlParseReference(ctxt);
8450 } else {
8451 /* TODO Avoid the extra copy, handle directly !!! */
8452 /*
8453 * Goal of the following test is:
8454 * - minimize calls to the SAX 'character' callback
8455 * when they are mergeable
8456 * - handle an problem for isBlank when we only parse
8457 * a sequence of blank chars and the next one is
8458 * not available to check against '<' presence.
8459 * - tries to homogenize the differences in SAX
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008460 * callbacks between the push and pull versions
Owen Taylor3473f882001-02-23 17:55:21 +00008461 * of the parser.
8462 */
8463 if ((ctxt->inputNr == 1) &&
8464 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8465 if ((!terminate) &&
8466 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8467 goto done;
8468 }
8469 ctxt->checkIndex = 0;
8470#ifdef DEBUG_PUSH
8471 xmlGenericError(xmlGenericErrorContext,
8472 "PP: Parsing char data\n");
8473#endif
8474 xmlParseCharData(ctxt, 0);
8475 }
8476 /*
8477 * Pop-up of finished entities.
8478 */
8479 while ((RAW == 0) && (ctxt->inputNr > 1))
8480 xmlPopInput(ctxt);
8481 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8482 (tok == ctxt->token)) {
8483 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8484 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8485 ctxt->sax->error(ctxt->userData,
8486 "detected an error in element content\n");
8487 ctxt->wellFormed = 0;
8488 ctxt->disableSAX = 1;
8489 ctxt->instate = XML_PARSER_EOF;
8490 break;
8491 }
8492 break;
8493 }
8494 case XML_PARSER_CDATA_SECTION: {
8495 /*
8496 * The Push mode need to have the SAX callback for
8497 * cdataBlock merge back contiguous callbacks.
8498 */
8499 int base;
8500
8501 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8502 if (base < 0) {
8503 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8504 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8505 if (ctxt->sax->cdataBlock != NULL)
8506 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8507 XML_PARSER_BIG_BUFFER_SIZE);
8508 }
8509 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8510 ctxt->checkIndex = 0;
8511 }
8512 goto done;
8513 } else {
8514 if ((ctxt->sax != NULL) && (base > 0) &&
8515 (!ctxt->disableSAX)) {
8516 if (ctxt->sax->cdataBlock != NULL)
8517 ctxt->sax->cdataBlock(ctxt->userData,
8518 ctxt->input->cur, base);
8519 }
8520 SKIP(base + 3);
8521 ctxt->checkIndex = 0;
8522 ctxt->instate = XML_PARSER_CONTENT;
8523#ifdef DEBUG_PUSH
8524 xmlGenericError(xmlGenericErrorContext,
8525 "PP: entering CONTENT\n");
8526#endif
8527 }
8528 break;
8529 }
8530 case XML_PARSER_END_TAG:
8531 if (avail < 2)
8532 goto done;
8533 if ((!terminate) &&
8534 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8535 goto done;
8536 xmlParseEndTag(ctxt);
8537 if (ctxt->name == NULL) {
8538 ctxt->instate = XML_PARSER_EPILOG;
8539#ifdef DEBUG_PUSH
8540 xmlGenericError(xmlGenericErrorContext,
8541 "PP: entering EPILOG\n");
8542#endif
8543 } else {
8544 ctxt->instate = XML_PARSER_CONTENT;
8545#ifdef DEBUG_PUSH
8546 xmlGenericError(xmlGenericErrorContext,
8547 "PP: entering CONTENT\n");
8548#endif
8549 }
8550 break;
8551 case XML_PARSER_DTD: {
8552 /*
8553 * Sorry but progressive parsing of the internal subset
8554 * is not expected to be supported. We first check that
8555 * the full content of the internal subset is available and
8556 * the parsing is launched only at that point.
8557 * Internal subset ends up with "']' S? '>'" in an unescaped
8558 * section and not in a ']]>' sequence which are conditional
8559 * sections (whoever argued to keep that crap in XML deserve
8560 * a place in hell !).
8561 */
8562 int base, i;
8563 xmlChar *buf;
8564 xmlChar quote = 0;
8565
8566 base = ctxt->input->cur - ctxt->input->base;
8567 if (base < 0) return(0);
8568 if (ctxt->checkIndex > base)
8569 base = ctxt->checkIndex;
8570 buf = ctxt->input->buf->buffer->content;
8571 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8572 base++) {
8573 if (quote != 0) {
8574 if (buf[base] == quote)
8575 quote = 0;
8576 continue;
8577 }
8578 if (buf[base] == '"') {
8579 quote = '"';
8580 continue;
8581 }
8582 if (buf[base] == '\'') {
8583 quote = '\'';
8584 continue;
8585 }
8586 if (buf[base] == ']') {
8587 if ((unsigned int) base +1 >=
8588 ctxt->input->buf->buffer->use)
8589 break;
8590 if (buf[base + 1] == ']') {
8591 /* conditional crap, skip both ']' ! */
8592 base++;
8593 continue;
8594 }
8595 for (i = 0;
8596 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8597 i++) {
8598 if (buf[base + i] == '>')
8599 goto found_end_int_subset;
8600 }
8601 break;
8602 }
8603 }
8604 /*
8605 * We didn't found the end of the Internal subset
8606 */
8607 if (quote == 0)
8608 ctxt->checkIndex = base;
8609#ifdef DEBUG_PUSH
8610 if (next == 0)
8611 xmlGenericError(xmlGenericErrorContext,
8612 "PP: lookup of int subset end filed\n");
8613#endif
8614 goto done;
8615
8616found_end_int_subset:
8617 xmlParseInternalSubset(ctxt);
8618 ctxt->inSubset = 2;
8619 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8620 (ctxt->sax->externalSubset != NULL))
8621 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8622 ctxt->extSubSystem, ctxt->extSubURI);
8623 ctxt->inSubset = 0;
8624 ctxt->instate = XML_PARSER_PROLOG;
8625 ctxt->checkIndex = 0;
8626#ifdef DEBUG_PUSH
8627 xmlGenericError(xmlGenericErrorContext,
8628 "PP: entering PROLOG\n");
8629#endif
8630 break;
8631 }
8632 case XML_PARSER_COMMENT:
8633 xmlGenericError(xmlGenericErrorContext,
8634 "PP: internal error, state == COMMENT\n");
8635 ctxt->instate = XML_PARSER_CONTENT;
8636#ifdef DEBUG_PUSH
8637 xmlGenericError(xmlGenericErrorContext,
8638 "PP: entering CONTENT\n");
8639#endif
8640 break;
8641 case XML_PARSER_PI:
8642 xmlGenericError(xmlGenericErrorContext,
8643 "PP: internal error, state == PI\n");
8644 ctxt->instate = XML_PARSER_CONTENT;
8645#ifdef DEBUG_PUSH
8646 xmlGenericError(xmlGenericErrorContext,
8647 "PP: entering CONTENT\n");
8648#endif
8649 break;
8650 case XML_PARSER_ENTITY_DECL:
8651 xmlGenericError(xmlGenericErrorContext,
8652 "PP: internal error, state == ENTITY_DECL\n");
8653 ctxt->instate = XML_PARSER_DTD;
8654#ifdef DEBUG_PUSH
8655 xmlGenericError(xmlGenericErrorContext,
8656 "PP: entering DTD\n");
8657#endif
8658 break;
8659 case XML_PARSER_ENTITY_VALUE:
8660 xmlGenericError(xmlGenericErrorContext,
8661 "PP: internal error, state == ENTITY_VALUE\n");
8662 ctxt->instate = XML_PARSER_CONTENT;
8663#ifdef DEBUG_PUSH
8664 xmlGenericError(xmlGenericErrorContext,
8665 "PP: entering DTD\n");
8666#endif
8667 break;
8668 case XML_PARSER_ATTRIBUTE_VALUE:
8669 xmlGenericError(xmlGenericErrorContext,
8670 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8671 ctxt->instate = XML_PARSER_START_TAG;
8672#ifdef DEBUG_PUSH
8673 xmlGenericError(xmlGenericErrorContext,
8674 "PP: entering START_TAG\n");
8675#endif
8676 break;
8677 case XML_PARSER_SYSTEM_LITERAL:
8678 xmlGenericError(xmlGenericErrorContext,
8679 "PP: internal error, state == SYSTEM_LITERAL\n");
8680 ctxt->instate = XML_PARSER_START_TAG;
8681#ifdef DEBUG_PUSH
8682 xmlGenericError(xmlGenericErrorContext,
8683 "PP: entering START_TAG\n");
8684#endif
8685 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00008686 case XML_PARSER_PUBLIC_LITERAL:
8687 xmlGenericError(xmlGenericErrorContext,
8688 "PP: internal error, state == PUBLIC_LITERAL\n");
8689 ctxt->instate = XML_PARSER_START_TAG;
8690#ifdef DEBUG_PUSH
8691 xmlGenericError(xmlGenericErrorContext,
8692 "PP: entering START_TAG\n");
8693#endif
8694 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008695 }
8696 }
8697done:
8698#ifdef DEBUG_PUSH
8699 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8700#endif
8701 return(ret);
8702}
8703
8704/**
Owen Taylor3473f882001-02-23 17:55:21 +00008705 * xmlParseChunk:
8706 * @ctxt: an XML parser context
8707 * @chunk: an char array
8708 * @size: the size in byte of the chunk
8709 * @terminate: last chunk indicator
8710 *
8711 * Parse a Chunk of memory
8712 *
8713 * Returns zero if no error, the xmlParserErrors otherwise.
8714 */
8715int
8716xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8717 int terminate) {
8718 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8719 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8720 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8721 int cur = ctxt->input->cur - ctxt->input->base;
8722
8723 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8724 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8725 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008726 ctxt->input->end =
8727 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008728#ifdef DEBUG_PUSH
8729 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8730#endif
8731
8732 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8733 xmlParseTryOrFinish(ctxt, terminate);
8734 } else if (ctxt->instate != XML_PARSER_EOF) {
8735 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8736 xmlParserInputBufferPtr in = ctxt->input->buf;
8737 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8738 (in->raw != NULL)) {
8739 int nbchars;
8740
8741 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8742 if (nbchars < 0) {
8743 xmlGenericError(xmlGenericErrorContext,
8744 "xmlParseChunk: encoder error\n");
8745 return(XML_ERR_INVALID_ENCODING);
8746 }
8747 }
8748 }
8749 }
8750 xmlParseTryOrFinish(ctxt, terminate);
8751 if (terminate) {
8752 /*
8753 * Check for termination
8754 */
8755 if ((ctxt->instate != XML_PARSER_EOF) &&
8756 (ctxt->instate != XML_PARSER_EPILOG)) {
8757 ctxt->errNo = XML_ERR_DOCUMENT_END;
8758 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8759 ctxt->sax->error(ctxt->userData,
8760 "Extra content at the end of the document\n");
8761 ctxt->wellFormed = 0;
8762 ctxt->disableSAX = 1;
8763 }
8764 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008765 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008766 ctxt->sax->endDocument(ctxt->userData);
8767 }
8768 ctxt->instate = XML_PARSER_EOF;
8769 }
8770 return((xmlParserErrors) ctxt->errNo);
8771}
8772
8773/************************************************************************
8774 * *
8775 * I/O front end functions to the parser *
8776 * *
8777 ************************************************************************/
8778
8779/**
8780 * xmlStopParser:
8781 * @ctxt: an XML parser context
8782 *
8783 * Blocks further parser processing
8784 */
8785void
8786xmlStopParser(xmlParserCtxtPtr ctxt) {
8787 ctxt->instate = XML_PARSER_EOF;
8788 if (ctxt->input != NULL)
8789 ctxt->input->cur = BAD_CAST"";
8790}
8791
8792/**
8793 * xmlCreatePushParserCtxt:
8794 * @sax: a SAX handler
8795 * @user_data: The user data returned on SAX callbacks
8796 * @chunk: a pointer to an array of chars
8797 * @size: number of chars in the array
8798 * @filename: an optional file name or URI
8799 *
8800 * Create a parser context for using the XML parser in push mode
8801 * To allow content encoding detection, @size should be >= 4
8802 * The value of @filename is used for fetching external entities
8803 * and error/warning reports.
8804 *
8805 * Returns the new parser context or NULL
8806 */
8807xmlParserCtxtPtr
8808xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8809 const char *chunk, int size, const char *filename) {
8810 xmlParserCtxtPtr ctxt;
8811 xmlParserInputPtr inputStream;
8812 xmlParserInputBufferPtr buf;
8813 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8814
8815 /*
8816 * plug some encoding conversion routines
8817 */
8818 if ((chunk != NULL) && (size >= 4))
8819 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8820
8821 buf = xmlAllocParserInputBuffer(enc);
8822 if (buf == NULL) return(NULL);
8823
8824 ctxt = xmlNewParserCtxt();
8825 if (ctxt == NULL) {
8826 xmlFree(buf);
8827 return(NULL);
8828 }
8829 if (sax != NULL) {
8830 if (ctxt->sax != &xmlDefaultSAXHandler)
8831 xmlFree(ctxt->sax);
8832 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8833 if (ctxt->sax == NULL) {
8834 xmlFree(buf);
8835 xmlFree(ctxt);
8836 return(NULL);
8837 }
8838 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8839 if (user_data != NULL)
8840 ctxt->userData = user_data;
8841 }
8842 if (filename == NULL) {
8843 ctxt->directory = NULL;
8844 } else {
8845 ctxt->directory = xmlParserGetDirectory(filename);
8846 }
8847
8848 inputStream = xmlNewInputStream(ctxt);
8849 if (inputStream == NULL) {
8850 xmlFreeParserCtxt(ctxt);
8851 return(NULL);
8852 }
8853
8854 if (filename == NULL)
8855 inputStream->filename = NULL;
8856 else
8857 inputStream->filename = xmlMemStrdup(filename);
8858 inputStream->buf = buf;
8859 inputStream->base = inputStream->buf->buffer->content;
8860 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008861 inputStream->end =
8862 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008863
8864 inputPush(ctxt, inputStream);
8865
8866 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8867 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00008868 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8869 int cur = ctxt->input->cur - ctxt->input->base;
8870
Owen Taylor3473f882001-02-23 17:55:21 +00008871 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00008872
8873 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8874 ctxt->input->cur = ctxt->input->base + cur;
8875 ctxt->input->end =
8876 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008877#ifdef DEBUG_PUSH
8878 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8879#endif
8880 }
8881
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008882 if (enc != XML_CHAR_ENCODING_NONE) {
8883 xmlSwitchEncoding(ctxt, enc);
8884 }
8885
Owen Taylor3473f882001-02-23 17:55:21 +00008886 return(ctxt);
8887}
8888
8889/**
8890 * xmlCreateIOParserCtxt:
8891 * @sax: a SAX handler
8892 * @user_data: The user data returned on SAX callbacks
8893 * @ioread: an I/O read function
8894 * @ioclose: an I/O close function
8895 * @ioctx: an I/O handler
8896 * @enc: the charset encoding if known
8897 *
8898 * Create a parser context for using the XML parser with an existing
8899 * I/O stream
8900 *
8901 * Returns the new parser context or NULL
8902 */
8903xmlParserCtxtPtr
8904xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8905 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8906 void *ioctx, xmlCharEncoding enc) {
8907 xmlParserCtxtPtr ctxt;
8908 xmlParserInputPtr inputStream;
8909 xmlParserInputBufferPtr buf;
8910
8911 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8912 if (buf == NULL) return(NULL);
8913
8914 ctxt = xmlNewParserCtxt();
8915 if (ctxt == NULL) {
8916 xmlFree(buf);
8917 return(NULL);
8918 }
8919 if (sax != NULL) {
8920 if (ctxt->sax != &xmlDefaultSAXHandler)
8921 xmlFree(ctxt->sax);
8922 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8923 if (ctxt->sax == NULL) {
8924 xmlFree(buf);
8925 xmlFree(ctxt);
8926 return(NULL);
8927 }
8928 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8929 if (user_data != NULL)
8930 ctxt->userData = user_data;
8931 }
8932
8933 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8934 if (inputStream == NULL) {
8935 xmlFreeParserCtxt(ctxt);
8936 return(NULL);
8937 }
8938 inputPush(ctxt, inputStream);
8939
8940 return(ctxt);
8941}
8942
8943/************************************************************************
8944 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008945 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00008946 * *
8947 ************************************************************************/
8948
8949/**
8950 * xmlIOParseDTD:
8951 * @sax: the SAX handler block or NULL
8952 * @input: an Input Buffer
8953 * @enc: the charset encoding if known
8954 *
8955 * Load and parse a DTD
8956 *
8957 * Returns the resulting xmlDtdPtr or NULL in case of error.
8958 * @input will be freed at parsing end.
8959 */
8960
8961xmlDtdPtr
8962xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8963 xmlCharEncoding enc) {
8964 xmlDtdPtr ret = NULL;
8965 xmlParserCtxtPtr ctxt;
8966 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00008967 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00008968
8969 if (input == NULL)
8970 return(NULL);
8971
8972 ctxt = xmlNewParserCtxt();
8973 if (ctxt == NULL) {
8974 return(NULL);
8975 }
8976
8977 /*
8978 * Set-up the SAX context
8979 */
8980 if (sax != NULL) {
8981 if (ctxt->sax != NULL)
8982 xmlFree(ctxt->sax);
8983 ctxt->sax = sax;
8984 ctxt->userData = NULL;
8985 }
8986
8987 /*
8988 * generate a parser input from the I/O handler
8989 */
8990
8991 pinput = xmlNewIOInputStream(ctxt, input, enc);
8992 if (pinput == NULL) {
8993 if (sax != NULL) ctxt->sax = NULL;
8994 xmlFreeParserCtxt(ctxt);
8995 return(NULL);
8996 }
8997
8998 /*
8999 * plug some encoding conversion routines here.
9000 */
9001 xmlPushInput(ctxt, pinput);
9002
9003 pinput->filename = NULL;
9004 pinput->line = 1;
9005 pinput->col = 1;
9006 pinput->base = ctxt->input->cur;
9007 pinput->cur = ctxt->input->cur;
9008 pinput->free = NULL;
9009
9010 /*
9011 * let's parse that entity knowing it's an external subset.
9012 */
9013 ctxt->inSubset = 2;
9014 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9015 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9016 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009017
9018 if (enc == XML_CHAR_ENCODING_NONE) {
9019 /*
9020 * Get the 4 first bytes and decode the charset
9021 * if enc != XML_CHAR_ENCODING_NONE
9022 * plug some encoding conversion routines.
9023 */
9024 start[0] = RAW;
9025 start[1] = NXT(1);
9026 start[2] = NXT(2);
9027 start[3] = NXT(3);
9028 enc = xmlDetectCharEncoding(start, 4);
9029 if (enc != XML_CHAR_ENCODING_NONE) {
9030 xmlSwitchEncoding(ctxt, enc);
9031 }
9032 }
9033
Owen Taylor3473f882001-02-23 17:55:21 +00009034 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9035
9036 if (ctxt->myDoc != NULL) {
9037 if (ctxt->wellFormed) {
9038 ret = ctxt->myDoc->extSubset;
9039 ctxt->myDoc->extSubset = NULL;
9040 } else {
9041 ret = NULL;
9042 }
9043 xmlFreeDoc(ctxt->myDoc);
9044 ctxt->myDoc = NULL;
9045 }
9046 if (sax != NULL) ctxt->sax = NULL;
9047 xmlFreeParserCtxt(ctxt);
9048
9049 return(ret);
9050}
9051
9052/**
9053 * xmlSAXParseDTD:
9054 * @sax: the SAX handler block
9055 * @ExternalID: a NAME* containing the External ID of the DTD
9056 * @SystemID: a NAME* containing the URL to the DTD
9057 *
9058 * Load and parse an external subset.
9059 *
9060 * Returns the resulting xmlDtdPtr or NULL in case of error.
9061 */
9062
9063xmlDtdPtr
9064xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9065 const xmlChar *SystemID) {
9066 xmlDtdPtr ret = NULL;
9067 xmlParserCtxtPtr ctxt;
9068 xmlParserInputPtr input = NULL;
9069 xmlCharEncoding enc;
9070
9071 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9072
9073 ctxt = xmlNewParserCtxt();
9074 if (ctxt == NULL) {
9075 return(NULL);
9076 }
9077
9078 /*
9079 * Set-up the SAX context
9080 */
9081 if (sax != NULL) {
9082 if (ctxt->sax != NULL)
9083 xmlFree(ctxt->sax);
9084 ctxt->sax = sax;
9085 ctxt->userData = NULL;
9086 }
9087
9088 /*
9089 * Ask the Entity resolver to load the damn thing
9090 */
9091
9092 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9093 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9094 if (input == NULL) {
9095 if (sax != NULL) ctxt->sax = NULL;
9096 xmlFreeParserCtxt(ctxt);
9097 return(NULL);
9098 }
9099
9100 /*
9101 * plug some encoding conversion routines here.
9102 */
9103 xmlPushInput(ctxt, input);
9104 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9105 xmlSwitchEncoding(ctxt, enc);
9106
9107 if (input->filename == NULL)
9108 input->filename = (char *) xmlStrdup(SystemID);
9109 input->line = 1;
9110 input->col = 1;
9111 input->base = ctxt->input->cur;
9112 input->cur = ctxt->input->cur;
9113 input->free = NULL;
9114
9115 /*
9116 * let's parse that entity knowing it's an external subset.
9117 */
9118 ctxt->inSubset = 2;
9119 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9120 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9121 ExternalID, SystemID);
9122 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9123
9124 if (ctxt->myDoc != NULL) {
9125 if (ctxt->wellFormed) {
9126 ret = ctxt->myDoc->extSubset;
9127 ctxt->myDoc->extSubset = NULL;
9128 } else {
9129 ret = NULL;
9130 }
9131 xmlFreeDoc(ctxt->myDoc);
9132 ctxt->myDoc = NULL;
9133 }
9134 if (sax != NULL) ctxt->sax = NULL;
9135 xmlFreeParserCtxt(ctxt);
9136
9137 return(ret);
9138}
9139
9140/**
9141 * xmlParseDTD:
9142 * @ExternalID: a NAME* containing the External ID of the DTD
9143 * @SystemID: a NAME* containing the URL to the DTD
9144 *
9145 * Load and parse an external subset.
9146 *
9147 * Returns the resulting xmlDtdPtr or NULL in case of error.
9148 */
9149
9150xmlDtdPtr
9151xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9152 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9153}
9154
9155/************************************************************************
9156 * *
9157 * Front ends when parsing an Entity *
9158 * *
9159 ************************************************************************/
9160
9161/**
Owen Taylor3473f882001-02-23 17:55:21 +00009162 * xmlParseCtxtExternalEntity:
9163 * @ctx: the existing parsing context
9164 * @URL: the URL for the entity to load
9165 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009166 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009167 *
9168 * Parse an external general entity within an existing parsing context
9169 * An external general parsed entity is well-formed if it matches the
9170 * production labeled extParsedEnt.
9171 *
9172 * [78] extParsedEnt ::= TextDecl? content
9173 *
9174 * Returns 0 if the entity is well formed, -1 in case of args problem and
9175 * the parser error code otherwise
9176 */
9177
9178int
9179xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009180 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009181 xmlParserCtxtPtr ctxt;
9182 xmlDocPtr newDoc;
9183 xmlSAXHandlerPtr oldsax = NULL;
9184 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009185 xmlChar start[4];
9186 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009187
9188 if (ctx->depth > 40) {
9189 return(XML_ERR_ENTITY_LOOP);
9190 }
9191
Daniel Veillardcda96922001-08-21 10:56:31 +00009192 if (lst != NULL)
9193 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009194 if ((URL == NULL) && (ID == NULL))
9195 return(-1);
9196 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9197 return(-1);
9198
9199
9200 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9201 if (ctxt == NULL) return(-1);
9202 ctxt->userData = ctxt;
9203 oldsax = ctxt->sax;
9204 ctxt->sax = ctx->sax;
9205 newDoc = xmlNewDoc(BAD_CAST "1.0");
9206 if (newDoc == NULL) {
9207 xmlFreeParserCtxt(ctxt);
9208 return(-1);
9209 }
9210 if (ctx->myDoc != NULL) {
9211 newDoc->intSubset = ctx->myDoc->intSubset;
9212 newDoc->extSubset = ctx->myDoc->extSubset;
9213 }
9214 if (ctx->myDoc->URL != NULL) {
9215 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9216 }
9217 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9218 if (newDoc->children == NULL) {
9219 ctxt->sax = oldsax;
9220 xmlFreeParserCtxt(ctxt);
9221 newDoc->intSubset = NULL;
9222 newDoc->extSubset = NULL;
9223 xmlFreeDoc(newDoc);
9224 return(-1);
9225 }
9226 nodePush(ctxt, newDoc->children);
9227 if (ctx->myDoc == NULL) {
9228 ctxt->myDoc = newDoc;
9229 } else {
9230 ctxt->myDoc = ctx->myDoc;
9231 newDoc->children->doc = ctx->myDoc;
9232 }
9233
Daniel Veillard87a764e2001-06-20 17:41:10 +00009234 /*
9235 * Get the 4 first bytes and decode the charset
9236 * if enc != XML_CHAR_ENCODING_NONE
9237 * plug some encoding conversion routines.
9238 */
9239 GROW
9240 start[0] = RAW;
9241 start[1] = NXT(1);
9242 start[2] = NXT(2);
9243 start[3] = NXT(3);
9244 enc = xmlDetectCharEncoding(start, 4);
9245 if (enc != XML_CHAR_ENCODING_NONE) {
9246 xmlSwitchEncoding(ctxt, enc);
9247 }
9248
Owen Taylor3473f882001-02-23 17:55:21 +00009249 /*
9250 * Parse a possible text declaration first
9251 */
Owen Taylor3473f882001-02-23 17:55:21 +00009252 if ((RAW == '<') && (NXT(1) == '?') &&
9253 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9254 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9255 xmlParseTextDecl(ctxt);
9256 }
9257
9258 /*
9259 * Doing validity checking on chunk doesn't make sense
9260 */
9261 ctxt->instate = XML_PARSER_CONTENT;
9262 ctxt->validate = ctx->validate;
9263 ctxt->loadsubset = ctx->loadsubset;
9264 ctxt->depth = ctx->depth + 1;
9265 ctxt->replaceEntities = ctx->replaceEntities;
9266 if (ctxt->validate) {
9267 ctxt->vctxt.error = ctx->vctxt.error;
9268 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009269 } else {
9270 ctxt->vctxt.error = NULL;
9271 ctxt->vctxt.warning = NULL;
9272 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009273 ctxt->vctxt.nodeTab = NULL;
9274 ctxt->vctxt.nodeNr = 0;
9275 ctxt->vctxt.nodeMax = 0;
9276 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009277
9278 xmlParseContent(ctxt);
9279
9280 if ((RAW == '<') && (NXT(1) == '/')) {
9281 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9282 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9283 ctxt->sax->error(ctxt->userData,
9284 "chunk is not well balanced\n");
9285 ctxt->wellFormed = 0;
9286 ctxt->disableSAX = 1;
9287 } else if (RAW != 0) {
9288 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9289 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9290 ctxt->sax->error(ctxt->userData,
9291 "extra content at the end of well balanced chunk\n");
9292 ctxt->wellFormed = 0;
9293 ctxt->disableSAX = 1;
9294 }
9295 if (ctxt->node != newDoc->children) {
9296 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9297 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9298 ctxt->sax->error(ctxt->userData,
9299 "chunk is not well balanced\n");
9300 ctxt->wellFormed = 0;
9301 ctxt->disableSAX = 1;
9302 }
9303
9304 if (!ctxt->wellFormed) {
9305 if (ctxt->errNo == 0)
9306 ret = 1;
9307 else
9308 ret = ctxt->errNo;
9309 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009310 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009311 xmlNodePtr cur;
9312
9313 /*
9314 * Return the newly created nodeset after unlinking it from
9315 * they pseudo parent.
9316 */
9317 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009318 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009319 while (cur != NULL) {
9320 cur->parent = NULL;
9321 cur = cur->next;
9322 }
9323 newDoc->children->children = NULL;
9324 }
9325 ret = 0;
9326 }
9327 ctxt->sax = oldsax;
9328 xmlFreeParserCtxt(ctxt);
9329 newDoc->intSubset = NULL;
9330 newDoc->extSubset = NULL;
9331 xmlFreeDoc(newDoc);
9332
9333 return(ret);
9334}
9335
9336/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009337 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009338 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009339 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009340 * @sax: the SAX handler bloc (possibly NULL)
9341 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9342 * @depth: Used for loop detection, use 0
9343 * @URL: the URL for the entity to load
9344 * @ID: the System ID for the entity to load
9345 * @list: the return value for the set of parsed nodes
9346 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009347 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009348 *
9349 * Returns 0 if the entity is well formed, -1 in case of args problem and
9350 * the parser error code otherwise
9351 */
9352
Daniel Veillard257d9102001-05-08 10:41:44 +00009353static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009354xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9355 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009356 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009357 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009358 xmlParserCtxtPtr ctxt;
9359 xmlDocPtr newDoc;
9360 xmlSAXHandlerPtr oldsax = NULL;
9361 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009362 xmlChar start[4];
9363 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009364
9365 if (depth > 40) {
9366 return(XML_ERR_ENTITY_LOOP);
9367 }
9368
9369
9370
9371 if (list != NULL)
9372 *list = NULL;
9373 if ((URL == NULL) && (ID == NULL))
9374 return(-1);
9375 if (doc == NULL) /* @@ relax but check for dereferences */
9376 return(-1);
9377
9378
9379 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9380 if (ctxt == NULL) return(-1);
9381 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009382 if (oldctxt != NULL) {
9383 ctxt->_private = oldctxt->_private;
9384 ctxt->loadsubset = oldctxt->loadsubset;
9385 ctxt->validate = oldctxt->validate;
9386 ctxt->external = oldctxt->external;
9387 } else {
9388 /*
9389 * Doing validity checking on chunk without context
9390 * doesn't make sense
9391 */
9392 ctxt->_private = NULL;
9393 ctxt->validate = 0;
9394 ctxt->external = 2;
9395 ctxt->loadsubset = 0;
9396 }
Owen Taylor3473f882001-02-23 17:55:21 +00009397 if (sax != NULL) {
9398 oldsax = ctxt->sax;
9399 ctxt->sax = sax;
9400 if (user_data != NULL)
9401 ctxt->userData = user_data;
9402 }
9403 newDoc = xmlNewDoc(BAD_CAST "1.0");
9404 if (newDoc == NULL) {
9405 xmlFreeParserCtxt(ctxt);
9406 return(-1);
9407 }
9408 if (doc != NULL) {
9409 newDoc->intSubset = doc->intSubset;
9410 newDoc->extSubset = doc->extSubset;
9411 }
9412 if (doc->URL != NULL) {
9413 newDoc->URL = xmlStrdup(doc->URL);
9414 }
9415 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9416 if (newDoc->children == NULL) {
9417 if (sax != NULL)
9418 ctxt->sax = oldsax;
9419 xmlFreeParserCtxt(ctxt);
9420 newDoc->intSubset = NULL;
9421 newDoc->extSubset = NULL;
9422 xmlFreeDoc(newDoc);
9423 return(-1);
9424 }
9425 nodePush(ctxt, newDoc->children);
9426 if (doc == NULL) {
9427 ctxt->myDoc = newDoc;
9428 } else {
9429 ctxt->myDoc = doc;
9430 newDoc->children->doc = doc;
9431 }
9432
Daniel Veillard87a764e2001-06-20 17:41:10 +00009433 /*
9434 * Get the 4 first bytes and decode the charset
9435 * if enc != XML_CHAR_ENCODING_NONE
9436 * plug some encoding conversion routines.
9437 */
9438 GROW;
9439 start[0] = RAW;
9440 start[1] = NXT(1);
9441 start[2] = NXT(2);
9442 start[3] = NXT(3);
9443 enc = xmlDetectCharEncoding(start, 4);
9444 if (enc != XML_CHAR_ENCODING_NONE) {
9445 xmlSwitchEncoding(ctxt, enc);
9446 }
9447
Owen Taylor3473f882001-02-23 17:55:21 +00009448 /*
9449 * Parse a possible text declaration first
9450 */
Owen Taylor3473f882001-02-23 17:55:21 +00009451 if ((RAW == '<') && (NXT(1) == '?') &&
9452 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9453 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9454 xmlParseTextDecl(ctxt);
9455 }
9456
Owen Taylor3473f882001-02-23 17:55:21 +00009457 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009458 ctxt->depth = depth;
9459
9460 xmlParseContent(ctxt);
9461
9462 if ((RAW == '<') && (NXT(1) == '/')) {
9463 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9464 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9465 ctxt->sax->error(ctxt->userData,
9466 "chunk is not well balanced\n");
9467 ctxt->wellFormed = 0;
9468 ctxt->disableSAX = 1;
9469 } else if (RAW != 0) {
9470 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9471 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9472 ctxt->sax->error(ctxt->userData,
9473 "extra content at the end of well balanced chunk\n");
9474 ctxt->wellFormed = 0;
9475 ctxt->disableSAX = 1;
9476 }
9477 if (ctxt->node != newDoc->children) {
9478 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9479 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9480 ctxt->sax->error(ctxt->userData,
9481 "chunk is not well balanced\n");
9482 ctxt->wellFormed = 0;
9483 ctxt->disableSAX = 1;
9484 }
9485
9486 if (!ctxt->wellFormed) {
9487 if (ctxt->errNo == 0)
9488 ret = 1;
9489 else
9490 ret = ctxt->errNo;
9491 } else {
9492 if (list != NULL) {
9493 xmlNodePtr cur;
9494
9495 /*
9496 * Return the newly created nodeset after unlinking it from
9497 * they pseudo parent.
9498 */
9499 cur = newDoc->children->children;
9500 *list = cur;
9501 while (cur != NULL) {
9502 cur->parent = NULL;
9503 cur = cur->next;
9504 }
9505 newDoc->children->children = NULL;
9506 }
9507 ret = 0;
9508 }
9509 if (sax != NULL)
9510 ctxt->sax = oldsax;
9511 xmlFreeParserCtxt(ctxt);
9512 newDoc->intSubset = NULL;
9513 newDoc->extSubset = NULL;
9514 xmlFreeDoc(newDoc);
9515
9516 return(ret);
9517}
9518
9519/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009520 * xmlParseExternalEntity:
9521 * @doc: the document the chunk pertains to
9522 * @sax: the SAX handler bloc (possibly NULL)
9523 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9524 * @depth: Used for loop detection, use 0
9525 * @URL: the URL for the entity to load
9526 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009527 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009528 *
9529 * Parse an external general entity
9530 * An external general parsed entity is well-formed if it matches the
9531 * production labeled extParsedEnt.
9532 *
9533 * [78] extParsedEnt ::= TextDecl? content
9534 *
9535 * Returns 0 if the entity is well formed, -1 in case of args problem and
9536 * the parser error code otherwise
9537 */
9538
9539int
9540xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009541 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009542 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009543 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009544}
9545
9546/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009547 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009548 * @doc: the document the chunk pertains to
9549 * @sax: the SAX handler bloc (possibly NULL)
9550 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9551 * @depth: Used for loop detection, use 0
9552 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009553 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009554 *
9555 * Parse a well-balanced chunk of an XML document
9556 * called by the parser
9557 * The allowed sequence for the Well Balanced Chunk is the one defined by
9558 * the content production in the XML grammar:
9559 *
9560 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9561 *
9562 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9563 * the parser error code otherwise
9564 */
9565
9566int
9567xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009568 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009569 xmlParserCtxtPtr ctxt;
9570 xmlDocPtr newDoc;
9571 xmlSAXHandlerPtr oldsax = NULL;
9572 int size;
9573 int ret = 0;
9574
9575 if (depth > 40) {
9576 return(XML_ERR_ENTITY_LOOP);
9577 }
9578
9579
Daniel Veillardcda96922001-08-21 10:56:31 +00009580 if (lst != NULL)
9581 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009582 if (string == NULL)
9583 return(-1);
9584
9585 size = xmlStrlen(string);
9586
9587 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9588 if (ctxt == NULL) return(-1);
9589 ctxt->userData = ctxt;
9590 if (sax != NULL) {
9591 oldsax = ctxt->sax;
9592 ctxt->sax = sax;
9593 if (user_data != NULL)
9594 ctxt->userData = user_data;
9595 }
9596 newDoc = xmlNewDoc(BAD_CAST "1.0");
9597 if (newDoc == NULL) {
9598 xmlFreeParserCtxt(ctxt);
9599 return(-1);
9600 }
9601 if (doc != NULL) {
9602 newDoc->intSubset = doc->intSubset;
9603 newDoc->extSubset = doc->extSubset;
9604 }
9605 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9606 if (newDoc->children == NULL) {
9607 if (sax != NULL)
9608 ctxt->sax = oldsax;
9609 xmlFreeParserCtxt(ctxt);
9610 newDoc->intSubset = NULL;
9611 newDoc->extSubset = NULL;
9612 xmlFreeDoc(newDoc);
9613 return(-1);
9614 }
9615 nodePush(ctxt, newDoc->children);
9616 if (doc == NULL) {
9617 ctxt->myDoc = newDoc;
9618 } else {
9619 ctxt->myDoc = doc;
9620 newDoc->children->doc = doc;
9621 }
9622 ctxt->instate = XML_PARSER_CONTENT;
9623 ctxt->depth = depth;
9624
9625 /*
9626 * Doing validity checking on chunk doesn't make sense
9627 */
9628 ctxt->validate = 0;
9629 ctxt->loadsubset = 0;
9630
9631 xmlParseContent(ctxt);
9632
9633 if ((RAW == '<') && (NXT(1) == '/')) {
9634 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9635 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9636 ctxt->sax->error(ctxt->userData,
9637 "chunk is not well balanced\n");
9638 ctxt->wellFormed = 0;
9639 ctxt->disableSAX = 1;
9640 } else if (RAW != 0) {
9641 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9642 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9643 ctxt->sax->error(ctxt->userData,
9644 "extra content at the end of well balanced chunk\n");
9645 ctxt->wellFormed = 0;
9646 ctxt->disableSAX = 1;
9647 }
9648 if (ctxt->node != newDoc->children) {
9649 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9650 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9651 ctxt->sax->error(ctxt->userData,
9652 "chunk is not well balanced\n");
9653 ctxt->wellFormed = 0;
9654 ctxt->disableSAX = 1;
9655 }
9656
9657 if (!ctxt->wellFormed) {
9658 if (ctxt->errNo == 0)
9659 ret = 1;
9660 else
9661 ret = ctxt->errNo;
9662 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009663 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009664 xmlNodePtr cur;
9665
9666 /*
9667 * Return the newly created nodeset after unlinking it from
9668 * they pseudo parent.
9669 */
9670 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009671 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009672 while (cur != NULL) {
9673 cur->parent = NULL;
9674 cur = cur->next;
9675 }
9676 newDoc->children->children = NULL;
9677 }
9678 ret = 0;
9679 }
9680 if (sax != NULL)
9681 ctxt->sax = oldsax;
9682 xmlFreeParserCtxt(ctxt);
9683 newDoc->intSubset = NULL;
9684 newDoc->extSubset = NULL;
9685 xmlFreeDoc(newDoc);
9686
9687 return(ret);
9688}
9689
9690/**
9691 * xmlSAXParseEntity:
9692 * @sax: the SAX handler block
9693 * @filename: the filename
9694 *
9695 * parse an XML external entity out of context and build a tree.
9696 * It use the given SAX function block to handle the parsing callback.
9697 * If sax is NULL, fallback to the default DOM tree building routines.
9698 *
9699 * [78] extParsedEnt ::= TextDecl? content
9700 *
9701 * This correspond to a "Well Balanced" chunk
9702 *
9703 * Returns the resulting document tree
9704 */
9705
9706xmlDocPtr
9707xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9708 xmlDocPtr ret;
9709 xmlParserCtxtPtr ctxt;
9710 char *directory = NULL;
9711
9712 ctxt = xmlCreateFileParserCtxt(filename);
9713 if (ctxt == NULL) {
9714 return(NULL);
9715 }
9716 if (sax != NULL) {
9717 if (ctxt->sax != NULL)
9718 xmlFree(ctxt->sax);
9719 ctxt->sax = sax;
9720 ctxt->userData = NULL;
9721 }
9722
9723 if ((ctxt->directory == NULL) && (directory == NULL))
9724 directory = xmlParserGetDirectory(filename);
9725
9726 xmlParseExtParsedEnt(ctxt);
9727
9728 if (ctxt->wellFormed)
9729 ret = ctxt->myDoc;
9730 else {
9731 ret = NULL;
9732 xmlFreeDoc(ctxt->myDoc);
9733 ctxt->myDoc = NULL;
9734 }
9735 if (sax != NULL)
9736 ctxt->sax = NULL;
9737 xmlFreeParserCtxt(ctxt);
9738
9739 return(ret);
9740}
9741
9742/**
9743 * xmlParseEntity:
9744 * @filename: the filename
9745 *
9746 * parse an XML external entity out of context and build a tree.
9747 *
9748 * [78] extParsedEnt ::= TextDecl? content
9749 *
9750 * This correspond to a "Well Balanced" chunk
9751 *
9752 * Returns the resulting document tree
9753 */
9754
9755xmlDocPtr
9756xmlParseEntity(const char *filename) {
9757 return(xmlSAXParseEntity(NULL, filename));
9758}
9759
9760/**
9761 * xmlCreateEntityParserCtxt:
9762 * @URL: the entity URL
9763 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009764 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +00009765 *
9766 * Create a parser context for an external entity
9767 * Automatic support for ZLIB/Compress compressed document is provided
9768 * by default if found at compile-time.
9769 *
9770 * Returns the new parser context or NULL
9771 */
9772xmlParserCtxtPtr
9773xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9774 const xmlChar *base) {
9775 xmlParserCtxtPtr ctxt;
9776 xmlParserInputPtr inputStream;
9777 char *directory = NULL;
9778 xmlChar *uri;
9779
9780 ctxt = xmlNewParserCtxt();
9781 if (ctxt == NULL) {
9782 return(NULL);
9783 }
9784
9785 uri = xmlBuildURI(URL, base);
9786
9787 if (uri == NULL) {
9788 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9789 if (inputStream == NULL) {
9790 xmlFreeParserCtxt(ctxt);
9791 return(NULL);
9792 }
9793
9794 inputPush(ctxt, inputStream);
9795
9796 if ((ctxt->directory == NULL) && (directory == NULL))
9797 directory = xmlParserGetDirectory((char *)URL);
9798 if ((ctxt->directory == NULL) && (directory != NULL))
9799 ctxt->directory = directory;
9800 } else {
9801 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9802 if (inputStream == NULL) {
9803 xmlFree(uri);
9804 xmlFreeParserCtxt(ctxt);
9805 return(NULL);
9806 }
9807
9808 inputPush(ctxt, inputStream);
9809
9810 if ((ctxt->directory == NULL) && (directory == NULL))
9811 directory = xmlParserGetDirectory((char *)uri);
9812 if ((ctxt->directory == NULL) && (directory != NULL))
9813 ctxt->directory = directory;
9814 xmlFree(uri);
9815 }
9816
9817 return(ctxt);
9818}
9819
9820/************************************************************************
9821 * *
9822 * Front ends when parsing from a file *
9823 * *
9824 ************************************************************************/
9825
9826/**
9827 * xmlCreateFileParserCtxt:
9828 * @filename: the filename
9829 *
9830 * Create a parser context for a file content.
9831 * Automatic support for ZLIB/Compress compressed document is provided
9832 * by default if found at compile-time.
9833 *
9834 * Returns the new parser context or NULL
9835 */
9836xmlParserCtxtPtr
9837xmlCreateFileParserCtxt(const char *filename)
9838{
9839 xmlParserCtxtPtr ctxt;
9840 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +00009841 char *directory = NULL;
9842
Owen Taylor3473f882001-02-23 17:55:21 +00009843 ctxt = xmlNewParserCtxt();
9844 if (ctxt == NULL) {
9845 if (xmlDefaultSAXHandler.error != NULL) {
9846 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9847 }
9848 return(NULL);
9849 }
9850
Daniel Veillard9f7b84b2001-08-23 15:31:19 +00009851 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009852 if (inputStream == NULL) {
9853 xmlFreeParserCtxt(ctxt);
9854 return(NULL);
9855 }
9856
Owen Taylor3473f882001-02-23 17:55:21 +00009857 inputPush(ctxt, inputStream);
9858 if ((ctxt->directory == NULL) && (directory == NULL))
9859 directory = xmlParserGetDirectory(filename);
9860 if ((ctxt->directory == NULL) && (directory != NULL))
9861 ctxt->directory = directory;
9862
9863 return(ctxt);
9864}
9865
9866/**
Daniel Veillarda293c322001-10-02 13:54:14 +00009867 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +00009868 * @sax: the SAX handler block
9869 * @filename: the filename
9870 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9871 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +00009872 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +00009873 *
9874 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9875 * compressed document is provided by default if found at compile-time.
9876 * It use the given SAX function block to handle the parsing callback.
9877 * If sax is NULL, fallback to the default DOM tree building routines.
9878 *
Daniel Veillarda293c322001-10-02 13:54:14 +00009879 * User data (void *) is stored within the parser context, so it is
9880 * available nearly everywhere in libxml.
9881 *
Owen Taylor3473f882001-02-23 17:55:21 +00009882 * Returns the resulting document tree
9883 */
9884
9885xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +00009886xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
9887 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +00009888 xmlDocPtr ret;
9889 xmlParserCtxtPtr ctxt;
9890 char *directory = NULL;
9891
Daniel Veillard635ef722001-10-29 11:48:19 +00009892 xmlInitParser();
9893
Owen Taylor3473f882001-02-23 17:55:21 +00009894 ctxt = xmlCreateFileParserCtxt(filename);
9895 if (ctxt == NULL) {
9896 return(NULL);
9897 }
9898 if (sax != NULL) {
9899 if (ctxt->sax != NULL)
9900 xmlFree(ctxt->sax);
9901 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009902 }
Daniel Veillarda293c322001-10-02 13:54:14 +00009903 if (data!=NULL) {
9904 ctxt->_private=data;
9905 }
Owen Taylor3473f882001-02-23 17:55:21 +00009906
9907 if ((ctxt->directory == NULL) && (directory == NULL))
9908 directory = xmlParserGetDirectory(filename);
9909 if ((ctxt->directory == NULL) && (directory != NULL))
9910 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9911
9912 xmlParseDocument(ctxt);
9913
9914 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9915 else {
9916 ret = NULL;
9917 xmlFreeDoc(ctxt->myDoc);
9918 ctxt->myDoc = NULL;
9919 }
9920 if (sax != NULL)
9921 ctxt->sax = NULL;
9922 xmlFreeParserCtxt(ctxt);
9923
9924 return(ret);
9925}
9926
9927/**
Daniel Veillarda293c322001-10-02 13:54:14 +00009928 * xmlSAXParseFile:
9929 * @sax: the SAX handler block
9930 * @filename: the filename
9931 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9932 * documents
9933 *
9934 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9935 * compressed document is provided by default if found at compile-time.
9936 * It use the given SAX function block to handle the parsing callback.
9937 * If sax is NULL, fallback to the default DOM tree building routines.
9938 *
9939 * Returns the resulting document tree
9940 */
9941
9942xmlDocPtr
9943xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9944 int recovery) {
9945 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
9946}
9947
9948/**
Owen Taylor3473f882001-02-23 17:55:21 +00009949 * xmlRecoverDoc:
9950 * @cur: a pointer to an array of xmlChar
9951 *
9952 * parse an XML in-memory document and build a tree.
9953 * In the case the document is not Well Formed, a tree is built anyway
9954 *
9955 * Returns the resulting document tree
9956 */
9957
9958xmlDocPtr
9959xmlRecoverDoc(xmlChar *cur) {
9960 return(xmlSAXParseDoc(NULL, cur, 1));
9961}
9962
9963/**
9964 * xmlParseFile:
9965 * @filename: the filename
9966 *
9967 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9968 * compressed document is provided by default if found at compile-time.
9969 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +00009970 * Returns the resulting document tree if the file was wellformed,
9971 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +00009972 */
9973
9974xmlDocPtr
9975xmlParseFile(const char *filename) {
9976 return(xmlSAXParseFile(NULL, filename, 0));
9977}
9978
9979/**
9980 * xmlRecoverFile:
9981 * @filename: the filename
9982 *
9983 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9984 * compressed document is provided by default if found at compile-time.
9985 * In the case the document is not Well Formed, a tree is built anyway
9986 *
9987 * Returns the resulting document tree
9988 */
9989
9990xmlDocPtr
9991xmlRecoverFile(const char *filename) {
9992 return(xmlSAXParseFile(NULL, filename, 1));
9993}
9994
9995
9996/**
9997 * xmlSetupParserForBuffer:
9998 * @ctxt: an XML parser context
9999 * @buffer: a xmlChar * buffer
10000 * @filename: a file name
10001 *
10002 * Setup the parser context to parse a new buffer; Clears any prior
10003 * contents from the parser context. The buffer parameter must not be
10004 * NULL, but the filename parameter can be
10005 */
10006void
10007xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
10008 const char* filename)
10009{
10010 xmlParserInputPtr input;
10011
10012 input = xmlNewInputStream(ctxt);
10013 if (input == NULL) {
10014 perror("malloc");
10015 xmlFree(ctxt);
10016 return;
10017 }
10018
10019 xmlClearParserCtxt(ctxt);
10020 if (filename != NULL)
10021 input->filename = xmlMemStrdup(filename);
10022 input->base = buffer;
10023 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010024 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000010025 inputPush(ctxt, input);
10026}
10027
10028/**
10029 * xmlSAXUserParseFile:
10030 * @sax: a SAX handler
10031 * @user_data: The user data returned on SAX callbacks
10032 * @filename: a file name
10033 *
10034 * parse an XML file and call the given SAX handler routines.
10035 * Automatic support for ZLIB/Compress compressed document is provided
10036 *
10037 * Returns 0 in case of success or a error number otherwise
10038 */
10039int
10040xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10041 const char *filename) {
10042 int ret = 0;
10043 xmlParserCtxtPtr ctxt;
10044
10045 ctxt = xmlCreateFileParserCtxt(filename);
10046 if (ctxt == NULL) return -1;
10047 if (ctxt->sax != &xmlDefaultSAXHandler)
10048 xmlFree(ctxt->sax);
10049 ctxt->sax = sax;
10050 if (user_data != NULL)
10051 ctxt->userData = user_data;
10052
10053 xmlParseDocument(ctxt);
10054
10055 if (ctxt->wellFormed)
10056 ret = 0;
10057 else {
10058 if (ctxt->errNo != 0)
10059 ret = ctxt->errNo;
10060 else
10061 ret = -1;
10062 }
10063 if (sax != NULL)
10064 ctxt->sax = NULL;
10065 xmlFreeParserCtxt(ctxt);
10066
10067 return ret;
10068}
10069
10070/************************************************************************
10071 * *
10072 * Front ends when parsing from memory *
10073 * *
10074 ************************************************************************/
10075
10076/**
10077 * xmlCreateMemoryParserCtxt:
10078 * @buffer: a pointer to a char array
10079 * @size: the size of the array
10080 *
10081 * Create a parser context for an XML in-memory document.
10082 *
10083 * Returns the new parser context or NULL
10084 */
10085xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010086xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010087 xmlParserCtxtPtr ctxt;
10088 xmlParserInputPtr input;
10089 xmlParserInputBufferPtr buf;
10090
10091 if (buffer == NULL)
10092 return(NULL);
10093 if (size <= 0)
10094 return(NULL);
10095
10096 ctxt = xmlNewParserCtxt();
10097 if (ctxt == NULL)
10098 return(NULL);
10099
10100 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
10101 if (buf == NULL) return(NULL);
10102
10103 input = xmlNewInputStream(ctxt);
10104 if (input == NULL) {
10105 xmlFreeParserCtxt(ctxt);
10106 return(NULL);
10107 }
10108
10109 input->filename = NULL;
10110 input->buf = buf;
10111 input->base = input->buf->buffer->content;
10112 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010113 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010114
10115 inputPush(ctxt, input);
10116 return(ctxt);
10117}
10118
10119/**
10120 * xmlSAXParseMemory:
10121 * @sax: the SAX handler block
10122 * @buffer: an pointer to a char array
10123 * @size: the size of the array
10124 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10125 * documents
10126 *
10127 * parse an XML in-memory block and use the given SAX function block
10128 * to handle the parsing callback. If sax is NULL, fallback to the default
10129 * DOM tree building routines.
10130 *
10131 * Returns the resulting document tree
10132 */
10133xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010134xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10135 int size, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000010136 xmlDocPtr ret;
10137 xmlParserCtxtPtr ctxt;
10138
10139 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10140 if (ctxt == NULL) return(NULL);
10141 if (sax != NULL) {
10142 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010143 }
10144
10145 xmlParseDocument(ctxt);
10146
10147 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10148 else {
10149 ret = NULL;
10150 xmlFreeDoc(ctxt->myDoc);
10151 ctxt->myDoc = NULL;
10152 }
10153 if (sax != NULL)
10154 ctxt->sax = NULL;
10155 xmlFreeParserCtxt(ctxt);
10156
10157 return(ret);
10158}
10159
10160/**
10161 * xmlParseMemory:
10162 * @buffer: an pointer to a char array
10163 * @size: the size of the array
10164 *
10165 * parse an XML in-memory block and build a tree.
10166 *
10167 * Returns the resulting document tree
10168 */
10169
Daniel Veillard50822cb2001-07-26 20:05:51 +000010170xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010171 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10172}
10173
10174/**
10175 * xmlRecoverMemory:
10176 * @buffer: an pointer to a char array
10177 * @size: the size of the array
10178 *
10179 * parse an XML in-memory block and build a tree.
10180 * In the case the document is not Well Formed, a tree is built anyway
10181 *
10182 * Returns the resulting document tree
10183 */
10184
Daniel Veillard50822cb2001-07-26 20:05:51 +000010185xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010186 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10187}
10188
10189/**
10190 * xmlSAXUserParseMemory:
10191 * @sax: a SAX handler
10192 * @user_data: The user data returned on SAX callbacks
10193 * @buffer: an in-memory XML document input
10194 * @size: the length of the XML document in bytes
10195 *
10196 * A better SAX parsing routine.
10197 * parse an XML in-memory buffer and call the given SAX handler routines.
10198 *
10199 * Returns 0 in case of success or a error number otherwise
10200 */
10201int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010202 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010203 int ret = 0;
10204 xmlParserCtxtPtr ctxt;
10205 xmlSAXHandlerPtr oldsax = NULL;
10206
10207 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10208 if (ctxt == NULL) return -1;
10209 if (sax != NULL) {
10210 oldsax = ctxt->sax;
10211 ctxt->sax = sax;
10212 }
Daniel Veillard30211a02001-04-26 09:33:18 +000010213 if (user_data != NULL)
10214 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010215
10216 xmlParseDocument(ctxt);
10217
10218 if (ctxt->wellFormed)
10219 ret = 0;
10220 else {
10221 if (ctxt->errNo != 0)
10222 ret = ctxt->errNo;
10223 else
10224 ret = -1;
10225 }
10226 if (sax != NULL) {
10227 ctxt->sax = oldsax;
10228 }
10229 xmlFreeParserCtxt(ctxt);
10230
10231 return ret;
10232}
10233
10234/**
10235 * xmlCreateDocParserCtxt:
10236 * @cur: a pointer to an array of xmlChar
10237 *
10238 * Creates a parser context for an XML in-memory document.
10239 *
10240 * Returns the new parser context or NULL
10241 */
10242xmlParserCtxtPtr
10243xmlCreateDocParserCtxt(xmlChar *cur) {
10244 int len;
10245
10246 if (cur == NULL)
10247 return(NULL);
10248 len = xmlStrlen(cur);
10249 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10250}
10251
10252/**
10253 * xmlSAXParseDoc:
10254 * @sax: the SAX handler block
10255 * @cur: a pointer to an array of xmlChar
10256 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10257 * documents
10258 *
10259 * parse an XML in-memory document and build a tree.
10260 * It use the given SAX function block to handle the parsing callback.
10261 * If sax is NULL, fallback to the default DOM tree building routines.
10262 *
10263 * Returns the resulting document tree
10264 */
10265
10266xmlDocPtr
10267xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10268 xmlDocPtr ret;
10269 xmlParserCtxtPtr ctxt;
10270
10271 if (cur == NULL) return(NULL);
10272
10273
10274 ctxt = xmlCreateDocParserCtxt(cur);
10275 if (ctxt == NULL) return(NULL);
10276 if (sax != NULL) {
10277 ctxt->sax = sax;
10278 ctxt->userData = NULL;
10279 }
10280
10281 xmlParseDocument(ctxt);
10282 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10283 else {
10284 ret = NULL;
10285 xmlFreeDoc(ctxt->myDoc);
10286 ctxt->myDoc = NULL;
10287 }
10288 if (sax != NULL)
10289 ctxt->sax = NULL;
10290 xmlFreeParserCtxt(ctxt);
10291
10292 return(ret);
10293}
10294
10295/**
10296 * xmlParseDoc:
10297 * @cur: a pointer to an array of xmlChar
10298 *
10299 * parse an XML in-memory document and build a tree.
10300 *
10301 * Returns the resulting document tree
10302 */
10303
10304xmlDocPtr
10305xmlParseDoc(xmlChar *cur) {
10306 return(xmlSAXParseDoc(NULL, cur, 0));
10307}
10308
Daniel Veillard8107a222002-01-13 14:10:10 +000010309/************************************************************************
10310 * *
10311 * Specific function to keep track of entities references *
10312 * and used by the XSLT debugger *
10313 * *
10314 ************************************************************************/
10315
10316static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
10317
10318/**
10319 * xmlAddEntityReference:
10320 * @ent : A valid entity
10321 * @firstNode : A valid first node for children of entity
10322 * @lastNode : A valid last node of children entity
10323 *
10324 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
10325 */
10326static void
10327xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
10328 xmlNodePtr lastNode)
10329{
10330 if (xmlEntityRefFunc != NULL) {
10331 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
10332 }
10333}
10334
10335
10336/**
10337 * xmlSetEntityReferenceFunc:
10338 * @func : A valid function
10339 *
10340 * Set the function to call call back when a xml reference has been made
10341 */
10342void
10343xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
10344{
10345 xmlEntityRefFunc = func;
10346}
Owen Taylor3473f882001-02-23 17:55:21 +000010347
10348/************************************************************************
10349 * *
10350 * Miscellaneous *
10351 * *
10352 ************************************************************************/
10353
10354#ifdef LIBXML_XPATH_ENABLED
10355#include <libxml/xpath.h>
10356#endif
10357
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010358extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000010359static int xmlParserInitialized = 0;
10360
10361/**
10362 * xmlInitParser:
10363 *
10364 * Initialization function for the XML parser.
10365 * This is not reentrant. Call once before processing in case of
10366 * use in multithreaded programs.
10367 */
10368
10369void
10370xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000010371 if (xmlParserInitialized != 0)
10372 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010373
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010374 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
10375 (xmlGenericError == NULL))
10376 initGenericErrorDefaultFunc(NULL);
Daniel Veillardd0463562001-10-13 09:15:48 +000010377 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000010378 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000010379 xmlInitCharEncodingHandlers();
10380 xmlInitializePredefinedEntities();
10381 xmlDefaultSAXHandlerInit();
10382 xmlRegisterDefaultInputCallbacks();
10383 xmlRegisterDefaultOutputCallbacks();
10384#ifdef LIBXML_HTML_ENABLED
10385 htmlInitAutoClose();
10386 htmlDefaultSAXHandlerInit();
10387#endif
10388#ifdef LIBXML_XPATH_ENABLED
10389 xmlXPathInit();
10390#endif
10391 xmlParserInitialized = 1;
10392}
10393
10394/**
10395 * xmlCleanupParser:
10396 *
10397 * Cleanup function for the XML parser. It tries to reclaim all
10398 * parsing related global memory allocated for the parser processing.
10399 * It doesn't deallocate any document related memory. Calling this
10400 * function should not prevent reusing the parser.
10401 */
10402
10403void
10404xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000010405 xmlCleanupCharEncodingHandlers();
10406 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000010407#ifdef LIBXML_CATALOG_ENABLED
10408 xmlCatalogCleanup();
10409#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000010410 xmlCleanupThreads();
10411 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010412}