blob: 6c93e90b564d44dfddfcad517bc857c378e5bdd7 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Bjorn Reese70a9da52001-04-21 16:57:29 +000033#include "libxml.h"
34
Daniel Veillard3c5ed912002-01-08 10:36:16 +000035#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000036#define XML_DIR_SEP '\\'
37#else
Owen Taylor3473f882001-02-23 17:55:21 +000038#define XML_DIR_SEP '/'
39#endif
40
Owen Taylor3473f882001-02-23 17:55:21 +000041#include <stdlib.h>
42#include <string.h>
43#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000044#include <libxml/threads.h>
45#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046#include <libxml/tree.h>
47#include <libxml/parser.h>
48#include <libxml/parserInternals.h>
49#include <libxml/valid.h>
50#include <libxml/entities.h>
51#include <libxml/xmlerror.h>
52#include <libxml/encoding.h>
53#include <libxml/xmlIO.h>
54#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000055#ifdef LIBXML_CATALOG_ENABLED
56#include <libxml/catalog.h>
57#endif
Owen Taylor3473f882001-02-23 17:55:21 +000058
59#ifdef HAVE_CTYPE_H
60#include <ctype.h>
61#endif
62#ifdef HAVE_STDLIB_H
63#include <stdlib.h>
64#endif
65#ifdef HAVE_SYS_STAT_H
66#include <sys/stat.h>
67#endif
68#ifdef HAVE_FCNTL_H
69#include <fcntl.h>
70#endif
71#ifdef HAVE_UNISTD_H
72#include <unistd.h>
73#endif
74#ifdef HAVE_ZLIB_H
75#include <zlib.h>
76#endif
77
78
Daniel Veillard21a0f912001-02-25 19:54:14 +000079#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000080#define XML_PARSER_BUFFER_SIZE 100
81
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by the W3C specs.  Both the table itself and the
 * strings it points to are read-only.
 */

static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
90
91/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
92void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
93xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
94 const xmlChar **str);
95
Daniel Veillard257d9102001-05-08 10:41:44 +000096static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +000097xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
98 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000099 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000100 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000101
Daniel Veillard8107a222002-01-13 14:10:10 +0000102static void
103xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
104 xmlNodePtr lastNode);
105
Owen Taylor3473f882001-02-23 17:55:21 +0000106/************************************************************************
107 * *
108 * Parser stacks related functions and macros *
109 * *
110 ************************************************************************/
111
112xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
113 const xmlChar ** str);
114
115/*
116 * Generic function for accessing stacks in the Parser Context
117 */
118
119#define PUSH_AND_POP(scope, type, name) \
120scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
121 if (ctxt->name##Nr >= ctxt->name##Max) { \
122 ctxt->name##Max *= 2; \
123 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
124 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
125 if (ctxt->name##Tab == NULL) { \
126 xmlGenericError(xmlGenericErrorContext, \
127 "realloc failed !\n"); \
128 return(0); \
129 } \
130 } \
131 ctxt->name##Tab[ctxt->name##Nr] = value; \
132 ctxt->name = value; \
133 return(ctxt->name##Nr++); \
134} \
135scope type name##Pop(xmlParserCtxtPtr ctxt) { \
136 type ret; \
137 if (ctxt->name##Nr <= 0) return(0); \
138 ctxt->name##Nr--; \
139 if (ctxt->name##Nr > 0) \
140 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
141 else \
142 ctxt->name = NULL; \
143 ret = ctxt->name##Tab[ctxt->name##Nr]; \
144 ctxt->name##Tab[ctxt->name##Nr] = 0; \
145 return(ret); \
146} \
147
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000148/**
149 * inputPop:
150 * @ctxt: an XML parser context
151 *
152 * Pops the top parser input from the input stack
153 *
154 * Returns the input just removed
155 */
156/**
157 * inputPush:
158 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000159 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000160 *
161 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000162 *
163 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000164 */
165/**
166 * namePop:
167 * @ctxt: an XML parser context
168 *
169 * Pops the top element name from the name stack
170 *
171 * Returns the name just removed
172 */
173/**
174 * namePush:
175 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000176 * @value: the element name
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000177 *
178 * Pushes a new element name on top of the name stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000179 *
180 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000181 */
182/**
183 * nodePop:
184 * @ctxt: an XML parser context
185 *
186 * Pops the top element node from the node stack
187 *
188 * Returns the node just removed
189 */
190/**
191 * nodePush:
192 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000193 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000194 *
195 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000196 *
197 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000198 */
Owen Taylor3473f882001-02-23 17:55:21 +0000199/*
200 * Those macros actually generate the functions
201 */
202PUSH_AND_POP(extern, xmlParserInputPtr, input)
203PUSH_AND_POP(extern, xmlNodePtr, node)
204PUSH_AND_POP(extern, xmlChar*, name)
205
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000206static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000207 if (ctxt->spaceNr >= ctxt->spaceMax) {
208 ctxt->spaceMax *= 2;
209 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
210 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
211 if (ctxt->spaceTab == NULL) {
212 xmlGenericError(xmlGenericErrorContext,
213 "realloc failed !\n");
214 return(0);
215 }
216 }
217 ctxt->spaceTab[ctxt->spaceNr] = val;
218 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
219 return(ctxt->spaceNr++);
220}
221
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000222static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000223 int ret;
224 if (ctxt->spaceNr <= 0) return(0);
225 ctxt->spaceNr--;
226 if (ctxt->spaceNr > 0)
227 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
228 else
229 ctxt->space = NULL;
230 ret = ctxt->spaceTab[ctxt->spaceNr];
231 ctxt->spaceTab[ctxt->spaceNr] = -1;
232 return(ret);
233}
234
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * All of them expect a variable named "ctxt" (the parser context) to be in
 * scope at the point of use.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the pending token if there is one, otherwise the
 *           current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done (yields -1 while a token
 *           is pending)
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip l xmlChar in the input buffer
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *
 * GROW, SHRINK, NEXT1 and COPY_BUF deliberately keep their historical
 * statement shape (a bare "if" or a brace block rather than
 * do { } while (0)) because existing call sites use them both with and
 * without a trailing semicolon.  Never use them as the unbraced body of an
 * if that has an else.
 */

#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val),ctxt->input->cur += (val);                   \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)

#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input);                                  \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  }

#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {   \
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                       \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {                                                         \
        ctxt->input->cur++;                                             \
        ctxt->nbChars++;                                                \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->token = 0; ctxt->input->cur += (l);                           \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/* arguments are parenthesized so that e.g. the cast applies to all of v */
#define COPY_BUF(l,b,i,v)                                               \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);                           \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +0000321
322/**
323 * xmlSkipBlankChars:
324 * @ctxt: the XML parser context
325 *
326 * skip all blanks character found at that point in the input streams.
327 * It pops up finished entities in the process if allowable at that point.
328 *
329 * Returns the number of space chars skipped
330 */
331
332int
333xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000334 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000335
Daniel Veillard02141ea2001-04-30 11:46:40 +0000336 if (ctxt->token != 0) {
337 if (!IS_BLANK(ctxt->token))
338 return(0);
339 ctxt->token = 0;
340 res++;
341 }
Owen Taylor3473f882001-02-23 17:55:21 +0000342 /*
343 * It's Okay to use CUR/NEXT here since all the blanks are on
344 * the ASCII range.
345 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000346 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
347 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000348 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000349 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000350 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000351 cur = ctxt->input->cur;
352 while (IS_BLANK(*cur)) {
353 if (*cur == '\n') {
354 ctxt->input->line++; ctxt->input->col = 1;
355 }
356 cur++;
357 res++;
358 if (*cur == 0) {
359 ctxt->input->cur = cur;
360 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
361 cur = ctxt->input->cur;
362 }
363 }
364 ctxt->input->cur = cur;
365 } else {
366 int cur;
367 do {
368 cur = CUR;
369 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
370 NEXT;
371 cur = CUR;
372 res++;
373 }
374 while ((cur == 0) && (ctxt->inputNr > 1) &&
375 (ctxt->instate != XML_PARSER_COMMENT)) {
376 xmlPopInput(ctxt);
377 cur = CUR;
378 }
379 /*
380 * Need to handle support of entities branching here
381 */
382 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
383 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
384 }
Owen Taylor3473f882001-02-23 17:55:21 +0000385 return(res);
386}
387
388/************************************************************************
389 * *
390 * Commodity functions to handle entities *
391 * *
392 ************************************************************************/
393
394/**
395 * xmlPopInput:
396 * @ctxt: an XML parser context
397 *
398 * xmlPopInput: the current input pointed by ctxt->input came to an end
399 * pop it and return the next char.
400 *
401 * Returns the current xmlChar in the parser context
402 */
403xmlChar
404xmlPopInput(xmlParserCtxtPtr ctxt) {
405 if (ctxt->inputNr == 1) return(0); /* End of main Input */
406 if (xmlParserDebugEntities)
407 xmlGenericError(xmlGenericErrorContext,
408 "Popping input %d\n", ctxt->inputNr);
409 xmlFreeInputStream(inputPop(ctxt));
410 if ((*ctxt->input->cur == 0) &&
411 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
412 return(xmlPopInput(ctxt));
413 return(CUR);
414}
415
416/**
417 * xmlPushInput:
418 * @ctxt: an XML parser context
419 * @input: an XML parser input fragment (entity, XML fragment ...).
420 *
421 * xmlPushInput: switch to a new input stream which is stacked on top
422 * of the previous one(s).
423 */
424void
425xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
426 if (input == NULL) return;
427
428 if (xmlParserDebugEntities) {
429 if ((ctxt->input != NULL) && (ctxt->input->filename))
430 xmlGenericError(xmlGenericErrorContext,
431 "%s(%d): ", ctxt->input->filename,
432 ctxt->input->line);
433 xmlGenericError(xmlGenericErrorContext,
434 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
435 }
436 inputPush(ctxt, input);
437 GROW;
438}
439
440/**
441 * xmlParseCharRef:
442 * @ctxt: an XML parser context
443 *
444 * parse Reference declarations
445 *
446 * [66] CharRef ::= '&#' [0-9]+ ';' |
447 * '&#x' [0-9a-fA-F]+ ';'
448 *
449 * [ WFC: Legal Character ]
450 * Characters referred to using character references must match the
451 * production for Char.
452 *
453 * Returns the value parsed (as an int), 0 in case of error
454 */
455int
456xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000457 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000458 int count = 0;
459
460 if (ctxt->token != 0) {
461 val = ctxt->token;
462 ctxt->token = 0;
463 return(val);
464 }
465 /*
466 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
467 */
468 if ((RAW == '&') && (NXT(1) == '#') &&
469 (NXT(2) == 'x')) {
470 SKIP(3);
471 GROW;
472 while (RAW != ';') { /* loop blocked by count */
473 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
474 val = val * 16 + (CUR - '0');
475 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
476 val = val * 16 + (CUR - 'a') + 10;
477 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
478 val = val * 16 + (CUR - 'A') + 10;
479 else {
480 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
481 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
482 ctxt->sax->error(ctxt->userData,
483 "xmlParseCharRef: invalid hexadecimal value\n");
484 ctxt->wellFormed = 0;
485 ctxt->disableSAX = 1;
486 val = 0;
487 break;
488 }
489 NEXT;
490 count++;
491 }
492 if (RAW == ';') {
493 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
494 ctxt->nbChars ++;
495 ctxt->input->cur++;
496 }
497 } else if ((RAW == '&') && (NXT(1) == '#')) {
498 SKIP(2);
499 GROW;
500 while (RAW != ';') { /* loop blocked by count */
501 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
502 val = val * 10 + (CUR - '0');
503 else {
504 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
505 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
506 ctxt->sax->error(ctxt->userData,
507 "xmlParseCharRef: invalid decimal value\n");
508 ctxt->wellFormed = 0;
509 ctxt->disableSAX = 1;
510 val = 0;
511 break;
512 }
513 NEXT;
514 count++;
515 }
516 if (RAW == ';') {
517 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
518 ctxt->nbChars ++;
519 ctxt->input->cur++;
520 }
521 } else {
522 ctxt->errNo = XML_ERR_INVALID_CHARREF;
523 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
524 ctxt->sax->error(ctxt->userData,
525 "xmlParseCharRef: invalid value\n");
526 ctxt->wellFormed = 0;
527 ctxt->disableSAX = 1;
528 }
529
530 /*
531 * [ WFC: Legal Character ]
532 * Characters referred to using character references must match the
533 * production for Char.
534 */
535 if (IS_CHAR(val)) {
536 return(val);
537 } else {
538 ctxt->errNo = XML_ERR_INVALID_CHAR;
539 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000540 ctxt->sax->error(ctxt->userData,
541 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000542 val);
543 ctxt->wellFormed = 0;
544 ctxt->disableSAX = 1;
545 }
546 return(0);
547}
548
549/**
550 * xmlParseStringCharRef:
551 * @ctxt: an XML parser context
552 * @str: a pointer to an index in the string
553 *
554 * parse Reference declarations, variant parsing from a string rather
555 * than an an input flow.
556 *
557 * [66] CharRef ::= '&#' [0-9]+ ';' |
558 * '&#x' [0-9a-fA-F]+ ';'
559 *
560 * [ WFC: Legal Character ]
561 * Characters referred to using character references must match the
562 * production for Char.
563 *
564 * Returns the value parsed (as an int), 0 in case of error, str will be
565 * updated to the current value of the index
566 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000567static int
Owen Taylor3473f882001-02-23 17:55:21 +0000568xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
569 const xmlChar *ptr;
570 xmlChar cur;
571 int val = 0;
572
573 if ((str == NULL) || (*str == NULL)) return(0);
574 ptr = *str;
575 cur = *ptr;
576 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
577 ptr += 3;
578 cur = *ptr;
579 while (cur != ';') { /* Non input consuming loop */
580 if ((cur >= '0') && (cur <= '9'))
581 val = val * 16 + (cur - '0');
582 else if ((cur >= 'a') && (cur <= 'f'))
583 val = val * 16 + (cur - 'a') + 10;
584 else if ((cur >= 'A') && (cur <= 'F'))
585 val = val * 16 + (cur - 'A') + 10;
586 else {
587 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
588 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
589 ctxt->sax->error(ctxt->userData,
590 "xmlParseStringCharRef: invalid hexadecimal value\n");
591 ctxt->wellFormed = 0;
592 ctxt->disableSAX = 1;
593 val = 0;
594 break;
595 }
596 ptr++;
597 cur = *ptr;
598 }
599 if (cur == ';')
600 ptr++;
601 } else if ((cur == '&') && (ptr[1] == '#')){
602 ptr += 2;
603 cur = *ptr;
604 while (cur != ';') { /* Non input consuming loops */
605 if ((cur >= '0') && (cur <= '9'))
606 val = val * 10 + (cur - '0');
607 else {
608 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
609 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
610 ctxt->sax->error(ctxt->userData,
611 "xmlParseStringCharRef: invalid decimal value\n");
612 ctxt->wellFormed = 0;
613 ctxt->disableSAX = 1;
614 val = 0;
615 break;
616 }
617 ptr++;
618 cur = *ptr;
619 }
620 if (cur == ';')
621 ptr++;
622 } else {
623 ctxt->errNo = XML_ERR_INVALID_CHARREF;
624 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
625 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000626 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000627 ctxt->wellFormed = 0;
628 ctxt->disableSAX = 1;
629 return(0);
630 }
631 *str = ptr;
632
633 /*
634 * [ WFC: Legal Character ]
635 * Characters referred to using character references must match the
636 * production for Char.
637 */
638 if (IS_CHAR(val)) {
639 return(val);
640 } else {
641 ctxt->errNo = XML_ERR_INVALID_CHAR;
642 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
643 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000644 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000645 ctxt->wellFormed = 0;
646 ctxt->disableSAX = 1;
647 }
648 return(0);
649}
650
651/**
652 * xmlParserHandlePEReference:
653 * @ctxt: the parser context
654 *
655 * [69] PEReference ::= '%' Name ';'
656 *
657 * [ WFC: No Recursion ]
658 * A parsed entity must not contain a recursive
659 * reference to itself, either directly or indirectly.
660 *
661 * [ WFC: Entity Declared ]
662 * In a document without any DTD, a document with only an internal DTD
663 * subset which contains no parameter entity references, or a document
664 * with "standalone='yes'", ... ... The declaration of a parameter
665 * entity must precede any reference to it...
666 *
667 * [ VC: Entity Declared ]
668 * In a document with an external subset or external parameter entities
669 * with "standalone='no'", ... ... The declaration of a parameter entity
670 * must precede any reference to it...
671 *
672 * [ WFC: In DTD ]
673 * Parameter-entity references may only appear in the DTD.
674 * NOTE: misleading but this is handled.
675 *
676 * A PEReference may have been detected in the current input stream
677 * the handling is done accordingly to
678 * http://www.w3.org/TR/REC-xml#entproc
679 * i.e.
680 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000681 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000682 */
683void
684xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
685 xmlChar *name;
686 xmlEntityPtr entity = NULL;
687 xmlParserInputPtr input;
688
689 if (ctxt->token != 0) {
690 return;
691 }
692 if (RAW != '%') return;
693 switch(ctxt->instate) {
694 case XML_PARSER_CDATA_SECTION:
695 return;
696 case XML_PARSER_COMMENT:
697 return;
698 case XML_PARSER_START_TAG:
699 return;
700 case XML_PARSER_END_TAG:
701 return;
702 case XML_PARSER_EOF:
703 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
704 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
705 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
706 ctxt->wellFormed = 0;
707 ctxt->disableSAX = 1;
708 return;
709 case XML_PARSER_PROLOG:
710 case XML_PARSER_START:
711 case XML_PARSER_MISC:
712 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
713 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
714 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
715 ctxt->wellFormed = 0;
716 ctxt->disableSAX = 1;
717 return;
718 case XML_PARSER_ENTITY_DECL:
719 case XML_PARSER_CONTENT:
720 case XML_PARSER_ATTRIBUTE_VALUE:
721 case XML_PARSER_PI:
722 case XML_PARSER_SYSTEM_LITERAL:
723 /* we just ignore it there */
724 return;
725 case XML_PARSER_EPILOG:
726 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
727 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
728 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
729 ctxt->wellFormed = 0;
730 ctxt->disableSAX = 1;
731 return;
732 case XML_PARSER_ENTITY_VALUE:
733 /*
734 * NOTE: in the case of entity values, we don't do the
735 * substitution here since we need the literal
736 * entity value to be able to save the internal
737 * subset of the document.
738 * This will be handled by xmlStringDecodeEntities
739 */
740 return;
741 case XML_PARSER_DTD:
742 /*
743 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
744 * In the internal DTD subset, parameter-entity references
745 * can occur only where markup declarations can occur, not
746 * within markup declarations.
747 * In that case this is handled in xmlParseMarkupDecl
748 */
749 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
750 return;
751 break;
752 case XML_PARSER_IGNORE:
753 return;
754 }
755
756 NEXT;
757 name = xmlParseName(ctxt);
758 if (xmlParserDebugEntities)
759 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000760 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000761 if (name == NULL) {
762 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
763 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000764 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000765 ctxt->wellFormed = 0;
766 ctxt->disableSAX = 1;
767 } else {
768 if (RAW == ';') {
769 NEXT;
770 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
771 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
772 if (entity == NULL) {
773
774 /*
775 * [ WFC: Entity Declared ]
776 * In a document without any DTD, a document with only an
777 * internal DTD subset which contains no parameter entity
778 * references, or a document with "standalone='yes'", ...
779 * ... The declaration of a parameter entity must precede
780 * any reference to it...
781 */
782 if ((ctxt->standalone == 1) ||
783 ((ctxt->hasExternalSubset == 0) &&
784 (ctxt->hasPErefs == 0))) {
785 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
786 ctxt->sax->error(ctxt->userData,
787 "PEReference: %%%s; not found\n", name);
788 ctxt->wellFormed = 0;
789 ctxt->disableSAX = 1;
790 } else {
791 /*
792 * [ VC: Entity Declared ]
793 * In a document with an external subset or external
794 * parameter entities with "standalone='no'", ...
795 * ... The declaration of a parameter entity must precede
796 * any reference to it...
797 */
798 if ((!ctxt->disableSAX) &&
799 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
800 ctxt->vctxt.error(ctxt->vctxt.userData,
801 "PEReference: %%%s; not found\n", name);
802 } else if ((!ctxt->disableSAX) &&
803 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
804 ctxt->sax->warning(ctxt->userData,
805 "PEReference: %%%s; not found\n", name);
806 ctxt->valid = 0;
807 }
808 } else {
809 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
810 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000811 xmlChar start[4];
812 xmlCharEncoding enc;
813
Owen Taylor3473f882001-02-23 17:55:21 +0000814 /*
815 * handle the extra spaces added before and after
816 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000817 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000818 */
819 input = xmlNewEntityInputStream(ctxt, entity);
820 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000821
822 /*
823 * Get the 4 first bytes and decode the charset
824 * if enc != XML_CHAR_ENCODING_NONE
825 * plug some encoding conversion routines.
826 */
827 GROW
828 start[0] = RAW;
829 start[1] = NXT(1);
830 start[2] = NXT(2);
831 start[3] = NXT(3);
832 enc = xmlDetectCharEncoding(start, 4);
833 if (enc != XML_CHAR_ENCODING_NONE) {
834 xmlSwitchEncoding(ctxt, enc);
835 }
836
Owen Taylor3473f882001-02-23 17:55:21 +0000837 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
838 (RAW == '<') && (NXT(1) == '?') &&
839 (NXT(2) == 'x') && (NXT(3) == 'm') &&
840 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
841 xmlParseTextDecl(ctxt);
842 }
843 if (ctxt->token == 0)
844 ctxt->token = ' ';
845 } else {
846 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
847 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000848 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000849 name);
850 ctxt->wellFormed = 0;
851 ctxt->disableSAX = 1;
852 }
853 }
854 } else {
855 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
856 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
857 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000858 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000859 ctxt->wellFormed = 0;
860 ctxt->disableSAX = 1;
861 }
862 xmlFree(name);
863 }
864}
865
/*
 * Macro used to grow the current buffer.
 *
 * Doubles the allocation held by the variable "buffer", whose current
 * size must be kept in a companion variable named "buffer_size"
 * (token-pasted from the argument).  The reallocation goes through a
 * temporary: on failure the old allocation is released rather than
 * leaked, "buffer" is reset to NULL and the enclosing function returns
 * NULL.  It can therefore only be used in functions returning a pointer.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *growBuffer_tmp = (xmlChar *)                               \
        xmlRealloc(buffer, buffer##_size * 2 * sizeof(xmlChar));        \
    if (growBuffer_tmp == NULL) {                                       \
        perror("realloc failed");                                       \
        xmlFree(buffer);                                                \
        buffer = NULL;                                                  \
        return(NULL);                                                   \
    }                                                                   \
    buffer = growBuffer_tmp;                                            \
    buffer##_size *= 2;                                                 \
}
878
879/**
880 * xmlStringDecodeEntities:
881 * @ctxt: the parser context
882 * @str: the input string
883 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
884 * @end: an end marker xmlChar, 0 if none
885 * @end2: an end marker xmlChar, 0 if none
886 * @end3: an end marker xmlChar, 0 if none
887 *
 * Takes an entity string content and processes it to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +0000889 *
890 * [67] Reference ::= EntityRef | CharRef
891 *
892 * [69] PEReference ::= '%' Name ';'
893 *
894 * Returns A newly allocated string with the substitution done. The caller
895 * must deallocate it !
896 */
897xmlChar *
898xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
899 xmlChar end, xmlChar end2, xmlChar end3) {
900 xmlChar *buffer = NULL;
901 int buffer_size = 0;
902
903 xmlChar *current = NULL;
904 xmlEntityPtr ent;
905 int c,l;
906 int nbchars = 0;
907
908 if (str == NULL)
909 return(NULL);
910
911 if (ctxt->depth > 40) {
912 ctxt->errNo = XML_ERR_ENTITY_LOOP;
913 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
914 ctxt->sax->error(ctxt->userData,
915 "Detected entity reference loop\n");
916 ctxt->wellFormed = 0;
917 ctxt->disableSAX = 1;
918 return(NULL);
919 }
920
921 /*
922 * allocate a translation buffer.
923 */
924 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
925 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
926 if (buffer == NULL) {
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000927 perror("xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +0000928 return(NULL);
929 }
930
931 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000932 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +0000933 * we are operating on already parsed values.
934 */
935 c = CUR_SCHAR(str, l);
936 while ((c != 0) && (c != end) && /* non input consuming loop */
937 (c != end2) && (c != end3)) {
938
939 if (c == 0) break;
940 if ((c == '&') && (str[1] == '#')) {
941 int val = xmlParseStringCharRef(ctxt, &str);
942 if (val != 0) {
943 COPY_BUF(0,buffer,nbchars,val);
944 }
945 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
946 if (xmlParserDebugEntities)
947 xmlGenericError(xmlGenericErrorContext,
948 "String decoding Entity Reference: %.30s\n",
949 str);
950 ent = xmlParseStringEntityRef(ctxt, &str);
951 if ((ent != NULL) &&
952 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
953 if (ent->content != NULL) {
954 COPY_BUF(0,buffer,nbchars,ent->content[0]);
955 } else {
956 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
957 ctxt->sax->error(ctxt->userData,
958 "internal error entity has no content\n");
959 }
960 } else if ((ent != NULL) && (ent->content != NULL)) {
961 xmlChar *rep;
962
963 ctxt->depth++;
964 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
965 0, 0, 0);
966 ctxt->depth--;
967 if (rep != NULL) {
968 current = rep;
969 while (*current != 0) { /* non input consuming loop */
970 buffer[nbchars++] = *current++;
971 if (nbchars >
972 buffer_size - XML_PARSER_BUFFER_SIZE) {
973 growBuffer(buffer);
974 }
975 }
976 xmlFree(rep);
977 }
978 } else if (ent != NULL) {
979 int i = xmlStrlen(ent->name);
980 const xmlChar *cur = ent->name;
981
982 buffer[nbchars++] = '&';
983 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
984 growBuffer(buffer);
985 }
986 for (;i > 0;i--)
987 buffer[nbchars++] = *cur++;
988 buffer[nbchars++] = ';';
989 }
990 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
991 if (xmlParserDebugEntities)
992 xmlGenericError(xmlGenericErrorContext,
993 "String decoding PE Reference: %.30s\n", str);
994 ent = xmlParseStringPEReference(ctxt, &str);
995 if (ent != NULL) {
996 xmlChar *rep;
997
998 ctxt->depth++;
999 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1000 0, 0, 0);
1001 ctxt->depth--;
1002 if (rep != NULL) {
1003 current = rep;
1004 while (*current != 0) { /* non input consuming loop */
1005 buffer[nbchars++] = *current++;
1006 if (nbchars >
1007 buffer_size - XML_PARSER_BUFFER_SIZE) {
1008 growBuffer(buffer);
1009 }
1010 }
1011 xmlFree(rep);
1012 }
1013 }
1014 } else {
1015 COPY_BUF(l,buffer,nbchars,c);
1016 str += l;
1017 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1018 growBuffer(buffer);
1019 }
1020 }
1021 c = CUR_SCHAR(str, l);
1022 }
1023 buffer[nbchars++] = 0;
1024 return(buffer);
1025}
1026
1027
1028/************************************************************************
1029 * *
1030 * Commodity functions to handle xmlChars *
1031 * *
1032 ************************************************************************/
1033
1034/**
1035 * xmlStrndup:
1036 * @cur: the input xmlChar *
1037 * @len: the len of @cur
1038 *
1039 * a strndup for array of xmlChar's
1040 *
1041 * Returns a new xmlChar * or NULL
1042 */
1043xmlChar *
1044xmlStrndup(const xmlChar *cur, int len) {
1045 xmlChar *ret;
1046
1047 if ((cur == NULL) || (len < 0)) return(NULL);
1048 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1049 if (ret == NULL) {
1050 xmlGenericError(xmlGenericErrorContext,
1051 "malloc of %ld byte failed\n",
1052 (len + 1) * (long)sizeof(xmlChar));
1053 return(NULL);
1054 }
1055 memcpy(ret, cur, len * sizeof(xmlChar));
1056 ret[len] = 0;
1057 return(ret);
1058}
1059
1060/**
1061 * xmlStrdup:
1062 * @cur: the input xmlChar *
1063 *
1064 * a strdup for array of xmlChar's. Since they are supposed to be
1065 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1066 * a termination mark of '0'.
1067 *
1068 * Returns a new xmlChar * or NULL
1069 */
1070xmlChar *
1071xmlStrdup(const xmlChar *cur) {
1072 const xmlChar *p = cur;
1073
1074 if (cur == NULL) return(NULL);
1075 while (*p != 0) p++; /* non input consuming */
1076 return(xmlStrndup(cur, p - cur));
1077}
1078
1079/**
1080 * xmlCharStrndup:
1081 * @cur: the input char *
1082 * @len: the len of @cur
1083 *
1084 * a strndup for char's to xmlChar's
1085 *
1086 * Returns a new xmlChar * or NULL
1087 */
1088
1089xmlChar *
1090xmlCharStrndup(const char *cur, int len) {
1091 int i;
1092 xmlChar *ret;
1093
1094 if ((cur == NULL) || (len < 0)) return(NULL);
1095 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1096 if (ret == NULL) {
1097 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1098 (len + 1) * (long)sizeof(xmlChar));
1099 return(NULL);
1100 }
1101 for (i = 0;i < len;i++)
1102 ret[i] = (xmlChar) cur[i];
1103 ret[len] = 0;
1104 return(ret);
1105}
1106
1107/**
1108 * xmlCharStrdup:
1109 * @cur: the input char *
1110 * @len: the len of @cur
1111 *
1112 * a strdup for char's to xmlChar's
1113 *
1114 * Returns a new xmlChar * or NULL
1115 */
1116
1117xmlChar *
1118xmlCharStrdup(const char *cur) {
1119 const char *p = cur;
1120
1121 if (cur == NULL) return(NULL);
1122 while (*p != '\0') p++; /* non input consuming */
1123 return(xmlCharStrndup(cur, p - cur));
1124}
1125
1126/**
1127 * xmlStrcmp:
1128 * @str1: the first xmlChar *
1129 * @str2: the second xmlChar *
1130 *
1131 * a strcmp for xmlChar's
1132 *
1133 * Returns the integer result of the comparison
1134 */
1135
1136int
1137xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1138 register int tmp;
1139
1140 if (str1 == str2) return(0);
1141 if (str1 == NULL) return(-1);
1142 if (str2 == NULL) return(1);
1143 do {
1144 tmp = *str1++ - *str2;
1145 if (tmp != 0) return(tmp);
1146 } while (*str2++ != 0);
1147 return 0;
1148}
1149
1150/**
1151 * xmlStrEqual:
1152 * @str1: the first xmlChar *
1153 * @str2: the second xmlChar *
1154 *
1155 * Check if both string are equal of have same content
1156 * Should be a bit more readable and faster than xmlStrEqual()
1157 *
1158 * Returns 1 if they are equal, 0 if they are different
1159 */
1160
1161int
1162xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1163 if (str1 == str2) return(1);
1164 if (str1 == NULL) return(0);
1165 if (str2 == NULL) return(0);
1166 do {
1167 if (*str1++ != *str2) return(0);
1168 } while (*str2++);
1169 return(1);
1170}
1171
1172/**
1173 * xmlStrncmp:
1174 * @str1: the first xmlChar *
1175 * @str2: the second xmlChar *
1176 * @len: the max comparison length
1177 *
1178 * a strncmp for xmlChar's
1179 *
1180 * Returns the integer result of the comparison
1181 */
1182
1183int
1184xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1185 register int tmp;
1186
1187 if (len <= 0) return(0);
1188 if (str1 == str2) return(0);
1189 if (str1 == NULL) return(-1);
1190 if (str2 == NULL) return(1);
1191 do {
1192 tmp = *str1++ - *str2;
1193 if (tmp != 0 || --len == 0) return(tmp);
1194 } while (*str2++ != 0);
1195 return 0;
1196}
1197
Daniel Veillardb44025c2001-10-11 22:55:55 +00001198static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001199 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1200 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1201 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1202 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1203 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1204 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1205 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1206 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1207 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1208 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1209 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1210 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1211 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1212 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1213 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1214 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1215 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1216 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1217 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1218 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1219 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1220 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1221 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1222 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1223 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1224 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1225 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1226 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1227 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1228 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1229 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1230 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1231};
1232
1233/**
1234 * xmlStrcasecmp:
1235 * @str1: the first xmlChar *
1236 * @str2: the second xmlChar *
1237 *
1238 * a strcasecmp for xmlChar's
1239 *
1240 * Returns the integer result of the comparison
1241 */
1242
1243int
1244xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1245 register int tmp;
1246
1247 if (str1 == str2) return(0);
1248 if (str1 == NULL) return(-1);
1249 if (str2 == NULL) return(1);
1250 do {
1251 tmp = casemap[*str1++] - casemap[*str2];
1252 if (tmp != 0) return(tmp);
1253 } while (*str2++ != 0);
1254 return 0;
1255}
1256
1257/**
1258 * xmlStrncasecmp:
1259 * @str1: the first xmlChar *
1260 * @str2: the second xmlChar *
1261 * @len: the max comparison length
1262 *
1263 * a strncasecmp for xmlChar's
1264 *
1265 * Returns the integer result of the comparison
1266 */
1267
1268int
1269xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1270 register int tmp;
1271
1272 if (len <= 0) return(0);
1273 if (str1 == str2) return(0);
1274 if (str1 == NULL) return(-1);
1275 if (str2 == NULL) return(1);
1276 do {
1277 tmp = casemap[*str1++] - casemap[*str2];
1278 if (tmp != 0 || --len == 0) return(tmp);
1279 } while (*str2++ != 0);
1280 return 0;
1281}
1282
1283/**
1284 * xmlStrchr:
1285 * @str: the xmlChar * array
1286 * @val: the xmlChar to search
1287 *
1288 * a strchr for xmlChar's
1289 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001290 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001291 */
1292
1293const xmlChar *
1294xmlStrchr(const xmlChar *str, xmlChar val) {
1295 if (str == NULL) return(NULL);
1296 while (*str != 0) { /* non input consuming */
1297 if (*str == val) return((xmlChar *) str);
1298 str++;
1299 }
1300 return(NULL);
1301}
1302
1303/**
1304 * xmlStrstr:
1305 * @str: the xmlChar * array (haystack)
1306 * @val: the xmlChar to search (needle)
1307 *
1308 * a strstr for xmlChar's
1309 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001310 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001311 */
1312
1313const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001314xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001315 int n;
1316
1317 if (str == NULL) return(NULL);
1318 if (val == NULL) return(NULL);
1319 n = xmlStrlen(val);
1320
1321 if (n == 0) return(str);
1322 while (*str != 0) { /* non input consuming */
1323 if (*str == *val) {
1324 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1325 }
1326 str++;
1327 }
1328 return(NULL);
1329}
1330
1331/**
1332 * xmlStrcasestr:
1333 * @str: the xmlChar * array (haystack)
1334 * @val: the xmlChar to search (needle)
1335 *
1336 * a case-ignoring strstr for xmlChar's
1337 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001338 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001339 */
1340
1341const xmlChar *
1342xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1343 int n;
1344
1345 if (str == NULL) return(NULL);
1346 if (val == NULL) return(NULL);
1347 n = xmlStrlen(val);
1348
1349 if (n == 0) return(str);
1350 while (*str != 0) { /* non input consuming */
1351 if (casemap[*str] == casemap[*val])
1352 if (!xmlStrncasecmp(str, val, n)) return(str);
1353 str++;
1354 }
1355 return(NULL);
1356}
1357
1358/**
1359 * xmlStrsub:
1360 * @str: the xmlChar * array (haystack)
1361 * @start: the index of the first char (zero based)
1362 * @len: the length of the substring
1363 *
1364 * Extract a substring of a given string
1365 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001366 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001367 */
1368
1369xmlChar *
1370xmlStrsub(const xmlChar *str, int start, int len) {
1371 int i;
1372
1373 if (str == NULL) return(NULL);
1374 if (start < 0) return(NULL);
1375 if (len < 0) return(NULL);
1376
1377 for (i = 0;i < start;i++) {
1378 if (*str == 0) return(NULL);
1379 str++;
1380 }
1381 if (*str == 0) return(NULL);
1382 return(xmlStrndup(str, len));
1383}
1384
1385/**
1386 * xmlStrlen:
1387 * @str: the xmlChar * array
1388 *
1389 * length of a xmlChar's string
1390 *
1391 * Returns the number of xmlChar contained in the ARRAY.
1392 */
1393
1394int
1395xmlStrlen(const xmlChar *str) {
1396 int len = 0;
1397
1398 if (str == NULL) return(0);
1399 while (*str != 0) { /* non input consuming */
1400 str++;
1401 len++;
1402 }
1403 return(len);
1404}
1405
1406/**
1407 * xmlStrncat:
1408 * @cur: the original xmlChar * array
1409 * @add: the xmlChar * array added
1410 * @len: the length of @add
1411 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001412 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001413 * first bytes of @add.
1414 *
1415 * Returns a new xmlChar *, the original @cur is reallocated if needed
1416 * and should not be freed
1417 */
1418
1419xmlChar *
1420xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1421 int size;
1422 xmlChar *ret;
1423
1424 if ((add == NULL) || (len == 0))
1425 return(cur);
1426 if (cur == NULL)
1427 return(xmlStrndup(add, len));
1428
1429 size = xmlStrlen(cur);
1430 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1431 if (ret == NULL) {
1432 xmlGenericError(xmlGenericErrorContext,
1433 "xmlStrncat: realloc of %ld byte failed\n",
1434 (size + len + 1) * (long)sizeof(xmlChar));
1435 return(cur);
1436 }
1437 memcpy(&ret[size], add, len * sizeof(xmlChar));
1438 ret[size + len] = 0;
1439 return(ret);
1440}
1441
1442/**
1443 * xmlStrcat:
1444 * @cur: the original xmlChar * array
1445 * @add: the xmlChar * array added
1446 *
1447 * a strcat for array of xmlChar's. Since they are supposed to be
1448 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1449 * a termination mark of '0'.
1450 *
1451 * Returns a new xmlChar * containing the concatenated string.
1452 */
1453xmlChar *
1454xmlStrcat(xmlChar *cur, const xmlChar *add) {
1455 const xmlChar *p = add;
1456
1457 if (add == NULL) return(cur);
1458 if (cur == NULL)
1459 return(xmlStrdup(add));
1460
1461 while (*p != 0) p++; /* non input consuming */
1462 return(xmlStrncat(cur, add, p - add));
1463}
1464
1465/************************************************************************
1466 * *
1467 * Commodity functions, cleanup needed ? *
1468 * *
1469 ************************************************************************/
1470
1471/**
1472 * areBlanks:
1473 * @ctxt: an XML parser context
1474 * @str: a xmlChar *
1475 * @len: the size of @str
1476 *
1477 * Is this a sequence of blank chars that one can ignore ?
1478 *
1479 * Returns 1 if ignorable 0 otherwise.
1480 */
1481
1482static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1483 int i, ret;
1484 xmlNodePtr lastChild;
1485
Daniel Veillard05c13a22001-09-09 08:38:09 +00001486 /*
1487 * Don't spend time trying to differentiate them, the same callback is
1488 * used !
1489 */
1490 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001491 return(0);
1492
Owen Taylor3473f882001-02-23 17:55:21 +00001493 /*
1494 * Check for xml:space value.
1495 */
1496 if (*(ctxt->space) == 1)
1497 return(0);
1498
1499 /*
1500 * Check that the string is made of blanks
1501 */
1502 for (i = 0;i < len;i++)
1503 if (!(IS_BLANK(str[i]))) return(0);
1504
1505 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001506 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001507 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001508 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001509 if (ctxt->myDoc != NULL) {
1510 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1511 if (ret == 0) return(1);
1512 if (ret == 1) return(0);
1513 }
1514
1515 /*
1516 * Otherwise, heuristic :-\
1517 */
Owen Taylor3473f882001-02-23 17:55:21 +00001518 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001519 if ((ctxt->node->children == NULL) &&
1520 (RAW == '<') && (NXT(1) == '/')) return(0);
1521
1522 lastChild = xmlGetLastChild(ctxt->node);
1523 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001524 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1525 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001526 } else if (xmlNodeIsText(lastChild))
1527 return(0);
1528 else if ((ctxt->node->children != NULL) &&
1529 (xmlNodeIsText(ctxt->node->children)))
1530 return(0);
1531 return(1);
1532}
1533
1534/*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001535 * Forward definition for recursive behavior.
Owen Taylor3473f882001-02-23 17:55:21 +00001536 */
1537void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1538void xmlParseReference(xmlParserCtxtPtr ctxt);
1539
1540/************************************************************************
1541 * *
1542 * Extra stuff for namespace support *
1543 * Relates to http://www.w3.org/TR/WD-xml-names *
1544 * *
1545 ************************************************************************/
1546
1547/**
1548 * xmlSplitQName:
1549 * @ctxt: an XML parser context
1550 * @name: an XML parser context
1551 * @prefix: a xmlChar **
1552 *
1553 * parse an UTF8 encoded XML qualified name string
1554 *
1555 * [NS 5] QName ::= (Prefix ':')? LocalPart
1556 *
1557 * [NS 6] Prefix ::= NCName
1558 *
1559 * [NS 7] LocalPart ::= NCName
1560 *
1561 * Returns the local part, and prefix is updated
1562 * to get the Prefix if any.
1563 */
1564
1565xmlChar *
1566xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1567 xmlChar buf[XML_MAX_NAMELEN + 5];
1568 xmlChar *buffer = NULL;
1569 int len = 0;
1570 int max = XML_MAX_NAMELEN;
1571 xmlChar *ret = NULL;
1572 const xmlChar *cur = name;
1573 int c;
1574
1575 *prefix = NULL;
1576
1577#ifndef XML_XML_NAMESPACE
1578 /* xml: prefix is not really a namespace */
1579 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1580 (cur[2] == 'l') && (cur[3] == ':'))
1581 return(xmlStrdup(name));
1582#endif
1583
1584 /* nasty but valid */
1585 if (cur[0] == ':')
1586 return(xmlStrdup(name));
1587
1588 c = *cur++;
1589 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1590 buf[len++] = c;
1591 c = *cur++;
1592 }
1593 if (len >= max) {
1594 /*
1595 * Okay someone managed to make a huge name, so he's ready to pay
1596 * for the processing speed.
1597 */
1598 max = len * 2;
1599
1600 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1601 if (buffer == NULL) {
1602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1603 ctxt->sax->error(ctxt->userData,
1604 "xmlSplitQName: out of memory\n");
1605 return(NULL);
1606 }
1607 memcpy(buffer, buf, len);
1608 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1609 if (len + 10 > max) {
1610 max *= 2;
1611 buffer = (xmlChar *) xmlRealloc(buffer,
1612 max * sizeof(xmlChar));
1613 if (buffer == NULL) {
1614 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1615 ctxt->sax->error(ctxt->userData,
1616 "xmlSplitQName: out of memory\n");
1617 return(NULL);
1618 }
1619 }
1620 buffer[len++] = c;
1621 c = *cur++;
1622 }
1623 buffer[len] = 0;
1624 }
1625
1626 if (buffer == NULL)
1627 ret = xmlStrndup(buf, len);
1628 else {
1629 ret = buffer;
1630 buffer = NULL;
1631 max = XML_MAX_NAMELEN;
1632 }
1633
1634
1635 if (c == ':') {
1636 c = *cur++;
1637 if (c == 0) return(ret);
1638 *prefix = ret;
1639 len = 0;
1640
1641 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1642 buf[len++] = c;
1643 c = *cur++;
1644 }
1645 if (len >= max) {
1646 /*
1647 * Okay someone managed to make a huge name, so he's ready to pay
1648 * for the processing speed.
1649 */
1650 max = len * 2;
1651
1652 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1653 if (buffer == NULL) {
1654 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1655 ctxt->sax->error(ctxt->userData,
1656 "xmlSplitQName: out of memory\n");
1657 return(NULL);
1658 }
1659 memcpy(buffer, buf, len);
1660 while (c != 0) { /* tested bigname2.xml */
1661 if (len + 10 > max) {
1662 max *= 2;
1663 buffer = (xmlChar *) xmlRealloc(buffer,
1664 max * sizeof(xmlChar));
1665 if (buffer == NULL) {
1666 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1667 ctxt->sax->error(ctxt->userData,
1668 "xmlSplitQName: out of memory\n");
1669 return(NULL);
1670 }
1671 }
1672 buffer[len++] = c;
1673 c = *cur++;
1674 }
1675 buffer[len] = 0;
1676 }
1677
1678 if (buffer == NULL)
1679 ret = xmlStrndup(buf, len);
1680 else {
1681 ret = buffer;
1682 }
1683 }
1684
1685 return(ret);
1686}
1687
1688/************************************************************************
1689 * *
1690 * The parser itself *
1691 * Relates to http://www.w3.org/TR/REC-xml *
1692 * *
1693 ************************************************************************/
1694
Daniel Veillard76d66f42001-05-16 21:05:17 +00001695static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001696/**
1697 * xmlParseName:
1698 * @ctxt: an XML parser context
1699 *
1700 * parse an XML name.
1701 *
1702 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1703 * CombiningChar | Extender
1704 *
1705 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1706 *
1707 * [6] Names ::= Name (S Name)*
1708 *
1709 * Returns the Name parsed or NULL
1710 */
1711
1712xmlChar *
1713xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001714 const xmlChar *in;
1715 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001716 int count = 0;
1717
1718 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001719
1720 /*
1721 * Accelerator for simple ASCII names
1722 */
1723 in = ctxt->input->cur;
1724 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1725 ((*in >= 0x41) && (*in <= 0x5A)) ||
1726 (*in == '_') || (*in == ':')) {
1727 in++;
1728 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1729 ((*in >= 0x41) && (*in <= 0x5A)) ||
1730 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001731 (*in == '_') || (*in == '-') ||
1732 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001733 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001734 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001735 count = in - ctxt->input->cur;
1736 ret = xmlStrndup(ctxt->input->cur, count);
1737 ctxt->input->cur = in;
1738 return(ret);
1739 }
1740 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001741 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001742}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001743
Daniel Veillard76d66f42001-05-16 21:05:17 +00001744static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001745xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1746 xmlChar buf[XML_MAX_NAMELEN + 5];
1747 int len = 0, l;
1748 int c;
1749 int count = 0;
1750
1751 /*
1752 * Handler for more complex cases
1753 */
1754 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001755 c = CUR_CHAR(l);
1756 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1757 (!IS_LETTER(c) && (c != '_') &&
1758 (c != ':'))) {
1759 return(NULL);
1760 }
1761
1762 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1763 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1764 (c == '.') || (c == '-') ||
1765 (c == '_') || (c == ':') ||
1766 (IS_COMBINING(c)) ||
1767 (IS_EXTENDER(c)))) {
1768 if (count++ > 100) {
1769 count = 0;
1770 GROW;
1771 }
1772 COPY_BUF(l,buf,len,c);
1773 NEXTL(l);
1774 c = CUR_CHAR(l);
1775 if (len >= XML_MAX_NAMELEN) {
1776 /*
1777 * Okay someone managed to make a huge name, so he's ready to pay
1778 * for the processing speed.
1779 */
1780 xmlChar *buffer;
1781 int max = len * 2;
1782
1783 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1784 if (buffer == NULL) {
1785 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1786 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001787 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001788 return(NULL);
1789 }
1790 memcpy(buffer, buf, len);
1791 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1792 (c == '.') || (c == '-') ||
1793 (c == '_') || (c == ':') ||
1794 (IS_COMBINING(c)) ||
1795 (IS_EXTENDER(c))) {
1796 if (count++ > 100) {
1797 count = 0;
1798 GROW;
1799 }
1800 if (len + 10 > max) {
1801 max *= 2;
1802 buffer = (xmlChar *) xmlRealloc(buffer,
1803 max * sizeof(xmlChar));
1804 if (buffer == NULL) {
1805 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1806 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001807 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001808 return(NULL);
1809 }
1810 }
1811 COPY_BUF(l,buffer,len,c);
1812 NEXTL(l);
1813 c = CUR_CHAR(l);
1814 }
1815 buffer[len] = 0;
1816 return(buffer);
1817 }
1818 }
1819 return(xmlStrndup(buf, len));
1820}
1821
1822/**
1823 * xmlParseStringName:
1824 * @ctxt: an XML parser context
1825 * @str: a pointer to the string pointer (IN/OUT)
1826 *
1827 * parse an XML name.
1828 *
1829 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1830 * CombiningChar | Extender
1831 *
1832 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1833 *
1834 * [6] Names ::= Name (S Name)*
1835 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001836 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00001837 * is updated to the current location in the string.
1838 */
1839
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001840static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001841xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1842 xmlChar buf[XML_MAX_NAMELEN + 5];
1843 const xmlChar *cur = *str;
1844 int len = 0, l;
1845 int c;
1846
1847 c = CUR_SCHAR(cur, l);
1848 if (!IS_LETTER(c) && (c != '_') &&
1849 (c != ':')) {
1850 return(NULL);
1851 }
1852
1853 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1854 (c == '.') || (c == '-') ||
1855 (c == '_') || (c == ':') ||
1856 (IS_COMBINING(c)) ||
1857 (IS_EXTENDER(c))) {
1858 COPY_BUF(l,buf,len,c);
1859 cur += l;
1860 c = CUR_SCHAR(cur, l);
1861 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1862 /*
1863 * Okay someone managed to make a huge name, so he's ready to pay
1864 * for the processing speed.
1865 */
1866 xmlChar *buffer;
1867 int max = len * 2;
1868
1869 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1870 if (buffer == NULL) {
1871 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1872 ctxt->sax->error(ctxt->userData,
1873 "xmlParseStringName: out of memory\n");
1874 return(NULL);
1875 }
1876 memcpy(buffer, buf, len);
1877 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1878 (c == '.') || (c == '-') ||
1879 (c == '_') || (c == ':') ||
1880 (IS_COMBINING(c)) ||
1881 (IS_EXTENDER(c))) {
1882 if (len + 10 > max) {
1883 max *= 2;
1884 buffer = (xmlChar *) xmlRealloc(buffer,
1885 max * sizeof(xmlChar));
1886 if (buffer == NULL) {
1887 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1888 ctxt->sax->error(ctxt->userData,
1889 "xmlParseStringName: out of memory\n");
1890 return(NULL);
1891 }
1892 }
1893 COPY_BUF(l,buffer,len,c);
1894 cur += l;
1895 c = CUR_SCHAR(cur, l);
1896 }
1897 buffer[len] = 0;
1898 *str = cur;
1899 return(buffer);
1900 }
1901 }
1902 *str = cur;
1903 return(xmlStrndup(buf, len));
1904}
1905
1906/**
1907 * xmlParseNmtoken:
1908 * @ctxt: an XML parser context
1909 *
1910 * parse an XML Nmtoken.
1911 *
1912 * [7] Nmtoken ::= (NameChar)+
1913 *
1914 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1915 *
1916 * Returns the Nmtoken parsed or NULL
1917 */
1918
1919xmlChar *
1920xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1921 xmlChar buf[XML_MAX_NAMELEN + 5];
1922 int len = 0, l;
1923 int c;
1924 int count = 0;
1925
1926 GROW;
1927 c = CUR_CHAR(l);
1928
1929 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1930 (c == '.') || (c == '-') ||
1931 (c == '_') || (c == ':') ||
1932 (IS_COMBINING(c)) ||
1933 (IS_EXTENDER(c))) {
1934 if (count++ > 100) {
1935 count = 0;
1936 GROW;
1937 }
1938 COPY_BUF(l,buf,len,c);
1939 NEXTL(l);
1940 c = CUR_CHAR(l);
1941 if (len >= XML_MAX_NAMELEN) {
1942 /*
1943 * Okay someone managed to make a huge token, so he's ready to pay
1944 * for the processing speed.
1945 */
1946 xmlChar *buffer;
1947 int max = len * 2;
1948
1949 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1950 if (buffer == NULL) {
1951 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1952 ctxt->sax->error(ctxt->userData,
1953 "xmlParseNmtoken: out of memory\n");
1954 return(NULL);
1955 }
1956 memcpy(buffer, buf, len);
1957 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1958 (c == '.') || (c == '-') ||
1959 (c == '_') || (c == ':') ||
1960 (IS_COMBINING(c)) ||
1961 (IS_EXTENDER(c))) {
1962 if (count++ > 100) {
1963 count = 0;
1964 GROW;
1965 }
1966 if (len + 10 > max) {
1967 max *= 2;
1968 buffer = (xmlChar *) xmlRealloc(buffer,
1969 max * sizeof(xmlChar));
1970 if (buffer == NULL) {
1971 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1972 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001973 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001974 return(NULL);
1975 }
1976 }
1977 COPY_BUF(l,buffer,len,c);
1978 NEXTL(l);
1979 c = CUR_CHAR(l);
1980 }
1981 buffer[len] = 0;
1982 return(buffer);
1983 }
1984 }
1985 if (len == 0)
1986 return(NULL);
1987 return(xmlStrndup(buf, len));
1988}
1989
1990/**
1991 * xmlParseEntityValue:
1992 * @ctxt: an XML parser context
1993 * @orig: if non-NULL store a copy of the original entity value
1994 *
1995 * parse a value for ENTITY declarations
1996 *
1997 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1998 * "'" ([^%&'] | PEReference | Reference)* "'"
1999 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002000 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002001 */
2002
2003xmlChar *
2004xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2005 xmlChar *buf = NULL;
2006 int len = 0;
2007 int size = XML_PARSER_BUFFER_SIZE;
2008 int c, l;
2009 xmlChar stop;
2010 xmlChar *ret = NULL;
2011 const xmlChar *cur = NULL;
2012 xmlParserInputPtr input;
2013
2014 if (RAW == '"') stop = '"';
2015 else if (RAW == '\'') stop = '\'';
2016 else {
2017 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2018 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2019 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2020 ctxt->wellFormed = 0;
2021 ctxt->disableSAX = 1;
2022 return(NULL);
2023 }
2024 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2025 if (buf == NULL) {
2026 xmlGenericError(xmlGenericErrorContext,
2027 "malloc of %d byte failed\n", size);
2028 return(NULL);
2029 }
2030
2031 /*
2032 * The content of the entity definition is copied in a buffer.
2033 */
2034
2035 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2036 input = ctxt->input;
2037 GROW;
2038 NEXT;
2039 c = CUR_CHAR(l);
2040 /*
2041 * NOTE: 4.4.5 Included in Literal
2042 * When a parameter entity reference appears in a literal entity
2043 * value, ... a single or double quote character in the replacement
2044 * text is always treated as a normal data character and will not
2045 * terminate the literal.
2046 * In practice it means we stop the loop only when back at parsing
2047 * the initial entity and the quote is found
2048 */
2049 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2050 (ctxt->input != input))) {
2051 if (len + 5 >= size) {
2052 size *= 2;
2053 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2054 if (buf == NULL) {
2055 xmlGenericError(xmlGenericErrorContext,
2056 "realloc of %d byte failed\n", size);
2057 return(NULL);
2058 }
2059 }
2060 COPY_BUF(l,buf,len,c);
2061 NEXTL(l);
2062 /*
2063 * Pop-up of finished entities.
2064 */
2065 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2066 xmlPopInput(ctxt);
2067
2068 GROW;
2069 c = CUR_CHAR(l);
2070 if (c == 0) {
2071 GROW;
2072 c = CUR_CHAR(l);
2073 }
2074 }
2075 buf[len] = 0;
2076
2077 /*
2078 * Raise problem w.r.t. '&' and '%' being used in non-entities
2079 * reference constructs. Note Charref will be handled in
2080 * xmlStringDecodeEntities()
2081 */
2082 cur = buf;
2083 while (*cur != 0) { /* non input consuming */
2084 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2085 xmlChar *name;
2086 xmlChar tmp = *cur;
2087
2088 cur++;
2089 name = xmlParseStringName(ctxt, &cur);
2090 if ((name == NULL) || (*cur != ';')) {
2091 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2092 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2093 ctxt->sax->error(ctxt->userData,
2094 "EntityValue: '%c' forbidden except for entities references\n",
2095 tmp);
2096 ctxt->wellFormed = 0;
2097 ctxt->disableSAX = 1;
2098 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002099 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2100 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002101 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2102 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2103 ctxt->sax->error(ctxt->userData,
2104 "EntityValue: PEReferences forbidden in internal subset\n",
2105 tmp);
2106 ctxt->wellFormed = 0;
2107 ctxt->disableSAX = 1;
2108 }
2109 if (name != NULL)
2110 xmlFree(name);
2111 }
2112 cur++;
2113 }
2114
2115 /*
2116 * Then PEReference entities are substituted.
2117 */
2118 if (c != stop) {
2119 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2120 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2121 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2122 ctxt->wellFormed = 0;
2123 ctxt->disableSAX = 1;
2124 xmlFree(buf);
2125 } else {
2126 NEXT;
2127 /*
2128 * NOTE: 4.4.7 Bypassed
2129 * When a general entity reference appears in the EntityValue in
2130 * an entity declaration, it is bypassed and left as is.
2131 * so XML_SUBSTITUTE_REF is not set here.
2132 */
2133 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2134 0, 0, 0);
2135 if (orig != NULL)
2136 *orig = buf;
2137 else
2138 xmlFree(buf);
2139 }
2140
2141 return(ret);
2142}
2143
2144/**
2145 * xmlParseAttValue:
2146 * @ctxt: an XML parser context
2147 *
2148 * parse a value for an attribute
2149 * Note: the parser won't do substitution of entities here, this
2150 * will be handled later in xmlStringGetNodeList
2151 *
2152 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2153 * "'" ([^<&'] | Reference)* "'"
2154 *
2155 * 3.3.3 Attribute-Value Normalization:
2156 * Before the value of an attribute is passed to the application or
2157 * checked for validity, the XML processor must normalize it as follows:
2158 * - a character reference is processed by appending the referenced
2159 * character to the attribute value
2160 * - an entity reference is processed by recursively processing the
2161 * replacement text of the entity
2162 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2163 * appending #x20 to the normalized value, except that only a single
2164 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2165 * parsed entity or the literal entity value of an internal parsed entity
2166 * - other characters are processed by appending them to the normalized value
2167 * If the declared value is not CDATA, then the XML processor must further
2168 * process the normalized attribute value by discarding any leading and
2169 * trailing space (#x20) characters, and by replacing sequences of space
2170 * (#x20) characters by a single space (#x20) character.
2171 * All attributes for which no declaration has been read should be treated
2172 * by a non-validating parser as if declared CDATA.
2173 *
2174 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2175 */
2176
2177xmlChar *
2178xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2179 xmlChar limit = 0;
2180 xmlChar *buf = NULL;
2181 int len = 0;
2182 int buf_size = 0;
2183 int c, l;
2184 xmlChar *current = NULL;
2185 xmlEntityPtr ent;
2186
2187
2188 SHRINK;
2189 if (NXT(0) == '"') {
2190 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2191 limit = '"';
2192 NEXT;
2193 } else if (NXT(0) == '\'') {
2194 limit = '\'';
2195 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2196 NEXT;
2197 } else {
2198 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2199 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2200 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2201 ctxt->wellFormed = 0;
2202 ctxt->disableSAX = 1;
2203 return(NULL);
2204 }
2205
2206 /*
2207 * allocate a translation buffer.
2208 */
2209 buf_size = XML_PARSER_BUFFER_SIZE;
2210 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2211 if (buf == NULL) {
2212 perror("xmlParseAttValue: malloc failed");
2213 return(NULL);
2214 }
2215
2216 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002217 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002218 */
2219 c = CUR_CHAR(l);
2220 while (((NXT(0) != limit) && /* checked */
2221 (c != '<')) || (ctxt->token != 0)) {
2222 if (c == 0) break;
2223 if (ctxt->token == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002224 if (ctxt->replaceEntities) {
2225 if (len > buf_size - 10) {
2226 growBuffer(buf);
2227 }
2228 buf[len++] = '&';
2229 } else {
2230 /*
2231 * The reparsing will be done in xmlStringGetNodeList()
2232 * called by the attribute() function in SAX.c
2233 */
2234 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002235
Daniel Veillard319a7422001-09-11 09:27:09 +00002236 if (len > buf_size - 10) {
2237 growBuffer(buf);
2238 }
2239 current = &buffer[0];
2240 while (*current != 0) { /* non input consuming */
2241 buf[len++] = *current++;
2242 }
2243 ctxt->token = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002244 }
Owen Taylor3473f882001-02-23 17:55:21 +00002245 } else if (c == '&') {
2246 if (NXT(1) == '#') {
2247 int val = xmlParseCharRef(ctxt);
2248 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002249 if (ctxt->replaceEntities) {
2250 if (len > buf_size - 10) {
2251 growBuffer(buf);
2252 }
2253 buf[len++] = '&';
2254 } else {
2255 /*
2256 * The reparsing will be done in xmlStringGetNodeList()
2257 * called by the attribute() function in SAX.c
2258 */
2259 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002260
Daniel Veillard319a7422001-09-11 09:27:09 +00002261 if (len > buf_size - 10) {
2262 growBuffer(buf);
2263 }
2264 current = &buffer[0];
2265 while (*current != 0) { /* non input consuming */
2266 buf[len++] = *current++;
2267 }
Owen Taylor3473f882001-02-23 17:55:21 +00002268 }
2269 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002270 if (len > buf_size - 10) {
2271 growBuffer(buf);
2272 }
Owen Taylor3473f882001-02-23 17:55:21 +00002273 len += xmlCopyChar(0, &buf[len], val);
2274 }
2275 } else {
2276 ent = xmlParseEntityRef(ctxt);
2277 if ((ent != NULL) &&
2278 (ctxt->replaceEntities != 0)) {
2279 xmlChar *rep;
2280
2281 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2282 rep = xmlStringDecodeEntities(ctxt, ent->content,
2283 XML_SUBSTITUTE_REF, 0, 0, 0);
2284 if (rep != NULL) {
2285 current = rep;
2286 while (*current != 0) { /* non input consuming */
2287 buf[len++] = *current++;
2288 if (len > buf_size - 10) {
2289 growBuffer(buf);
2290 }
2291 }
2292 xmlFree(rep);
2293 }
2294 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002295 if (len > buf_size - 10) {
2296 growBuffer(buf);
2297 }
Owen Taylor3473f882001-02-23 17:55:21 +00002298 if (ent->content != NULL)
2299 buf[len++] = ent->content[0];
2300 }
2301 } else if (ent != NULL) {
2302 int i = xmlStrlen(ent->name);
2303 const xmlChar *cur = ent->name;
2304
2305 /*
2306 * This may look absurd but is needed to detect
2307 * entities problems
2308 */
2309 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2310 (ent->content != NULL)) {
2311 xmlChar *rep;
2312 rep = xmlStringDecodeEntities(ctxt, ent->content,
2313 XML_SUBSTITUTE_REF, 0, 0, 0);
2314 if (rep != NULL)
2315 xmlFree(rep);
2316 }
2317
2318 /*
2319 * Just output the reference
2320 */
2321 buf[len++] = '&';
2322 if (len > buf_size - i - 10) {
2323 growBuffer(buf);
2324 }
2325 for (;i > 0;i--)
2326 buf[len++] = *cur++;
2327 buf[len++] = ';';
2328 }
2329 }
2330 } else {
2331 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2332 COPY_BUF(l,buf,len,0x20);
2333 if (len > buf_size - 10) {
2334 growBuffer(buf);
2335 }
2336 } else {
2337 COPY_BUF(l,buf,len,c);
2338 if (len > buf_size - 10) {
2339 growBuffer(buf);
2340 }
2341 }
2342 NEXTL(l);
2343 }
2344 GROW;
2345 c = CUR_CHAR(l);
2346 }
2347 buf[len++] = 0;
2348 if (RAW == '<') {
2349 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2350 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2351 ctxt->sax->error(ctxt->userData,
2352 "Unescaped '<' not allowed in attributes values\n");
2353 ctxt->wellFormed = 0;
2354 ctxt->disableSAX = 1;
2355 } else if (RAW != limit) {
2356 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2357 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2358 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2359 ctxt->wellFormed = 0;
2360 ctxt->disableSAX = 1;
2361 } else
2362 NEXT;
2363 return(buf);
2364}
2365
2366/**
2367 * xmlParseSystemLiteral:
2368 * @ctxt: an XML parser context
2369 *
2370 * parse an XML Literal
2371 *
2372 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2373 *
2374 * Returns the SystemLiteral parsed or NULL
2375 */
2376
2377xmlChar *
2378xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2379 xmlChar *buf = NULL;
2380 int len = 0;
2381 int size = XML_PARSER_BUFFER_SIZE;
2382 int cur, l;
2383 xmlChar stop;
2384 int state = ctxt->instate;
2385 int count = 0;
2386
2387 SHRINK;
2388 if (RAW == '"') {
2389 NEXT;
2390 stop = '"';
2391 } else if (RAW == '\'') {
2392 NEXT;
2393 stop = '\'';
2394 } else {
2395 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2396 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2397 ctxt->sax->error(ctxt->userData,
2398 "SystemLiteral \" or ' expected\n");
2399 ctxt->wellFormed = 0;
2400 ctxt->disableSAX = 1;
2401 return(NULL);
2402 }
2403
2404 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2405 if (buf == NULL) {
2406 xmlGenericError(xmlGenericErrorContext,
2407 "malloc of %d byte failed\n", size);
2408 return(NULL);
2409 }
2410 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2411 cur = CUR_CHAR(l);
2412 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2413 if (len + 5 >= size) {
2414 size *= 2;
2415 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2416 if (buf == NULL) {
2417 xmlGenericError(xmlGenericErrorContext,
2418 "realloc of %d byte failed\n", size);
2419 ctxt->instate = (xmlParserInputState) state;
2420 return(NULL);
2421 }
2422 }
2423 count++;
2424 if (count > 50) {
2425 GROW;
2426 count = 0;
2427 }
2428 COPY_BUF(l,buf,len,cur);
2429 NEXTL(l);
2430 cur = CUR_CHAR(l);
2431 if (cur == 0) {
2432 GROW;
2433 SHRINK;
2434 cur = CUR_CHAR(l);
2435 }
2436 }
2437 buf[len] = 0;
2438 ctxt->instate = (xmlParserInputState) state;
2439 if (!IS_CHAR(cur)) {
2440 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2441 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2442 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2443 ctxt->wellFormed = 0;
2444 ctxt->disableSAX = 1;
2445 } else {
2446 NEXT;
2447 }
2448 return(buf);
2449}
2450
2451/**
2452 * xmlParsePubidLiteral:
2453 * @ctxt: an XML parser context
2454 *
2455 * parse an XML public literal
2456 *
2457 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2458 *
2459 * Returns the PubidLiteral parsed or NULL.
2460 */
2461
2462xmlChar *
2463xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2464 xmlChar *buf = NULL;
2465 int len = 0;
2466 int size = XML_PARSER_BUFFER_SIZE;
2467 xmlChar cur;
2468 xmlChar stop;
2469 int count = 0;
2470
2471 SHRINK;
2472 if (RAW == '"') {
2473 NEXT;
2474 stop = '"';
2475 } else if (RAW == '\'') {
2476 NEXT;
2477 stop = '\'';
2478 } else {
2479 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2480 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2481 ctxt->sax->error(ctxt->userData,
2482 "SystemLiteral \" or ' expected\n");
2483 ctxt->wellFormed = 0;
2484 ctxt->disableSAX = 1;
2485 return(NULL);
2486 }
2487 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2488 if (buf == NULL) {
2489 xmlGenericError(xmlGenericErrorContext,
2490 "malloc of %d byte failed\n", size);
2491 return(NULL);
2492 }
2493 cur = CUR;
2494 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2495 if (len + 1 >= size) {
2496 size *= 2;
2497 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2498 if (buf == NULL) {
2499 xmlGenericError(xmlGenericErrorContext,
2500 "realloc of %d byte failed\n", size);
2501 return(NULL);
2502 }
2503 }
2504 buf[len++] = cur;
2505 count++;
2506 if (count > 50) {
2507 GROW;
2508 count = 0;
2509 }
2510 NEXT;
2511 cur = CUR;
2512 if (cur == 0) {
2513 GROW;
2514 SHRINK;
2515 cur = CUR;
2516 }
2517 }
2518 buf[len] = 0;
2519 if (cur != stop) {
2520 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2521 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2522 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2523 ctxt->wellFormed = 0;
2524 ctxt->disableSAX = 1;
2525 } else {
2526 NEXT;
2527 }
2528 return(buf);
2529}
2530
Daniel Veillard48b2f892001-02-25 16:11:03 +00002531void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002532/**
2533 * xmlParseCharData:
2534 * @ctxt: an XML parser context
2535 * @cdata: int indicating whether we are within a CDATA section
2536 *
2537 * parse a CharData section.
2538 * if we are within a CDATA section ']]>' marks an end of section.
2539 *
2540 * The right angle bracket (>) may be represented using the string "&gt;",
2541 * and must, for compatibility, be escaped using "&gt;" or a character
2542 * reference when it appears in the string "]]>" in content, when that
2543 * string is not marking the end of a CDATA section.
2544 *
2545 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2546 */
2547
2548void
2549xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002550 const xmlChar *in;
2551 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002552 int line = ctxt->input->line;
2553 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002554
2555 SHRINK;
2556 GROW;
2557 /*
2558 * Accelerated common case where input don't need to be
2559 * modified before passing it to the handler.
2560 */
2561 if ((ctxt->token == 0) && (!cdata)) {
2562 in = ctxt->input->cur;
2563 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002564get_more:
Daniel Veillard48b2f892001-02-25 16:11:03 +00002565 while (((*in >= 0x20) && (*in != '<') &&
2566 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2567 in++;
2568 if (*in == 0xA) {
2569 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002570 in++;
2571 while (*in == 0xA) {
2572 ctxt->input->line++;
2573 in++;
2574 }
2575 goto get_more;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002576 }
2577 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002578 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002579 if (IS_BLANK(*ctxt->input->cur)) {
2580 const xmlChar *tmp = ctxt->input->cur;
2581 ctxt->input->cur = in;
2582 if (areBlanks(ctxt, tmp, nbchar)) {
2583 if (ctxt->sax->ignorableWhitespace != NULL)
2584 ctxt->sax->ignorableWhitespace(ctxt->userData,
2585 tmp, nbchar);
2586 } else {
2587 if (ctxt->sax->characters != NULL)
2588 ctxt->sax->characters(ctxt->userData,
2589 tmp, nbchar);
2590 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002591 line = ctxt->input->line;
2592 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002593 } else {
2594 if (ctxt->sax->characters != NULL)
2595 ctxt->sax->characters(ctxt->userData,
2596 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002597 line = ctxt->input->line;
2598 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002599 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002600 }
2601 ctxt->input->cur = in;
2602 if (*in == 0xD) {
2603 in++;
2604 if (*in == 0xA) {
2605 ctxt->input->cur = in;
2606 in++;
2607 ctxt->input->line++;
2608 continue; /* while */
2609 }
2610 in--;
2611 }
Daniel Veillard80f32572001-03-07 19:45:40 +00002612 if (*in == '<') {
2613 return;
2614 }
2615 if (*in == '&') {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002616 return;
2617 }
2618 SHRINK;
2619 GROW;
2620 in = ctxt->input->cur;
2621 } while ((*in >= 0x20) && (*in <= 0x7F));
2622 nbchar = 0;
2623 }
Daniel Veillard50582112001-03-26 22:52:16 +00002624 ctxt->input->line = line;
2625 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002626 xmlParseCharDataComplex(ctxt, cdata);
2627}
2628
2629void
2630xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002631 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2632 int nbchar = 0;
2633 int cur, l;
2634 int count = 0;
2635
2636 SHRINK;
2637 GROW;
2638 cur = CUR_CHAR(l);
2639 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2640 ((cur != '&') || (ctxt->token == '&')) &&
2641 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2642 if ((cur == ']') && (NXT(1) == ']') &&
2643 (NXT(2) == '>')) {
2644 if (cdata) break;
2645 else {
2646 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2647 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2648 ctxt->sax->error(ctxt->userData,
2649 "Sequence ']]>' not allowed in content\n");
2650 /* Should this be relaxed ??? I see a "must here */
2651 ctxt->wellFormed = 0;
2652 ctxt->disableSAX = 1;
2653 }
2654 }
2655 COPY_BUF(l,buf,nbchar,cur);
2656 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2657 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002658 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002659 */
2660 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2661 if (areBlanks(ctxt, buf, nbchar)) {
2662 if (ctxt->sax->ignorableWhitespace != NULL)
2663 ctxt->sax->ignorableWhitespace(ctxt->userData,
2664 buf, nbchar);
2665 } else {
2666 if (ctxt->sax->characters != NULL)
2667 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2668 }
2669 }
2670 nbchar = 0;
2671 }
2672 count++;
2673 if (count > 50) {
2674 GROW;
2675 count = 0;
2676 }
2677 NEXTL(l);
2678 cur = CUR_CHAR(l);
2679 }
2680 if (nbchar != 0) {
2681 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002682 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002683 */
2684 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2685 if (areBlanks(ctxt, buf, nbchar)) {
2686 if (ctxt->sax->ignorableWhitespace != NULL)
2687 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2688 } else {
2689 if (ctxt->sax->characters != NULL)
2690 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2691 }
2692 }
2693 }
2694}
2695
2696/**
2697 * xmlParseExternalID:
2698 * @ctxt: an XML parser context
2699 * @publicID: a xmlChar** receiving PubidLiteral
2700 * @strict: indicate whether we should restrict parsing to only
2701 * production [75], see NOTE below
2702 *
2703 * Parse an External ID or a Public ID
2704 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002705 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002706 * 'PUBLIC' S PubidLiteral S SystemLiteral
2707 *
2708 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2709 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2710 *
2711 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2712 *
2713 * Returns the function returns SystemLiteral and in the second
2714 * case publicID receives PubidLiteral, is strict is off
2715 * it is possible to return NULL and have publicID set.
2716 */
2717
2718xmlChar *
2719xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2720 xmlChar *URI = NULL;
2721
2722 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002723
2724 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002725 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2726 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2727 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2728 SKIP(6);
2729 if (!IS_BLANK(CUR)) {
2730 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2731 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2732 ctxt->sax->error(ctxt->userData,
2733 "Space required after 'SYSTEM'\n");
2734 ctxt->wellFormed = 0;
2735 ctxt->disableSAX = 1;
2736 }
2737 SKIP_BLANKS;
2738 URI = xmlParseSystemLiteral(ctxt);
2739 if (URI == NULL) {
2740 ctxt->errNo = XML_ERR_URI_REQUIRED;
2741 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2742 ctxt->sax->error(ctxt->userData,
2743 "xmlParseExternalID: SYSTEM, no URI\n");
2744 ctxt->wellFormed = 0;
2745 ctxt->disableSAX = 1;
2746 }
2747 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2748 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2749 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2750 SKIP(6);
2751 if (!IS_BLANK(CUR)) {
2752 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2753 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2754 ctxt->sax->error(ctxt->userData,
2755 "Space required after 'PUBLIC'\n");
2756 ctxt->wellFormed = 0;
2757 ctxt->disableSAX = 1;
2758 }
2759 SKIP_BLANKS;
2760 *publicID = xmlParsePubidLiteral(ctxt);
2761 if (*publicID == NULL) {
2762 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2763 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2764 ctxt->sax->error(ctxt->userData,
2765 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2766 ctxt->wellFormed = 0;
2767 ctxt->disableSAX = 1;
2768 }
2769 if (strict) {
2770 /*
2771 * We don't handle [83] so "S SystemLiteral" is required.
2772 */
2773 if (!IS_BLANK(CUR)) {
2774 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2775 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2776 ctxt->sax->error(ctxt->userData,
2777 "Space required after the Public Identifier\n");
2778 ctxt->wellFormed = 0;
2779 ctxt->disableSAX = 1;
2780 }
2781 } else {
2782 /*
2783 * We handle [83] so we return immediately, if
2784 * "S SystemLiteral" is not detected. From a purely parsing
2785 * point of view that's a nice mess.
2786 */
2787 const xmlChar *ptr;
2788 GROW;
2789
2790 ptr = CUR_PTR;
2791 if (!IS_BLANK(*ptr)) return(NULL);
2792
2793 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2794 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2795 }
2796 SKIP_BLANKS;
2797 URI = xmlParseSystemLiteral(ctxt);
2798 if (URI == NULL) {
2799 ctxt->errNo = XML_ERR_URI_REQUIRED;
2800 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2801 ctxt->sax->error(ctxt->userData,
2802 "xmlParseExternalID: PUBLIC, no URI\n");
2803 ctxt->wellFormed = 0;
2804 ctxt->disableSAX = 1;
2805 }
2806 }
2807 return(URI);
2808}
2809
2810/**
2811 * xmlParseComment:
2812 * @ctxt: an XML parser context
2813 *
2814 * Skip an XML (SGML) comment <!-- .... -->
2815 * The spec says that "For compatibility, the string "--" (double-hyphen)
2816 * must not occur within comments. "
2817 *
2818 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2819 */
2820void
2821xmlParseComment(xmlParserCtxtPtr ctxt) {
2822 xmlChar *buf = NULL;
2823 int len;
2824 int size = XML_PARSER_BUFFER_SIZE;
2825 int q, ql;
2826 int r, rl;
2827 int cur, l;
2828 xmlParserInputState state;
2829 xmlParserInputPtr input = ctxt->input;
2830 int count = 0;
2831
2832 /*
2833 * Check that there is a comment right here.
2834 */
2835 if ((RAW != '<') || (NXT(1) != '!') ||
2836 (NXT(2) != '-') || (NXT(3) != '-')) return;
2837
2838 state = ctxt->instate;
2839 ctxt->instate = XML_PARSER_COMMENT;
2840 SHRINK;
2841 SKIP(4);
2842 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2843 if (buf == NULL) {
2844 xmlGenericError(xmlGenericErrorContext,
2845 "malloc of %d byte failed\n", size);
2846 ctxt->instate = state;
2847 return;
2848 }
2849 q = CUR_CHAR(ql);
2850 NEXTL(ql);
2851 r = CUR_CHAR(rl);
2852 NEXTL(rl);
2853 cur = CUR_CHAR(l);
2854 len = 0;
2855 while (IS_CHAR(cur) && /* checked */
2856 ((cur != '>') ||
2857 (r != '-') || (q != '-'))) {
2858 if ((r == '-') && (q == '-') && (len > 1)) {
2859 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2860 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2861 ctxt->sax->error(ctxt->userData,
2862 "Comment must not contain '--' (double-hyphen)`\n");
2863 ctxt->wellFormed = 0;
2864 ctxt->disableSAX = 1;
2865 }
2866 if (len + 5 >= size) {
2867 size *= 2;
2868 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2869 if (buf == NULL) {
2870 xmlGenericError(xmlGenericErrorContext,
2871 "realloc of %d byte failed\n", size);
2872 ctxt->instate = state;
2873 return;
2874 }
2875 }
2876 COPY_BUF(ql,buf,len,q);
2877 q = r;
2878 ql = rl;
2879 r = cur;
2880 rl = l;
2881
2882 count++;
2883 if (count > 50) {
2884 GROW;
2885 count = 0;
2886 }
2887 NEXTL(l);
2888 cur = CUR_CHAR(l);
2889 if (cur == 0) {
2890 SHRINK;
2891 GROW;
2892 cur = CUR_CHAR(l);
2893 }
2894 }
2895 buf[len] = 0;
2896 if (!IS_CHAR(cur)) {
2897 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2898 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2899 ctxt->sax->error(ctxt->userData,
2900 "Comment not terminated \n<!--%.50s\n", buf);
2901 ctxt->wellFormed = 0;
2902 ctxt->disableSAX = 1;
2903 xmlFree(buf);
2904 } else {
2905 if (input != ctxt->input) {
2906 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2907 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2908 ctxt->sax->error(ctxt->userData,
2909"Comment doesn't start and stop in the same entity\n");
2910 ctxt->wellFormed = 0;
2911 ctxt->disableSAX = 1;
2912 }
2913 NEXT;
2914 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2915 (!ctxt->disableSAX))
2916 ctxt->sax->comment(ctxt->userData, buf);
2917 xmlFree(buf);
2918 }
2919 ctxt->instate = state;
2920}
2921
2922/**
2923 * xmlParsePITarget:
2924 * @ctxt: an XML parser context
2925 *
2926 * parse the name of a PI
2927 *
2928 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2929 *
2930 * Returns the PITarget name or NULL
2931 */
2932
2933xmlChar *
2934xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2935 xmlChar *name;
2936
2937 name = xmlParseName(ctxt);
2938 if ((name != NULL) &&
2939 ((name[0] == 'x') || (name[0] == 'X')) &&
2940 ((name[1] == 'm') || (name[1] == 'M')) &&
2941 ((name[2] == 'l') || (name[2] == 'L'))) {
2942 int i;
2943 if ((name[0] == 'x') && (name[1] == 'm') &&
2944 (name[2] == 'l') && (name[3] == 0)) {
2945 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2946 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2947 ctxt->sax->error(ctxt->userData,
2948 "XML declaration allowed only at the start of the document\n");
2949 ctxt->wellFormed = 0;
2950 ctxt->disableSAX = 1;
2951 return(name);
2952 } else if (name[3] == 0) {
2953 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2954 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2955 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2956 ctxt->wellFormed = 0;
2957 ctxt->disableSAX = 1;
2958 return(name);
2959 }
2960 for (i = 0;;i++) {
2961 if (xmlW3CPIs[i] == NULL) break;
2962 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2963 return(name);
2964 }
2965 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2966 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2967 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002968 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002969 }
2970 }
2971 return(name);
2972}
2973
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype informations
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * Syntax problems raise a warning (XML_WAR_CATALOG_PI), except a missing
 * '=' after the "catalog" keyword, which is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p;
    const xmlChar *start;
    xmlChar *uri = NULL;
    xmlChar quote;

    /* optional blanks, then the "catalog" pseudo-attribute name */
    for (p = catalog; IS_BLANK(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto syntax_error;

    for (p += 7; IS_BLANK(*p); p++)
        ;
    if (*p != '=')
        return;

    /* quoted value */
    for (p++; IS_BLANK(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto syntax_error;
    start = ++p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto syntax_error;
    uri = xmlStrndup(start, p - start);

    /* only blanks may follow the closing quote */
    for (p++; IS_BLANK(*p); p++)
        ;
    if (*p != 0)
        goto syntax_error;

    if (uri != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, uri);
        xmlFree(uri);
    }
    return;

syntax_error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (uri != NULL)
        xmlFree(uri);
}
#endif
3036
Owen Taylor3473f882001-02-23 17:55:21 +00003037/**
3038 * xmlParsePI:
3039 * @ctxt: an XML parser context
3040 *
3041 * parse an XML Processing Instruction.
3042 *
3043 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3044 *
3045 * The processing is transfered to SAX once parsed.
3046 */
3047
3048void
3049xmlParsePI(xmlParserCtxtPtr ctxt) {
3050 xmlChar *buf = NULL;
3051 int len = 0;
3052 int size = XML_PARSER_BUFFER_SIZE;
3053 int cur, l;
3054 xmlChar *target;
3055 xmlParserInputState state;
3056 int count = 0;
3057
3058 if ((RAW == '<') && (NXT(1) == '?')) {
3059 xmlParserInputPtr input = ctxt->input;
3060 state = ctxt->instate;
3061 ctxt->instate = XML_PARSER_PI;
3062 /*
3063 * this is a Processing Instruction.
3064 */
3065 SKIP(2);
3066 SHRINK;
3067
3068 /*
3069 * Parse the target name and check for special support like
3070 * namespace.
3071 */
3072 target = xmlParsePITarget(ctxt);
3073 if (target != NULL) {
3074 if ((RAW == '?') && (NXT(1) == '>')) {
3075 if (input != ctxt->input) {
3076 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3077 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3078 ctxt->sax->error(ctxt->userData,
3079 "PI declaration doesn't start and stop in the same entity\n");
3080 ctxt->wellFormed = 0;
3081 ctxt->disableSAX = 1;
3082 }
3083 SKIP(2);
3084
3085 /*
3086 * SAX: PI detected.
3087 */
3088 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3089 (ctxt->sax->processingInstruction != NULL))
3090 ctxt->sax->processingInstruction(ctxt->userData,
3091 target, NULL);
3092 ctxt->instate = state;
3093 xmlFree(target);
3094 return;
3095 }
3096 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3097 if (buf == NULL) {
3098 xmlGenericError(xmlGenericErrorContext,
3099 "malloc of %d byte failed\n", size);
3100 ctxt->instate = state;
3101 return;
3102 }
3103 cur = CUR;
3104 if (!IS_BLANK(cur)) {
3105 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3106 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3107 ctxt->sax->error(ctxt->userData,
3108 "xmlParsePI: PI %s space expected\n", target);
3109 ctxt->wellFormed = 0;
3110 ctxt->disableSAX = 1;
3111 }
3112 SKIP_BLANKS;
3113 cur = CUR_CHAR(l);
3114 while (IS_CHAR(cur) && /* checked */
3115 ((cur != '?') || (NXT(1) != '>'))) {
3116 if (len + 5 >= size) {
3117 size *= 2;
3118 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3119 if (buf == NULL) {
3120 xmlGenericError(xmlGenericErrorContext,
3121 "realloc of %d byte failed\n", size);
3122 ctxt->instate = state;
3123 return;
3124 }
3125 }
3126 count++;
3127 if (count > 50) {
3128 GROW;
3129 count = 0;
3130 }
3131 COPY_BUF(l,buf,len,cur);
3132 NEXTL(l);
3133 cur = CUR_CHAR(l);
3134 if (cur == 0) {
3135 SHRINK;
3136 GROW;
3137 cur = CUR_CHAR(l);
3138 }
3139 }
3140 buf[len] = 0;
3141 if (cur != '?') {
3142 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3143 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3144 ctxt->sax->error(ctxt->userData,
3145 "xmlParsePI: PI %s never end ...\n", target);
3146 ctxt->wellFormed = 0;
3147 ctxt->disableSAX = 1;
3148 } else {
3149 if (input != ctxt->input) {
3150 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3151 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3152 ctxt->sax->error(ctxt->userData,
3153 "PI declaration doesn't start and stop in the same entity\n");
3154 ctxt->wellFormed = 0;
3155 ctxt->disableSAX = 1;
3156 }
3157 SKIP(2);
3158
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003159#ifdef LIBXML_CATALOG_ENABLED
3160 if (((state == XML_PARSER_MISC) ||
3161 (state == XML_PARSER_START)) &&
3162 (xmlStrEqual(target, XML_CATALOG_PI))) {
3163 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3164 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3165 (allow == XML_CATA_ALLOW_ALL))
3166 xmlParseCatalogPI(ctxt, buf);
3167 }
3168#endif
3169
3170
Owen Taylor3473f882001-02-23 17:55:21 +00003171 /*
3172 * SAX: PI detected.
3173 */
3174 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3175 (ctxt->sax->processingInstruction != NULL))
3176 ctxt->sax->processingInstruction(ctxt->userData,
3177 target, buf);
3178 }
3179 xmlFree(buf);
3180 xmlFree(target);
3181 } else {
3182 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3183 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3184 ctxt->sax->error(ctxt->userData,
3185 "xmlParsePI : no target name\n");
3186 ctxt->wellFormed = 0;
3187 ctxt->disableSAX = 1;
3188 }
3189 ctxt->instate = state;
3190 }
3191}
3192
3193/**
3194 * xmlParseNotationDecl:
3195 * @ctxt: an XML parser context
3196 *
3197 * parse a notation declaration
3198 *
3199 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3200 *
3201 * Hence there is actually 3 choices:
3202 * 'PUBLIC' S PubidLiteral
3203 * 'PUBLIC' S PubidLiteral S SystemLiteral
3204 * and 'SYSTEM' S SystemLiteral
3205 *
3206 * See the NOTE on xmlParseExternalID().
3207 */
3208
3209void
3210xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3211 xmlChar *name;
3212 xmlChar *Pubid;
3213 xmlChar *Systemid;
3214
3215 if ((RAW == '<') && (NXT(1) == '!') &&
3216 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3217 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3218 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3219 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3220 xmlParserInputPtr input = ctxt->input;
3221 SHRINK;
3222 SKIP(10);
3223 if (!IS_BLANK(CUR)) {
3224 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3225 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3226 ctxt->sax->error(ctxt->userData,
3227 "Space required after '<!NOTATION'\n");
3228 ctxt->wellFormed = 0;
3229 ctxt->disableSAX = 1;
3230 return;
3231 }
3232 SKIP_BLANKS;
3233
Daniel Veillard76d66f42001-05-16 21:05:17 +00003234 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003235 if (name == NULL) {
3236 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3237 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3238 ctxt->sax->error(ctxt->userData,
3239 "NOTATION: Name expected here\n");
3240 ctxt->wellFormed = 0;
3241 ctxt->disableSAX = 1;
3242 return;
3243 }
3244 if (!IS_BLANK(CUR)) {
3245 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3246 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3247 ctxt->sax->error(ctxt->userData,
3248 "Space required after the NOTATION name'\n");
3249 ctxt->wellFormed = 0;
3250 ctxt->disableSAX = 1;
3251 return;
3252 }
3253 SKIP_BLANKS;
3254
3255 /*
3256 * Parse the IDs.
3257 */
3258 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3259 SKIP_BLANKS;
3260
3261 if (RAW == '>') {
3262 if (input != ctxt->input) {
3263 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3264 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3265 ctxt->sax->error(ctxt->userData,
3266"Notation declaration doesn't start and stop in the same entity\n");
3267 ctxt->wellFormed = 0;
3268 ctxt->disableSAX = 1;
3269 }
3270 NEXT;
3271 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3272 (ctxt->sax->notationDecl != NULL))
3273 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3274 } else {
3275 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3276 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3277 ctxt->sax->error(ctxt->userData,
3278 "'>' required to close NOTATION declaration\n");
3279 ctxt->wellFormed = 0;
3280 ctxt->disableSAX = 1;
3281 }
3282 xmlFree(name);
3283 if (Systemid != NULL) xmlFree(Systemid);
3284 if (Pubid != NULL) xmlFree(Pubid);
3285 }
3286}
3287
3288/**
3289 * xmlParseEntityDecl:
3290 * @ctxt: an XML parser context
3291 *
3292 * parse <!ENTITY declarations
3293 *
3294 * [70] EntityDecl ::= GEDecl | PEDecl
3295 *
3296 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3297 *
3298 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3299 *
3300 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3301 *
3302 * [74] PEDef ::= EntityValue | ExternalID
3303 *
3304 * [76] NDataDecl ::= S 'NDATA' S Name
3305 *
3306 * [ VC: Notation Declared ]
3307 * The Name must match the declared name of a notation.
3308 */
3309
3310void
3311xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3312 xmlChar *name = NULL;
3313 xmlChar *value = NULL;
3314 xmlChar *URI = NULL, *literal = NULL;
3315 xmlChar *ndata = NULL;
3316 int isParameter = 0;
3317 xmlChar *orig = NULL;
3318
3319 GROW;
3320 if ((RAW == '<') && (NXT(1) == '!') &&
3321 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3322 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3323 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3324 xmlParserInputPtr input = ctxt->input;
3325 ctxt->instate = XML_PARSER_ENTITY_DECL;
3326 SHRINK;
3327 SKIP(8);
3328 if (!IS_BLANK(CUR)) {
3329 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3331 ctxt->sax->error(ctxt->userData,
3332 "Space required after '<!ENTITY'\n");
3333 ctxt->wellFormed = 0;
3334 ctxt->disableSAX = 1;
3335 }
3336 SKIP_BLANKS;
3337
3338 if (RAW == '%') {
3339 NEXT;
3340 if (!IS_BLANK(CUR)) {
3341 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3342 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3343 ctxt->sax->error(ctxt->userData,
3344 "Space required after '%'\n");
3345 ctxt->wellFormed = 0;
3346 ctxt->disableSAX = 1;
3347 }
3348 SKIP_BLANKS;
3349 isParameter = 1;
3350 }
3351
Daniel Veillard76d66f42001-05-16 21:05:17 +00003352 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003353 if (name == NULL) {
3354 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3355 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3356 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3357 ctxt->wellFormed = 0;
3358 ctxt->disableSAX = 1;
3359 return;
3360 }
3361 if (!IS_BLANK(CUR)) {
3362 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3363 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3364 ctxt->sax->error(ctxt->userData,
3365 "Space required after the entity name\n");
3366 ctxt->wellFormed = 0;
3367 ctxt->disableSAX = 1;
3368 }
3369 SKIP_BLANKS;
3370
3371 /*
3372 * handle the various case of definitions...
3373 */
3374 if (isParameter) {
3375 if ((RAW == '"') || (RAW == '\'')) {
3376 value = xmlParseEntityValue(ctxt, &orig);
3377 if (value) {
3378 if ((ctxt->sax != NULL) &&
3379 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3380 ctxt->sax->entityDecl(ctxt->userData, name,
3381 XML_INTERNAL_PARAMETER_ENTITY,
3382 NULL, NULL, value);
3383 }
3384 } else {
3385 URI = xmlParseExternalID(ctxt, &literal, 1);
3386 if ((URI == NULL) && (literal == NULL)) {
3387 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3388 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3389 ctxt->sax->error(ctxt->userData,
3390 "Entity value required\n");
3391 ctxt->wellFormed = 0;
3392 ctxt->disableSAX = 1;
3393 }
3394 if (URI) {
3395 xmlURIPtr uri;
3396
3397 uri = xmlParseURI((const char *) URI);
3398 if (uri == NULL) {
3399 ctxt->errNo = XML_ERR_INVALID_URI;
3400 if ((ctxt->sax != NULL) &&
3401 (!ctxt->disableSAX) &&
3402 (ctxt->sax->error != NULL))
3403 ctxt->sax->error(ctxt->userData,
3404 "Invalid URI: %s\n", URI);
3405 ctxt->wellFormed = 0;
3406 } else {
3407 if (uri->fragment != NULL) {
3408 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3409 if ((ctxt->sax != NULL) &&
3410 (!ctxt->disableSAX) &&
3411 (ctxt->sax->error != NULL))
3412 ctxt->sax->error(ctxt->userData,
3413 "Fragment not allowed: %s\n", URI);
3414 ctxt->wellFormed = 0;
3415 } else {
3416 if ((ctxt->sax != NULL) &&
3417 (!ctxt->disableSAX) &&
3418 (ctxt->sax->entityDecl != NULL))
3419 ctxt->sax->entityDecl(ctxt->userData, name,
3420 XML_EXTERNAL_PARAMETER_ENTITY,
3421 literal, URI, NULL);
3422 }
3423 xmlFreeURI(uri);
3424 }
3425 }
3426 }
3427 } else {
3428 if ((RAW == '"') || (RAW == '\'')) {
3429 value = xmlParseEntityValue(ctxt, &orig);
3430 if ((ctxt->sax != NULL) &&
3431 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3432 ctxt->sax->entityDecl(ctxt->userData, name,
3433 XML_INTERNAL_GENERAL_ENTITY,
3434 NULL, NULL, value);
3435 } else {
3436 URI = xmlParseExternalID(ctxt, &literal, 1);
3437 if ((URI == NULL) && (literal == NULL)) {
3438 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3439 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3440 ctxt->sax->error(ctxt->userData,
3441 "Entity value required\n");
3442 ctxt->wellFormed = 0;
3443 ctxt->disableSAX = 1;
3444 }
3445 if (URI) {
3446 xmlURIPtr uri;
3447
3448 uri = xmlParseURI((const char *)URI);
3449 if (uri == NULL) {
3450 ctxt->errNo = XML_ERR_INVALID_URI;
3451 if ((ctxt->sax != NULL) &&
3452 (!ctxt->disableSAX) &&
3453 (ctxt->sax->error != NULL))
3454 ctxt->sax->error(ctxt->userData,
3455 "Invalid URI: %s\n", URI);
3456 ctxt->wellFormed = 0;
3457 } else {
3458 if (uri->fragment != NULL) {
3459 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3460 if ((ctxt->sax != NULL) &&
3461 (!ctxt->disableSAX) &&
3462 (ctxt->sax->error != NULL))
3463 ctxt->sax->error(ctxt->userData,
3464 "Fragment not allowed: %s\n", URI);
3465 ctxt->wellFormed = 0;
3466 }
3467 xmlFreeURI(uri);
3468 }
3469 }
3470 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3471 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3472 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3473 ctxt->sax->error(ctxt->userData,
3474 "Space required before 'NDATA'\n");
3475 ctxt->wellFormed = 0;
3476 ctxt->disableSAX = 1;
3477 }
3478 SKIP_BLANKS;
3479 if ((RAW == 'N') && (NXT(1) == 'D') &&
3480 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3481 (NXT(4) == 'A')) {
3482 SKIP(5);
3483 if (!IS_BLANK(CUR)) {
3484 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3485 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3486 ctxt->sax->error(ctxt->userData,
3487 "Space required after 'NDATA'\n");
3488 ctxt->wellFormed = 0;
3489 ctxt->disableSAX = 1;
3490 }
3491 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003492 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003493 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3494 (ctxt->sax->unparsedEntityDecl != NULL))
3495 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3496 literal, URI, ndata);
3497 } else {
3498 if ((ctxt->sax != NULL) &&
3499 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3500 ctxt->sax->entityDecl(ctxt->userData, name,
3501 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3502 literal, URI, NULL);
3503 }
3504 }
3505 }
3506 SKIP_BLANKS;
3507 if (RAW != '>') {
3508 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3509 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3510 ctxt->sax->error(ctxt->userData,
3511 "xmlParseEntityDecl: entity %s not terminated\n", name);
3512 ctxt->wellFormed = 0;
3513 ctxt->disableSAX = 1;
3514 } else {
3515 if (input != ctxt->input) {
3516 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3517 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3518 ctxt->sax->error(ctxt->userData,
3519"Entity declaration doesn't start and stop in the same entity\n");
3520 ctxt->wellFormed = 0;
3521 ctxt->disableSAX = 1;
3522 }
3523 NEXT;
3524 }
3525 if (orig != NULL) {
3526 /*
3527 * Ugly mechanism to save the raw entity value.
3528 */
3529 xmlEntityPtr cur = NULL;
3530
3531 if (isParameter) {
3532 if ((ctxt->sax != NULL) &&
3533 (ctxt->sax->getParameterEntity != NULL))
3534 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3535 } else {
3536 if ((ctxt->sax != NULL) &&
3537 (ctxt->sax->getEntity != NULL))
3538 cur = ctxt->sax->getEntity(ctxt->userData, name);
3539 }
3540 if (cur != NULL) {
3541 if (cur->orig != NULL)
3542 xmlFree(orig);
3543 else
3544 cur->orig = orig;
3545 } else
3546 xmlFree(orig);
3547 }
3548 if (name != NULL) xmlFree(name);
3549 if (value != NULL) xmlFree(value);
3550 if (URI != NULL) xmlFree(URI);
3551 if (literal != NULL) xmlFree(literal);
3552 if (ndata != NULL) xmlFree(ndata);
3553 }
3554}
3555
3556/**
3557 * xmlParseDefaultDecl:
3558 * @ctxt: an XML parser context
3559 * @value: Receive a possible fixed default value for the attribute
3560 *
3561 * Parse an attribute default declaration
3562 *
3563 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3564 *
3565 * [ VC: Required Attribute ]
3566 * if the default declaration is the keyword #REQUIRED, then the
3567 * attribute must be specified for all elements of the type in the
3568 * attribute-list declaration.
3569 *
3570 * [ VC: Attribute Default Legal ]
3571 * The declared default value must meet the lexical constraints of
3572 * the declared attribute type c.f. xmlValidateAttributeDecl()
3573 *
3574 * [ VC: Fixed Attribute Default ]
3575 * if an attribute has a default value declared with the #FIXED
3576 * keyword, instances of that attribute must match the default value.
3577 *
3578 * [ WFC: No < in Attribute Values ]
3579 * handled in xmlParseAttValue()
3580 *
3581 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3582 * or XML_ATTRIBUTE_FIXED.
3583 */
3584
3585int
3586xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3587 int val;
3588 xmlChar *ret;
3589
3590 *value = NULL;
3591 if ((RAW == '#') && (NXT(1) == 'R') &&
3592 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3593 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3594 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3595 (NXT(8) == 'D')) {
3596 SKIP(9);
3597 return(XML_ATTRIBUTE_REQUIRED);
3598 }
3599 if ((RAW == '#') && (NXT(1) == 'I') &&
3600 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3601 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3602 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3603 SKIP(8);
3604 return(XML_ATTRIBUTE_IMPLIED);
3605 }
3606 val = XML_ATTRIBUTE_NONE;
3607 if ((RAW == '#') && (NXT(1) == 'F') &&
3608 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3609 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3610 SKIP(6);
3611 val = XML_ATTRIBUTE_FIXED;
3612 if (!IS_BLANK(CUR)) {
3613 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3614 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3615 ctxt->sax->error(ctxt->userData,
3616 "Space required after '#FIXED'\n");
3617 ctxt->wellFormed = 0;
3618 ctxt->disableSAX = 1;
3619 }
3620 SKIP_BLANKS;
3621 }
3622 ret = xmlParseAttValue(ctxt);
3623 ctxt->instate = XML_PARSER_DTD;
3624 if (ret == NULL) {
3625 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3626 ctxt->sax->error(ctxt->userData,
3627 "Attribute default value declaration error\n");
3628 ctxt->wellFormed = 0;
3629 ctxt->disableSAX = 1;
3630 } else
3631 *value = ret;
3632 return(val);
3633}
3634
3635/**
3636 * xmlParseNotationType:
3637 * @ctxt: an XML parser context
3638 *
3639 * parse an Notation attribute type.
3640 *
3641 * Note: the leading 'NOTATION' S part has already being parsed...
3642 *
3643 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3644 *
3645 * [ VC: Notation Attributes ]
3646 * Values of this type must match one of the notation names included
3647 * in the declaration; all notation names in the declaration must be declared.
3648 *
3649 * Returns: the notation attribute tree built while parsing
3650 */
3651
3652xmlEnumerationPtr
3653xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3654 xmlChar *name;
3655 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3656
3657 if (RAW != '(') {
3658 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3659 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3660 ctxt->sax->error(ctxt->userData,
3661 "'(' required to start 'NOTATION'\n");
3662 ctxt->wellFormed = 0;
3663 ctxt->disableSAX = 1;
3664 return(NULL);
3665 }
3666 SHRINK;
3667 do {
3668 NEXT;
3669 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003670 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003671 if (name == NULL) {
3672 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3673 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3674 ctxt->sax->error(ctxt->userData,
3675 "Name expected in NOTATION declaration\n");
3676 ctxt->wellFormed = 0;
3677 ctxt->disableSAX = 1;
3678 return(ret);
3679 }
3680 cur = xmlCreateEnumeration(name);
3681 xmlFree(name);
3682 if (cur == NULL) return(ret);
3683 if (last == NULL) ret = last = cur;
3684 else {
3685 last->next = cur;
3686 last = cur;
3687 }
3688 SKIP_BLANKS;
3689 } while (RAW == '|');
3690 if (RAW != ')') {
3691 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3692 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3693 ctxt->sax->error(ctxt->userData,
3694 "')' required to finish NOTATION declaration\n");
3695 ctxt->wellFormed = 0;
3696 ctxt->disableSAX = 1;
3697 if ((last != NULL) && (last != ret))
3698 xmlFreeEnumeration(last);
3699 return(ret);
3700 }
3701 NEXT;
3702 return(ret);
3703}
3704
3705/**
3706 * xmlParseEnumerationType:
3707 * @ctxt: an XML parser context
3708 *
3709 * parse an Enumeration attribute type.
3710 *
3711 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3712 *
3713 * [ VC: Enumeration ]
3714 * Values of this type must match one of the Nmtoken tokens in
3715 * the declaration
3716 *
3717 * Returns: the enumeration attribute tree built while parsing
3718 */
3719
3720xmlEnumerationPtr
3721xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3722 xmlChar *name;
3723 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3724
3725 if (RAW != '(') {
3726 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3727 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3728 ctxt->sax->error(ctxt->userData,
3729 "'(' required to start ATTLIST enumeration\n");
3730 ctxt->wellFormed = 0;
3731 ctxt->disableSAX = 1;
3732 return(NULL);
3733 }
3734 SHRINK;
3735 do {
3736 NEXT;
3737 SKIP_BLANKS;
3738 name = xmlParseNmtoken(ctxt);
3739 if (name == NULL) {
3740 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3741 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3742 ctxt->sax->error(ctxt->userData,
3743 "NmToken expected in ATTLIST enumeration\n");
3744 ctxt->wellFormed = 0;
3745 ctxt->disableSAX = 1;
3746 return(ret);
3747 }
3748 cur = xmlCreateEnumeration(name);
3749 xmlFree(name);
3750 if (cur == NULL) return(ret);
3751 if (last == NULL) ret = last = cur;
3752 else {
3753 last->next = cur;
3754 last = cur;
3755 }
3756 SKIP_BLANKS;
3757 } while (RAW == '|');
3758 if (RAW != ')') {
3759 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3760 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3761 ctxt->sax->error(ctxt->userData,
3762 "')' required to finish ATTLIST enumeration\n");
3763 ctxt->wellFormed = 0;
3764 ctxt->disableSAX = 1;
3765 return(ret);
3766 }
3767 NEXT;
3768 return(ret);
3769}
3770
3771/**
3772 * xmlParseEnumeratedType:
3773 * @ctxt: an XML parser context
3774 * @tree: the enumeration tree built while parsing
3775 *
3776 * parse an Enumerated attribute type.
3777 *
3778 * [57] EnumeratedType ::= NotationType | Enumeration
3779 *
3780 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3781 *
3782 *
3783 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3784 */
3785
3786int
3787xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3788 if ((RAW == 'N') && (NXT(1) == 'O') &&
3789 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3790 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3791 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3792 SKIP(8);
3793 if (!IS_BLANK(CUR)) {
3794 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3795 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3796 ctxt->sax->error(ctxt->userData,
3797 "Space required after 'NOTATION'\n");
3798 ctxt->wellFormed = 0;
3799 ctxt->disableSAX = 1;
3800 return(0);
3801 }
3802 SKIP_BLANKS;
3803 *tree = xmlParseNotationType(ctxt);
3804 if (*tree == NULL) return(0);
3805 return(XML_ATTRIBUTE_NOTATION);
3806 }
3807 *tree = xmlParseEnumerationType(ctxt);
3808 if (*tree == NULL) return(0);
3809 return(XML_ATTRIBUTE_ENUMERATION);
3810}
3811
3812/**
3813 * xmlParseAttributeType:
3814 * @ctxt: an XML parser context
3815 * @tree: the enumeration tree built while parsing
3816 *
3817 * parse the Attribute list def for an element
3818 *
3819 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3820 *
3821 * [55] StringType ::= 'CDATA'
3822 *
3823 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3824 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3825 *
3826 * Validity constraints for attribute values syntax are checked in
3827 * xmlValidateAttributeValue()
3828 *
3829 * [ VC: ID ]
3830 * Values of type ID must match the Name production. A name must not
3831 * appear more than once in an XML document as a value of this type;
3832 * i.e., ID values must uniquely identify the elements which bear them.
3833 *
3834 * [ VC: One ID per Element Type ]
3835 * No element type may have more than one ID attribute specified.
3836 *
3837 * [ VC: ID Attribute Default ]
3838 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3839 *
3840 * [ VC: IDREF ]
3841 * Values of type IDREF must match the Name production, and values
3842 * of type IDREFS must match Names; each IDREF Name must match the value
3843 * of an ID attribute on some element in the XML document; i.e. IDREF
3844 * values must match the value of some ID attribute.
3845 *
3846 * [ VC: Entity Name ]
3847 * Values of type ENTITY must match the Name production, values
3848 * of type ENTITIES must match Names; each Entity Name must match the
3849 * name of an unparsed entity declared in the DTD.
3850 *
3851 * [ VC: Name Token ]
3852 * Values of type NMTOKEN must match the Nmtoken production; values
3853 * of type NMTOKENS must match Nmtokens.
3854 *
3855 * Returns the attribute type
3856 */
3857int
3858xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3859 SHRINK;
3860 if ((RAW == 'C') && (NXT(1) == 'D') &&
3861 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3862 (NXT(4) == 'A')) {
3863 SKIP(5);
3864 return(XML_ATTRIBUTE_CDATA);
3865 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3866 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3867 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3868 SKIP(6);
3869 return(XML_ATTRIBUTE_IDREFS);
3870 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3871 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3872 (NXT(4) == 'F')) {
3873 SKIP(5);
3874 return(XML_ATTRIBUTE_IDREF);
3875 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3876 SKIP(2);
3877 return(XML_ATTRIBUTE_ID);
3878 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3879 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3880 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3881 SKIP(6);
3882 return(XML_ATTRIBUTE_ENTITY);
3883 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3884 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3885 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3886 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3887 SKIP(8);
3888 return(XML_ATTRIBUTE_ENTITIES);
3889 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3890 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3891 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3892 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3893 SKIP(8);
3894 return(XML_ATTRIBUTE_NMTOKENS);
3895 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3896 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3897 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3898 (NXT(6) == 'N')) {
3899 SKIP(7);
3900 return(XML_ATTRIBUTE_NMTOKEN);
3901 }
3902 return(xmlParseEnumeratedType(ctxt, tree));
3903}
3904
3905/**
3906 * xmlParseAttributeListDecl:
3907 * @ctxt: an XML parser context
3908 *
3909 * : parse the Attribute list def for an element
3910 *
3911 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3912 *
3913 * [53] AttDef ::= S Name S AttType S DefaultDecl
3914 *
3915 */
3916void
3917xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3918 xmlChar *elemName;
3919 xmlChar *attrName;
3920 xmlEnumerationPtr tree;
3921
3922 if ((RAW == '<') && (NXT(1) == '!') &&
3923 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3924 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3925 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3926 (NXT(8) == 'T')) {
3927 xmlParserInputPtr input = ctxt->input;
3928
3929 SKIP(9);
3930 if (!IS_BLANK(CUR)) {
3931 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3932 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3933 ctxt->sax->error(ctxt->userData,
3934 "Space required after '<!ATTLIST'\n");
3935 ctxt->wellFormed = 0;
3936 ctxt->disableSAX = 1;
3937 }
3938 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003939 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003940 if (elemName == NULL) {
3941 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3942 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3943 ctxt->sax->error(ctxt->userData,
3944 "ATTLIST: no name for Element\n");
3945 ctxt->wellFormed = 0;
3946 ctxt->disableSAX = 1;
3947 return;
3948 }
3949 SKIP_BLANKS;
3950 GROW;
3951 while (RAW != '>') {
3952 const xmlChar *check = CUR_PTR;
3953 int type;
3954 int def;
3955 xmlChar *defaultValue = NULL;
3956
3957 GROW;
3958 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003959 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003960 if (attrName == NULL) {
3961 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3962 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3963 ctxt->sax->error(ctxt->userData,
3964 "ATTLIST: no name for Attribute\n");
3965 ctxt->wellFormed = 0;
3966 ctxt->disableSAX = 1;
3967 break;
3968 }
3969 GROW;
3970 if (!IS_BLANK(CUR)) {
3971 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3972 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3973 ctxt->sax->error(ctxt->userData,
3974 "Space required after the attribute name\n");
3975 ctxt->wellFormed = 0;
3976 ctxt->disableSAX = 1;
3977 if (attrName != NULL)
3978 xmlFree(attrName);
3979 if (defaultValue != NULL)
3980 xmlFree(defaultValue);
3981 break;
3982 }
3983 SKIP_BLANKS;
3984
3985 type = xmlParseAttributeType(ctxt, &tree);
3986 if (type <= 0) {
3987 if (attrName != NULL)
3988 xmlFree(attrName);
3989 if (defaultValue != NULL)
3990 xmlFree(defaultValue);
3991 break;
3992 }
3993
3994 GROW;
3995 if (!IS_BLANK(CUR)) {
3996 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3997 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3998 ctxt->sax->error(ctxt->userData,
3999 "Space required after the attribute type\n");
4000 ctxt->wellFormed = 0;
4001 ctxt->disableSAX = 1;
4002 if (attrName != NULL)
4003 xmlFree(attrName);
4004 if (defaultValue != NULL)
4005 xmlFree(defaultValue);
4006 if (tree != NULL)
4007 xmlFreeEnumeration(tree);
4008 break;
4009 }
4010 SKIP_BLANKS;
4011
4012 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4013 if (def <= 0) {
4014 if (attrName != NULL)
4015 xmlFree(attrName);
4016 if (defaultValue != NULL)
4017 xmlFree(defaultValue);
4018 if (tree != NULL)
4019 xmlFreeEnumeration(tree);
4020 break;
4021 }
4022
4023 GROW;
4024 if (RAW != '>') {
4025 if (!IS_BLANK(CUR)) {
4026 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4027 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4028 ctxt->sax->error(ctxt->userData,
4029 "Space required after the attribute default value\n");
4030 ctxt->wellFormed = 0;
4031 ctxt->disableSAX = 1;
4032 if (attrName != NULL)
4033 xmlFree(attrName);
4034 if (defaultValue != NULL)
4035 xmlFree(defaultValue);
4036 if (tree != NULL)
4037 xmlFreeEnumeration(tree);
4038 break;
4039 }
4040 SKIP_BLANKS;
4041 }
4042 if (check == CUR_PTR) {
4043 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4044 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4045 ctxt->sax->error(ctxt->userData,
4046 "xmlParseAttributeListDecl: detected internal error\n");
4047 if (attrName != NULL)
4048 xmlFree(attrName);
4049 if (defaultValue != NULL)
4050 xmlFree(defaultValue);
4051 if (tree != NULL)
4052 xmlFreeEnumeration(tree);
4053 break;
4054 }
4055 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4056 (ctxt->sax->attributeDecl != NULL))
4057 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4058 type, def, defaultValue, tree);
4059 if (attrName != NULL)
4060 xmlFree(attrName);
4061 if (defaultValue != NULL)
4062 xmlFree(defaultValue);
4063 GROW;
4064 }
4065 if (RAW == '>') {
4066 if (input != ctxt->input) {
4067 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4068 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4069 ctxt->sax->error(ctxt->userData,
4070"Attribute list declaration doesn't start and stop in the same entity\n");
4071 ctxt->wellFormed = 0;
4072 ctxt->disableSAX = 1;
4073 }
4074 NEXT;
4075 }
4076
4077 xmlFree(elemName);
4078 }
4079}
4080
4081/**
4082 * xmlParseElementMixedContentDecl:
4083 * @ctxt: an XML parser context
4084 *
4085 * parse the declaration for a Mixed Element content
4086 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4087 *
4088 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4089 * '(' S? '#PCDATA' S? ')'
4090 *
4091 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4092 *
4093 * [ VC: No Duplicate Types ]
4094 * The same name must not appear more than once in a single
4095 * mixed-content declaration.
4096 *
4097 * returns: the list of the xmlElementContentPtr describing the element choices
4098 */
4099xmlElementContentPtr
4100xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
4101 xmlElementContentPtr ret = NULL, cur = NULL, n;
4102 xmlChar *elem = NULL;
4103
4104 GROW;
4105 if ((RAW == '#') && (NXT(1) == 'P') &&
4106 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4107 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4108 (NXT(6) == 'A')) {
4109 SKIP(7);
4110 SKIP_BLANKS;
4111 SHRINK;
4112 if (RAW == ')') {
4113 ctxt->entity = ctxt->input;
4114 NEXT;
4115 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4116 if (RAW == '*') {
4117 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4118 NEXT;
4119 }
4120 return(ret);
4121 }
4122 if ((RAW == '(') || (RAW == '|')) {
4123 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4124 if (ret == NULL) return(NULL);
4125 }
4126 while (RAW == '|') {
4127 NEXT;
4128 if (elem == NULL) {
4129 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4130 if (ret == NULL) return(NULL);
4131 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004132 if (cur != NULL)
4133 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004134 cur = ret;
4135 } else {
4136 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4137 if (n == NULL) return(NULL);
4138 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004139 if (n->c1 != NULL)
4140 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004141 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004142 if (n != NULL)
4143 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004144 cur = n;
4145 xmlFree(elem);
4146 }
4147 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004148 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004149 if (elem == NULL) {
4150 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4151 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4152 ctxt->sax->error(ctxt->userData,
4153 "xmlParseElementMixedContentDecl : Name expected\n");
4154 ctxt->wellFormed = 0;
4155 ctxt->disableSAX = 1;
4156 xmlFreeElementContent(cur);
4157 return(NULL);
4158 }
4159 SKIP_BLANKS;
4160 GROW;
4161 }
4162 if ((RAW == ')') && (NXT(1) == '*')) {
4163 if (elem != NULL) {
4164 cur->c2 = xmlNewElementContent(elem,
4165 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004166 if (cur->c2 != NULL)
4167 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004168 xmlFree(elem);
4169 }
4170 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4171 ctxt->entity = ctxt->input;
4172 SKIP(2);
4173 } else {
4174 if (elem != NULL) xmlFree(elem);
4175 xmlFreeElementContent(ret);
4176 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4177 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4178 ctxt->sax->error(ctxt->userData,
4179 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4180 ctxt->wellFormed = 0;
4181 ctxt->disableSAX = 1;
4182 return(NULL);
4183 }
4184
4185 } else {
4186 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4187 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4188 ctxt->sax->error(ctxt->userData,
4189 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4190 ctxt->wellFormed = 0;
4191 ctxt->disableSAX = 1;
4192 }
4193 return(ret);
4194}
4195
4196/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004197 * xmlParseElementChildrenContentD:
4198 * @ctxt: an XML parser context
4199 *
4200 * VMS version of xmlParseElementChildrenContentDecl()
4201 *
4202 * Returns the tree of xmlElementContentPtr describing the element
4203 * hierarchy.
4204 */
4205/**
Owen Taylor3473f882001-02-23 17:55:21 +00004206 * xmlParseElementChildrenContentDecl:
4207 * @ctxt: an XML parser context
4208 *
4209 * parse the declaration for a Mixed Element content
4210 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4211 *
4212 *
4213 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4214 *
4215 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4216 *
4217 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4218 *
4219 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4220 *
4221 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4222 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004223 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004224 * opening or closing parentheses in a choice, seq, or Mixed
4225 * construct is contained in the replacement text for a parameter
4226 * entity, both must be contained in the same replacement text. For
4227 * interoperability, if a parameter-entity reference appears in a
4228 * choice, seq, or Mixed construct, its replacement text should not
4229 * be empty, and neither the first nor last non-blank character of
4230 * the replacement text should be a connector (| or ,).
4231 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004232 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004233 * hierarchy.
4234 */
4235xmlElementContentPtr
4236#ifdef VMS
4237xmlParseElementChildrenContentD
4238#else
4239xmlParseElementChildrenContentDecl
4240#endif
4241(xmlParserCtxtPtr ctxt) {
4242 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4243 xmlChar *elem;
4244 xmlChar type = 0;
4245
4246 SKIP_BLANKS;
4247 GROW;
4248 if (RAW == '(') {
4249 /* Recurse on first child */
4250 NEXT;
4251 SKIP_BLANKS;
4252 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4253 SKIP_BLANKS;
4254 GROW;
4255 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004256 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004257 if (elem == NULL) {
4258 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4259 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4260 ctxt->sax->error(ctxt->userData,
4261 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4262 ctxt->wellFormed = 0;
4263 ctxt->disableSAX = 1;
4264 return(NULL);
4265 }
4266 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4267 GROW;
4268 if (RAW == '?') {
4269 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4270 NEXT;
4271 } else if (RAW == '*') {
4272 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4273 NEXT;
4274 } else if (RAW == '+') {
4275 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4276 NEXT;
4277 } else {
4278 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4279 }
4280 xmlFree(elem);
4281 GROW;
4282 }
4283 SKIP_BLANKS;
4284 SHRINK;
4285 while (RAW != ')') {
4286 /*
4287 * Each loop we parse one separator and one element.
4288 */
4289 if (RAW == ',') {
4290 if (type == 0) type = CUR;
4291
4292 /*
4293 * Detect "Name | Name , Name" error
4294 */
4295 else if (type != CUR) {
4296 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4297 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4298 ctxt->sax->error(ctxt->userData,
4299 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4300 type);
4301 ctxt->wellFormed = 0;
4302 ctxt->disableSAX = 1;
4303 if ((op != NULL) && (op != ret))
4304 xmlFreeElementContent(op);
4305 if ((last != NULL) && (last != ret) &&
4306 (last != ret->c1) && (last != ret->c2))
4307 xmlFreeElementContent(last);
4308 if (ret != NULL)
4309 xmlFreeElementContent(ret);
4310 return(NULL);
4311 }
4312 NEXT;
4313
4314 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4315 if (op == NULL) {
4316 xmlFreeElementContent(ret);
4317 return(NULL);
4318 }
4319 if (last == NULL) {
4320 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004321 if (ret != NULL)
4322 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004323 ret = cur = op;
4324 } else {
4325 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004326 if (op != NULL)
4327 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004328 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004329 if (last != NULL)
4330 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004331 cur =op;
4332 last = NULL;
4333 }
4334 } else if (RAW == '|') {
4335 if (type == 0) type = CUR;
4336
4337 /*
4338 * Detect "Name , Name | Name" error
4339 */
4340 else if (type != CUR) {
4341 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4342 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4343 ctxt->sax->error(ctxt->userData,
4344 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4345 type);
4346 ctxt->wellFormed = 0;
4347 ctxt->disableSAX = 1;
4348 if ((op != NULL) && (op != ret) && (op != last))
4349 xmlFreeElementContent(op);
4350 if ((last != NULL) && (last != ret) &&
4351 (last != ret->c1) && (last != ret->c2))
4352 xmlFreeElementContent(last);
4353 if (ret != NULL)
4354 xmlFreeElementContent(ret);
4355 return(NULL);
4356 }
4357 NEXT;
4358
4359 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4360 if (op == NULL) {
4361 if ((op != NULL) && (op != ret))
4362 xmlFreeElementContent(op);
4363 if ((last != NULL) && (last != ret) &&
4364 (last != ret->c1) && (last != ret->c2))
4365 xmlFreeElementContent(last);
4366 if (ret != NULL)
4367 xmlFreeElementContent(ret);
4368 return(NULL);
4369 }
4370 if (last == NULL) {
4371 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004372 if (ret != NULL)
4373 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004374 ret = cur = op;
4375 } else {
4376 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004377 if (op != NULL)
4378 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004379 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004380 if (last != NULL)
4381 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004382 cur =op;
4383 last = NULL;
4384 }
4385 } else {
4386 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4387 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4388 ctxt->sax->error(ctxt->userData,
4389 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4390 ctxt->wellFormed = 0;
4391 ctxt->disableSAX = 1;
4392 if ((op != NULL) && (op != ret))
4393 xmlFreeElementContent(op);
4394 if ((last != NULL) && (last != ret) &&
4395 (last != ret->c1) && (last != ret->c2))
4396 xmlFreeElementContent(last);
4397 if (ret != NULL)
4398 xmlFreeElementContent(ret);
4399 return(NULL);
4400 }
4401 GROW;
4402 SKIP_BLANKS;
4403 GROW;
4404 if (RAW == '(') {
4405 /* Recurse on second child */
4406 NEXT;
4407 SKIP_BLANKS;
4408 last = xmlParseElementChildrenContentDecl(ctxt);
4409 SKIP_BLANKS;
4410 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004411 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004412 if (elem == NULL) {
4413 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4414 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4415 ctxt->sax->error(ctxt->userData,
4416 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4417 ctxt->wellFormed = 0;
4418 ctxt->disableSAX = 1;
4419 if ((op != NULL) && (op != ret))
4420 xmlFreeElementContent(op);
4421 if ((last != NULL) && (last != ret) &&
4422 (last != ret->c1) && (last != ret->c2))
4423 xmlFreeElementContent(last);
4424 if (ret != NULL)
4425 xmlFreeElementContent(ret);
4426 return(NULL);
4427 }
4428 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4429 xmlFree(elem);
4430 if (RAW == '?') {
4431 last->ocur = XML_ELEMENT_CONTENT_OPT;
4432 NEXT;
4433 } else if (RAW == '*') {
4434 last->ocur = XML_ELEMENT_CONTENT_MULT;
4435 NEXT;
4436 } else if (RAW == '+') {
4437 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4438 NEXT;
4439 } else {
4440 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4441 }
4442 }
4443 SKIP_BLANKS;
4444 GROW;
4445 }
4446 if ((cur != NULL) && (last != NULL)) {
4447 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004448 if (last != NULL)
4449 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004450 }
4451 ctxt->entity = ctxt->input;
4452 NEXT;
4453 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004454 if (ret != NULL)
4455 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004456 NEXT;
4457 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004458 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004459 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004460 cur = ret;
4461 /*
4462 * Some normalization:
4463 * (a | b* | c?)* == (a | b | c)*
4464 */
4465 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4466 if ((cur->c1 != NULL) &&
4467 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4468 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4469 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4470 if ((cur->c2 != NULL) &&
4471 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4472 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4473 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4474 cur = cur->c2;
4475 }
4476 }
Owen Taylor3473f882001-02-23 17:55:21 +00004477 NEXT;
4478 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004479 if (ret != NULL) {
4480 int found = 0;
4481
Daniel Veillarde470df72001-04-18 21:41:07 +00004482 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004483 /*
4484 * Some normalization:
4485 * (a | b*)+ == (a | b)*
4486 * (a | b?)+ == (a | b)*
4487 */
4488 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4489 if ((cur->c1 != NULL) &&
4490 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4491 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4492 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4493 found = 1;
4494 }
4495 if ((cur->c2 != NULL) &&
4496 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4497 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4498 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4499 found = 1;
4500 }
4501 cur = cur->c2;
4502 }
4503 if (found)
4504 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4505 }
Owen Taylor3473f882001-02-23 17:55:21 +00004506 NEXT;
4507 }
4508 return(ret);
4509}
4510
4511/**
4512 * xmlParseElementContentDecl:
4513 * @ctxt: an XML parser context
4514 * @name: the name of the element being defined.
4515 * @result: the Element Content pointer will be stored here if any
4516 *
4517 * parse the declaration for an Element content either Mixed or Children,
4518 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4519 *
4520 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4521 *
4522 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4523 */
4524
4525int
4526xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4527 xmlElementContentPtr *result) {
4528
4529 xmlElementContentPtr tree = NULL;
4530 xmlParserInputPtr input = ctxt->input;
4531 int res;
4532
4533 *result = NULL;
4534
4535 if (RAW != '(') {
4536 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4537 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4538 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004539 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004540 ctxt->wellFormed = 0;
4541 ctxt->disableSAX = 1;
4542 return(-1);
4543 }
4544 NEXT;
4545 GROW;
4546 SKIP_BLANKS;
4547 if ((RAW == '#') && (NXT(1) == 'P') &&
4548 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4549 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4550 (NXT(6) == 'A')) {
4551 tree = xmlParseElementMixedContentDecl(ctxt);
4552 res = XML_ELEMENT_TYPE_MIXED;
4553 } else {
4554 tree = xmlParseElementChildrenContentDecl(ctxt);
4555 res = XML_ELEMENT_TYPE_ELEMENT;
4556 }
4557 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
4558 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4559 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4560 ctxt->sax->error(ctxt->userData,
4561"Element content declaration doesn't start and stop in the same entity\n");
4562 ctxt->wellFormed = 0;
4563 ctxt->disableSAX = 1;
4564 }
4565 SKIP_BLANKS;
4566 *result = tree;
4567 return(res);
4568}
4569
4570/**
4571 * xmlParseElementDecl:
4572 * @ctxt: an XML parser context
4573 *
4574 * parse an Element declaration.
4575 *
4576 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4577 *
4578 * [ VC: Unique Element Type Declaration ]
4579 * No element type may be declared more than once
4580 *
4581 * Returns the type of the element, or -1 in case of error
4582 */
4583int
4584xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4585 xmlChar *name;
4586 int ret = -1;
4587 xmlElementContentPtr content = NULL;
4588
4589 GROW;
4590 if ((RAW == '<') && (NXT(1) == '!') &&
4591 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4592 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4593 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4594 (NXT(8) == 'T')) {
4595 xmlParserInputPtr input = ctxt->input;
4596
4597 SKIP(9);
4598 if (!IS_BLANK(CUR)) {
4599 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4600 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4601 ctxt->sax->error(ctxt->userData,
4602 "Space required after 'ELEMENT'\n");
4603 ctxt->wellFormed = 0;
4604 ctxt->disableSAX = 1;
4605 }
4606 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004607 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004608 if (name == NULL) {
4609 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4610 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4611 ctxt->sax->error(ctxt->userData,
4612 "xmlParseElementDecl: no name for Element\n");
4613 ctxt->wellFormed = 0;
4614 ctxt->disableSAX = 1;
4615 return(-1);
4616 }
4617 while ((RAW == 0) && (ctxt->inputNr > 1))
4618 xmlPopInput(ctxt);
4619 if (!IS_BLANK(CUR)) {
4620 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4621 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4622 ctxt->sax->error(ctxt->userData,
4623 "Space required after the element name\n");
4624 ctxt->wellFormed = 0;
4625 ctxt->disableSAX = 1;
4626 }
4627 SKIP_BLANKS;
4628 if ((RAW == 'E') && (NXT(1) == 'M') &&
4629 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4630 (NXT(4) == 'Y')) {
4631 SKIP(5);
4632 /*
4633 * Element must always be empty.
4634 */
4635 ret = XML_ELEMENT_TYPE_EMPTY;
4636 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4637 (NXT(2) == 'Y')) {
4638 SKIP(3);
4639 /*
4640 * Element is a generic container.
4641 */
4642 ret = XML_ELEMENT_TYPE_ANY;
4643 } else if (RAW == '(') {
4644 ret = xmlParseElementContentDecl(ctxt, name, &content);
4645 } else {
4646 /*
4647 * [ WFC: PEs in Internal Subset ] error handling.
4648 */
4649 if ((RAW == '%') && (ctxt->external == 0) &&
4650 (ctxt->inputNr == 1)) {
4651 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4652 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4653 ctxt->sax->error(ctxt->userData,
4654 "PEReference: forbidden within markup decl in internal subset\n");
4655 } else {
4656 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4657 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4658 ctxt->sax->error(ctxt->userData,
4659 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4660 }
4661 ctxt->wellFormed = 0;
4662 ctxt->disableSAX = 1;
4663 if (name != NULL) xmlFree(name);
4664 return(-1);
4665 }
4666
4667 SKIP_BLANKS;
4668 /*
4669 * Pop-up of finished entities.
4670 */
4671 while ((RAW == 0) && (ctxt->inputNr > 1))
4672 xmlPopInput(ctxt);
4673 SKIP_BLANKS;
4674
4675 if (RAW != '>') {
4676 ctxt->errNo = XML_ERR_GT_REQUIRED;
4677 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4678 ctxt->sax->error(ctxt->userData,
4679 "xmlParseElementDecl: expected '>' at the end\n");
4680 ctxt->wellFormed = 0;
4681 ctxt->disableSAX = 1;
4682 } else {
4683 if (input != ctxt->input) {
4684 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4685 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4686 ctxt->sax->error(ctxt->userData,
4687"Element declaration doesn't start and stop in the same entity\n");
4688 ctxt->wellFormed = 0;
4689 ctxt->disableSAX = 1;
4690 }
4691
4692 NEXT;
4693 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4694 (ctxt->sax->elementDecl != NULL))
4695 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4696 content);
4697 }
4698 if (content != NULL) {
4699 xmlFreeElementContent(content);
4700 }
4701 if (name != NULL) {
4702 xmlFree(name);
4703 }
4704 }
4705 return(ret);
4706}
4707
4708/**
Owen Taylor3473f882001-02-23 17:55:21 +00004709 * xmlParseConditionalSections
4710 * @ctxt: an XML parser context
4711 *
4712 * [61] conditionalSect ::= includeSect | ignoreSect
4713 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4714 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4715 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4716 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4717 */
4718
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004719static void
Owen Taylor3473f882001-02-23 17:55:21 +00004720xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4721 SKIP(3);
4722 SKIP_BLANKS;
4723 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4724 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4725 (NXT(6) == 'E')) {
4726 SKIP(7);
4727 SKIP_BLANKS;
4728 if (RAW != '[') {
4729 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4730 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4731 ctxt->sax->error(ctxt->userData,
4732 "XML conditional section '[' expected\n");
4733 ctxt->wellFormed = 0;
4734 ctxt->disableSAX = 1;
4735 } else {
4736 NEXT;
4737 }
4738 if (xmlParserDebugEntities) {
4739 if ((ctxt->input != NULL) && (ctxt->input->filename))
4740 xmlGenericError(xmlGenericErrorContext,
4741 "%s(%d): ", ctxt->input->filename,
4742 ctxt->input->line);
4743 xmlGenericError(xmlGenericErrorContext,
4744 "Entering INCLUDE Conditional Section\n");
4745 }
4746
4747 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4748 (NXT(2) != '>'))) {
4749 const xmlChar *check = CUR_PTR;
4750 int cons = ctxt->input->consumed;
4751 int tok = ctxt->token;
4752
4753 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4754 xmlParseConditionalSections(ctxt);
4755 } else if (IS_BLANK(CUR)) {
4756 NEXT;
4757 } else if (RAW == '%') {
4758 xmlParsePEReference(ctxt);
4759 } else
4760 xmlParseMarkupDecl(ctxt);
4761
4762 /*
4763 * Pop-up of finished entities.
4764 */
4765 while ((RAW == 0) && (ctxt->inputNr > 1))
4766 xmlPopInput(ctxt);
4767
4768 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4769 (tok == ctxt->token)) {
4770 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4771 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4772 ctxt->sax->error(ctxt->userData,
4773 "Content error in the external subset\n");
4774 ctxt->wellFormed = 0;
4775 ctxt->disableSAX = 1;
4776 break;
4777 }
4778 }
4779 if (xmlParserDebugEntities) {
4780 if ((ctxt->input != NULL) && (ctxt->input->filename))
4781 xmlGenericError(xmlGenericErrorContext,
4782 "%s(%d): ", ctxt->input->filename,
4783 ctxt->input->line);
4784 xmlGenericError(xmlGenericErrorContext,
4785 "Leaving INCLUDE Conditional Section\n");
4786 }
4787
4788 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4789 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4790 int state;
4791 int instate;
4792 int depth = 0;
4793
4794 SKIP(6);
4795 SKIP_BLANKS;
4796 if (RAW != '[') {
4797 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4798 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4799 ctxt->sax->error(ctxt->userData,
4800 "XML conditional section '[' expected\n");
4801 ctxt->wellFormed = 0;
4802 ctxt->disableSAX = 1;
4803 } else {
4804 NEXT;
4805 }
4806 if (xmlParserDebugEntities) {
4807 if ((ctxt->input != NULL) && (ctxt->input->filename))
4808 xmlGenericError(xmlGenericErrorContext,
4809 "%s(%d): ", ctxt->input->filename,
4810 ctxt->input->line);
4811 xmlGenericError(xmlGenericErrorContext,
4812 "Entering IGNORE Conditional Section\n");
4813 }
4814
4815 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004816 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00004817 * But disable SAX event generating DTD building in the meantime
4818 */
4819 state = ctxt->disableSAX;
4820 instate = ctxt->instate;
4821 ctxt->disableSAX = 1;
4822 ctxt->instate = XML_PARSER_IGNORE;
4823
4824 while (depth >= 0) {
4825 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4826 depth++;
4827 SKIP(3);
4828 continue;
4829 }
4830 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4831 if (--depth >= 0) SKIP(3);
4832 continue;
4833 }
4834 NEXT;
4835 continue;
4836 }
4837
4838 ctxt->disableSAX = state;
4839 ctxt->instate = instate;
4840
4841 if (xmlParserDebugEntities) {
4842 if ((ctxt->input != NULL) && (ctxt->input->filename))
4843 xmlGenericError(xmlGenericErrorContext,
4844 "%s(%d): ", ctxt->input->filename,
4845 ctxt->input->line);
4846 xmlGenericError(xmlGenericErrorContext,
4847 "Leaving IGNORE Conditional Section\n");
4848 }
4849
4850 } else {
4851 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4852 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4853 ctxt->sax->error(ctxt->userData,
4854 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4855 ctxt->wellFormed = 0;
4856 ctxt->disableSAX = 1;
4857 }
4858
4859 if (RAW == 0)
4860 SHRINK;
4861
4862 if (RAW == 0) {
4863 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4864 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4865 ctxt->sax->error(ctxt->userData,
4866 "XML conditional section not closed\n");
4867 ctxt->wellFormed = 0;
4868 ctxt->disableSAX = 1;
4869 } else {
4870 SKIP(3);
4871 }
4872}
4873
4874/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00004875 * xmlParseMarkupDecl:
4876 * @ctxt: an XML parser context
4877 *
4878 * parse Markup declarations
4879 *
4880 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4881 * NotationDecl | PI | Comment
4882 *
4883 * [ VC: Proper Declaration/PE Nesting ]
4884 * Parameter-entity replacement text must be properly nested with
4885 * markup declarations. That is to say, if either the first character
4886 * or the last character of a markup declaration (markupdecl above) is
4887 * contained in the replacement text for a parameter-entity reference,
4888 * both must be contained in the same replacement text.
4889 *
4890 * [ WFC: PEs in Internal Subset ]
4891 * In the internal DTD subset, parameter-entity references can occur
4892 * only where markup declarations can occur, not within markup declarations.
4893 * (This does not apply to references that occur in external parameter
4894 * entities or to the external subset.)
4895 */
4896void
4897xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4898 GROW;
4899 xmlParseElementDecl(ctxt);
4900 xmlParseAttributeListDecl(ctxt);
4901 xmlParseEntityDecl(ctxt);
4902 xmlParseNotationDecl(ctxt);
4903 xmlParsePI(ctxt);
4904 xmlParseComment(ctxt);
4905 /*
4906 * This is only for internal subset. On external entities,
4907 * the replacement is done before parsing stage
4908 */
4909 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4910 xmlParsePEReference(ctxt);
4911
4912 /*
4913 * Conditional sections are allowed from entities included
4914 * by PE References in the internal subset.
4915 */
4916 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
4917 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4918 xmlParseConditionalSections(ctxt);
4919 }
4920 }
4921
4922 ctxt->instate = XML_PARSER_DTD;
4923}
4924
4925/**
4926 * xmlParseTextDecl:
4927 * @ctxt: an XML parser context
4928 *
4929 * parse an XML declaration header for external entities
4930 *
4931 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4932 *
4933 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4934 */
4935
4936void
4937xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4938 xmlChar *version;
4939
4940 /*
4941 * We know that '<?xml' is here.
4942 */
4943 if ((RAW == '<') && (NXT(1) == '?') &&
4944 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4945 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4946 SKIP(5);
4947 } else {
4948 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4949 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4950 ctxt->sax->error(ctxt->userData,
4951 "Text declaration '<?xml' required\n");
4952 ctxt->wellFormed = 0;
4953 ctxt->disableSAX = 1;
4954
4955 return;
4956 }
4957
4958 if (!IS_BLANK(CUR)) {
4959 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4960 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4961 ctxt->sax->error(ctxt->userData,
4962 "Space needed after '<?xml'\n");
4963 ctxt->wellFormed = 0;
4964 ctxt->disableSAX = 1;
4965 }
4966 SKIP_BLANKS;
4967
4968 /*
4969 * We may have the VersionInfo here.
4970 */
4971 version = xmlParseVersionInfo(ctxt);
4972 if (version == NULL)
4973 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00004974 else {
4975 if (!IS_BLANK(CUR)) {
4976 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4977 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4978 ctxt->sax->error(ctxt->userData, "Space needed here\n");
4979 ctxt->wellFormed = 0;
4980 ctxt->disableSAX = 1;
4981 }
4982 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00004983 ctxt->input->version = version;
4984
4985 /*
4986 * We must have the encoding declaration
4987 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00004988 xmlParseEncodingDecl(ctxt);
4989 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4990 /*
4991 * The XML REC instructs us to stop parsing right here
4992 */
4993 return;
4994 }
4995
4996 SKIP_BLANKS;
4997 if ((RAW == '?') && (NXT(1) == '>')) {
4998 SKIP(2);
4999 } else if (RAW == '>') {
5000 /* Deprecated old WD ... */
5001 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5002 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5003 ctxt->sax->error(ctxt->userData,
5004 "XML declaration must end-up with '?>'\n");
5005 ctxt->wellFormed = 0;
5006 ctxt->disableSAX = 1;
5007 NEXT;
5008 } else {
5009 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5010 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5011 ctxt->sax->error(ctxt->userData,
5012 "parsing XML declaration: '?>' expected\n");
5013 ctxt->wellFormed = 0;
5014 ctxt->disableSAX = 1;
5015 MOVETO_ENDTAG(CUR_PTR);
5016 NEXT;
5017 }
5018}
5019
5020/**
Owen Taylor3473f882001-02-23 17:55:21 +00005021 * xmlParseExternalSubset:
5022 * @ctxt: an XML parser context
5023 * @ExternalID: the external identifier
5024 * @SystemID: the system identifier (or URL)
5025 *
5026 * parse Markup declarations from an external subset
5027 *
5028 * [30] extSubset ::= textDecl? extSubsetDecl
5029 *
5030 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5031 */
5032void
5033xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5034 const xmlChar *SystemID) {
5035 GROW;
5036 if ((RAW == '<') && (NXT(1) == '?') &&
5037 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5038 (NXT(4) == 'l')) {
5039 xmlParseTextDecl(ctxt);
5040 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5041 /*
5042 * The XML REC instructs us to stop parsing right here
5043 */
5044 ctxt->instate = XML_PARSER_EOF;
5045 return;
5046 }
5047 }
5048 if (ctxt->myDoc == NULL) {
5049 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5050 }
5051 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5052 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5053
5054 ctxt->instate = XML_PARSER_DTD;
5055 ctxt->external = 1;
5056 while (((RAW == '<') && (NXT(1) == '?')) ||
5057 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005058 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005059 const xmlChar *check = CUR_PTR;
5060 int cons = ctxt->input->consumed;
5061 int tok = ctxt->token;
5062
5063 GROW;
5064 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5065 xmlParseConditionalSections(ctxt);
5066 } else if (IS_BLANK(CUR)) {
5067 NEXT;
5068 } else if (RAW == '%') {
5069 xmlParsePEReference(ctxt);
5070 } else
5071 xmlParseMarkupDecl(ctxt);
5072
5073 /*
5074 * Pop-up of finished entities.
5075 */
5076 while ((RAW == 0) && (ctxt->inputNr > 1))
5077 xmlPopInput(ctxt);
5078
5079 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
5080 (tok == ctxt->token)) {
5081 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5082 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5083 ctxt->sax->error(ctxt->userData,
5084 "Content error in the external subset\n");
5085 ctxt->wellFormed = 0;
5086 ctxt->disableSAX = 1;
5087 break;
5088 }
5089 }
5090
5091 if (RAW != 0) {
5092 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5093 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5094 ctxt->sax->error(ctxt->userData,
5095 "Extra content at the end of the document\n");
5096 ctxt->wellFormed = 0;
5097 ctxt->disableSAX = 1;
5098 }
5099
5100}
5101
5102/**
5103 * xmlParseReference:
5104 * @ctxt: an XML parser context
5105 *
5106 * parse and handle entity references in content, depending on the SAX
5107 * interface, this may end-up in a call to character() if this is a
5108 * CharRef, a predefined entity, if there is no reference() callback.
5109 * or if the parser was asked to switch to that mode.
5110 *
5111 * [67] Reference ::= EntityRef | CharRef
5112 */
5113void
5114xmlParseReference(xmlParserCtxtPtr ctxt) {
5115 xmlEntityPtr ent;
5116 xmlChar *val;
5117 if (RAW != '&') return;
5118
5119 if (NXT(1) == '#') {
5120 int i = 0;
5121 xmlChar out[10];
5122 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005123 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005124
5125 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5126 /*
5127 * So we are using non-UTF-8 buffers
5128 * Check that the char fit on 8bits, if not
5129 * generate a CharRef.
5130 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005131 if (value <= 0xFF) {
5132 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005133 out[1] = 0;
5134 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5135 (!ctxt->disableSAX))
5136 ctxt->sax->characters(ctxt->userData, out, 1);
5137 } else {
5138 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005139 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005140 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005141 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005142 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5143 (!ctxt->disableSAX))
5144 ctxt->sax->reference(ctxt->userData, out);
5145 }
5146 } else {
5147 /*
5148 * Just encode the value in UTF-8
5149 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005150 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005151 out[i] = 0;
5152 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5153 (!ctxt->disableSAX))
5154 ctxt->sax->characters(ctxt->userData, out, i);
5155 }
5156 } else {
5157 ent = xmlParseEntityRef(ctxt);
5158 if (ent == NULL) return;
5159 if ((ent->name != NULL) &&
5160 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5161 xmlNodePtr list = NULL;
5162 int ret;
5163
5164
5165 /*
5166 * The first reference to the entity trigger a parsing phase
5167 * where the ent->children is filled with the result from
5168 * the parsing.
5169 */
5170 if (ent->children == NULL) {
5171 xmlChar *value;
5172 value = ent->content;
5173
5174 /*
5175 * Check that this entity is well formed
5176 */
5177 if ((value != NULL) &&
5178 (value[1] == 0) && (value[0] == '<') &&
5179 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5180 /*
5181 * DONE: get definite answer on this !!!
5182 * Lots of entity decls are used to declare a single
5183 * char
5184 * <!ENTITY lt "<">
5185 * Which seems to be valid since
5186 * 2.4: The ampersand character (&) and the left angle
5187 * bracket (<) may appear in their literal form only
5188 * when used ... They are also legal within the literal
5189 * entity value of an internal entity declaration;i
5190 * see "4.3.2 Well-Formed Parsed Entities".
5191 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5192 * Looking at the OASIS test suite and James Clark
5193 * tests, this is broken. However the XML REC uses
5194 * it. Is the XML REC not well-formed ????
5195 * This is a hack to avoid this problem
5196 *
5197 * ANSWER: since lt gt amp .. are already defined,
5198 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005199 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005200 * is lousy but acceptable.
5201 */
5202 list = xmlNewDocText(ctxt->myDoc, value);
5203 if (list != NULL) {
5204 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5205 (ent->children == NULL)) {
5206 ent->children = list;
5207 ent->last = list;
5208 list->parent = (xmlNodePtr) ent;
5209 } else {
5210 xmlFreeNodeList(list);
5211 }
5212 } else if (list != NULL) {
5213 xmlFreeNodeList(list);
5214 }
5215 } else {
5216 /*
5217 * 4.3.2: An internal general parsed entity is well-formed
5218 * if its replacement text matches the production labeled
5219 * content.
5220 */
5221 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5222 ctxt->depth++;
5223 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
5224 ctxt->sax, NULL, ctxt->depth,
5225 value, &list);
5226 ctxt->depth--;
5227 } else if (ent->etype ==
5228 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5229 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005230 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Owen Taylor3473f882001-02-23 17:55:21 +00005231 ctxt->sax, NULL, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005232 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005233 ctxt->depth--;
5234 } else {
5235 ret = -1;
5236 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5237 ctxt->sax->error(ctxt->userData,
5238 "Internal: invalid entity type\n");
5239 }
5240 if (ret == XML_ERR_ENTITY_LOOP) {
5241 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5242 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5243 ctxt->sax->error(ctxt->userData,
5244 "Detected entity reference loop\n");
5245 ctxt->wellFormed = 0;
5246 ctxt->disableSAX = 1;
5247 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005248 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5249 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005250 (ent->children == NULL)) {
5251 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005252 if (ctxt->replaceEntities) {
5253 /*
5254 * Prune it directly in the generated document
5255 * except for single text nodes.
5256 */
5257 if ((list->type == XML_TEXT_NODE) &&
5258 (list->next == NULL)) {
5259 list->parent = (xmlNodePtr) ent;
5260 list = NULL;
5261 } else {
5262 while (list != NULL) {
5263 list->parent = (xmlNodePtr) ctxt->node;
5264 if (list->next == NULL)
5265 ent->last = list;
5266 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005267 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005268 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005269 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5270 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005271 }
5272 } else {
5273 while (list != NULL) {
5274 list->parent = (xmlNodePtr) ent;
5275 if (list->next == NULL)
5276 ent->last = list;
5277 list = list->next;
5278 }
Owen Taylor3473f882001-02-23 17:55:21 +00005279 }
5280 } else {
5281 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005282 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005283 }
5284 } else if (ret > 0) {
5285 ctxt->errNo = ret;
5286 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5287 ctxt->sax->error(ctxt->userData,
5288 "Entity value required\n");
5289 ctxt->wellFormed = 0;
5290 ctxt->disableSAX = 1;
5291 } else if (list != NULL) {
5292 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005293 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005294 }
5295 }
5296 }
5297 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5298 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5299 /*
5300 * Create a node.
5301 */
5302 ctxt->sax->reference(ctxt->userData, ent->name);
5303 return;
5304 } else if (ctxt->replaceEntities) {
5305 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5306 /*
5307 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005308 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005309 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005310 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005311 if (list == NULL) {
Daniel Veillard8107a222002-01-13 14:10:10 +00005312 xmlNodePtr new = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005313 cur = ent->children;
5314 while (cur != NULL) {
5315 new = xmlCopyNode(cur, 1);
Daniel Veillard8107a222002-01-13 14:10:10 +00005316 if (firstChild == NULL){
5317 firstChild = new;
5318 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005319 xmlAddChild(ctxt->node, new);
5320 if (cur == ent->last)
5321 break;
5322 cur = cur->next;
5323 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005324 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5325 xmlAddEntityReference(ent, firstChild, new);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005326 } else {
5327 /*
5328 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005329 * node with a possible previous text one which
5330 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005331 */
5332 if (ent->children->type == XML_TEXT_NODE)
5333 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5334 if ((ent->last != ent->children) &&
5335 (ent->last->type == XML_TEXT_NODE))
5336 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5337 xmlAddChildList(ctxt->node, ent->children);
5338 }
5339
Owen Taylor3473f882001-02-23 17:55:21 +00005340 /*
5341 * This is to avoid a nasty side effect, see
5342 * characters() in SAX.c
5343 */
5344 ctxt->nodemem = 0;
5345 ctxt->nodelen = 0;
5346 return;
5347 } else {
5348 /*
5349 * Probably running in SAX mode
5350 */
5351 xmlParserInputPtr input;
5352
5353 input = xmlNewEntityInputStream(ctxt, ent);
5354 xmlPushInput(ctxt, input);
5355 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5356 (RAW == '<') && (NXT(1) == '?') &&
5357 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5358 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5359 xmlParseTextDecl(ctxt);
5360 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5361 /*
5362 * The XML REC instructs us to stop parsing right here
5363 */
5364 ctxt->instate = XML_PARSER_EOF;
5365 return;
5366 }
5367 if (input->standalone == 1) {
5368 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5369 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5370 ctxt->sax->error(ctxt->userData,
5371 "external parsed entities cannot be standalone\n");
5372 ctxt->wellFormed = 0;
5373 ctxt->disableSAX = 1;
5374 }
5375 }
5376 return;
5377 }
5378 }
5379 } else {
5380 val = ent->content;
5381 if (val == NULL) return;
5382 /*
5383 * inline the entity.
5384 */
5385 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5386 (!ctxt->disableSAX))
5387 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5388 }
5389 }
5390}
5391
5392/**
5393 * xmlParseEntityRef:
5394 * @ctxt: an XML parser context
5395 *
5396 * parse ENTITY references declarations
5397 *
5398 * [68] EntityRef ::= '&' Name ';'
5399 *
5400 * [ WFC: Entity Declared ]
5401 * In a document without any DTD, a document with only an internal DTD
5402 * subset which contains no parameter entity references, or a document
5403 * with "standalone='yes'", the Name given in the entity reference
5404 * must match that in an entity declaration, except that well-formed
5405 * documents need not declare any of the following entities: amp, lt,
5406 * gt, apos, quot. The declaration of a parameter entity must precede
5407 * any reference to it. Similarly, the declaration of a general entity
5408 * must precede any reference to it which appears in a default value in an
5409 * attribute-list declaration. Note that if entities are declared in the
5410 * external subset or in external parameter entities, a non-validating
5411 * processor is not obligated to read and process their declarations;
5412 * for such documents, the rule that an entity must be declared is a
5413 * well-formedness constraint only if standalone='yes'.
5414 *
5415 * [ WFC: Parsed Entity ]
5416 * An entity reference must not contain the name of an unparsed entity
5417 *
5418 * Returns the xmlEntityPtr if found, or NULL otherwise.
5419 */
5420xmlEntityPtr
5421xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5422 xmlChar *name;
5423 xmlEntityPtr ent = NULL;
5424
5425 GROW;
5426
5427 if (RAW == '&') {
5428 NEXT;
5429 name = xmlParseName(ctxt);
5430 if (name == NULL) {
5431 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5432 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5433 ctxt->sax->error(ctxt->userData,
5434 "xmlParseEntityRef: no name\n");
5435 ctxt->wellFormed = 0;
5436 ctxt->disableSAX = 1;
5437 } else {
5438 if (RAW == ';') {
5439 NEXT;
5440 /*
5441 * Ask first SAX for entity resolution, otherwise try the
5442 * predefined set.
5443 */
5444 if (ctxt->sax != NULL) {
5445 if (ctxt->sax->getEntity != NULL)
5446 ent = ctxt->sax->getEntity(ctxt->userData, name);
5447 if (ent == NULL)
5448 ent = xmlGetPredefinedEntity(name);
5449 }
5450 /*
5451 * [ WFC: Entity Declared ]
5452 * In a document without any DTD, a document with only an
5453 * internal DTD subset which contains no parameter entity
5454 * references, or a document with "standalone='yes'", the
5455 * Name given in the entity reference must match that in an
5456 * entity declaration, except that well-formed documents
5457 * need not declare any of the following entities: amp, lt,
5458 * gt, apos, quot.
5459 * The declaration of a parameter entity must precede any
5460 * reference to it.
5461 * Similarly, the declaration of a general entity must
5462 * precede any reference to it which appears in a default
5463 * value in an attribute-list declaration. Note that if
5464 * entities are declared in the external subset or in
5465 * external parameter entities, a non-validating processor
5466 * is not obligated to read and process their declarations;
5467 * for such documents, the rule that an entity must be
5468 * declared is a well-formedness constraint only if
5469 * standalone='yes'.
5470 */
5471 if (ent == NULL) {
5472 if ((ctxt->standalone == 1) ||
5473 ((ctxt->hasExternalSubset == 0) &&
5474 (ctxt->hasPErefs == 0))) {
5475 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5476 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5477 ctxt->sax->error(ctxt->userData,
5478 "Entity '%s' not defined\n", name);
5479 ctxt->wellFormed = 0;
5480 ctxt->disableSAX = 1;
5481 } else {
5482 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005483 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005484 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005485 "Entity '%s' not defined\n", name);
5486 }
5487 }
5488
5489 /*
5490 * [ WFC: Parsed Entity ]
5491 * An entity reference must not contain the name of an
5492 * unparsed entity
5493 */
5494 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5495 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5496 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5497 ctxt->sax->error(ctxt->userData,
5498 "Entity reference to unparsed entity %s\n", name);
5499 ctxt->wellFormed = 0;
5500 ctxt->disableSAX = 1;
5501 }
5502
5503 /*
5504 * [ WFC: No External Entity References ]
5505 * Attribute values cannot contain direct or indirect
5506 * entity references to external entities.
5507 */
5508 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5509 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5510 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5511 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5512 ctxt->sax->error(ctxt->userData,
5513 "Attribute references external entity '%s'\n", name);
5514 ctxt->wellFormed = 0;
5515 ctxt->disableSAX = 1;
5516 }
5517 /*
5518 * [ WFC: No < in Attribute Values ]
5519 * The replacement text of any entity referred to directly or
5520 * indirectly in an attribute value (other than "&lt;") must
5521 * not contain a <.
5522 */
5523 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5524 (ent != NULL) &&
5525 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5526 (ent->content != NULL) &&
5527 (xmlStrchr(ent->content, '<'))) {
5528 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5529 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5530 ctxt->sax->error(ctxt->userData,
5531 "'<' in entity '%s' is not allowed in attributes values\n", name);
5532 ctxt->wellFormed = 0;
5533 ctxt->disableSAX = 1;
5534 }
5535
5536 /*
5537 * Internal check, no parameter entities here ...
5538 */
5539 else {
5540 switch (ent->etype) {
5541 case XML_INTERNAL_PARAMETER_ENTITY:
5542 case XML_EXTERNAL_PARAMETER_ENTITY:
5543 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5544 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5545 ctxt->sax->error(ctxt->userData,
5546 "Attempt to reference the parameter entity '%s'\n", name);
5547 ctxt->wellFormed = 0;
5548 ctxt->disableSAX = 1;
5549 break;
5550 default:
5551 break;
5552 }
5553 }
5554
5555 /*
5556 * [ WFC: No Recursion ]
5557 * A parsed entity must not contain a recursive reference
5558 * to itself, either directly or indirectly.
5559 * Done somewhere else
5560 */
5561
5562 } else {
5563 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5564 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5565 ctxt->sax->error(ctxt->userData,
5566 "xmlParseEntityRef: expecting ';'\n");
5567 ctxt->wellFormed = 0;
5568 ctxt->disableSAX = 1;
5569 }
5570 xmlFree(name);
5571 }
5572 }
5573 return(ent);
5574}
5575
5576/**
5577 * xmlParseStringEntityRef:
5578 * @ctxt: an XML parser context
5579 * @str: a pointer to an index in the string
5580 *
5581 * parse ENTITY references declarations, but this version parses it from
5582 * a string value.
5583 *
5584 * [68] EntityRef ::= '&' Name ';'
5585 *
5586 * [ WFC: Entity Declared ]
5587 * In a document without any DTD, a document with only an internal DTD
5588 * subset which contains no parameter entity references, or a document
5589 * with "standalone='yes'", the Name given in the entity reference
5590 * must match that in an entity declaration, except that well-formed
5591 * documents need not declare any of the following entities: amp, lt,
5592 * gt, apos, quot. The declaration of a parameter entity must precede
5593 * any reference to it. Similarly, the declaration of a general entity
5594 * must precede any reference to it which appears in a default value in an
5595 * attribute-list declaration. Note that if entities are declared in the
5596 * external subset or in external parameter entities, a non-validating
5597 * processor is not obligated to read and process their declarations;
5598 * for such documents, the rule that an entity must be declared is a
5599 * well-formedness constraint only if standalone='yes'.
5600 *
5601 * [ WFC: Parsed Entity ]
5602 * An entity reference must not contain the name of an unparsed entity
5603 *
5604 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5605 * is updated to the current location in the string.
5606 */
5607xmlEntityPtr
5608xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5609 xmlChar *name;
5610 const xmlChar *ptr;
5611 xmlChar cur;
5612 xmlEntityPtr ent = NULL;
5613
5614 if ((str == NULL) || (*str == NULL))
5615 return(NULL);
5616 ptr = *str;
5617 cur = *ptr;
5618 if (cur == '&') {
5619 ptr++;
5620 cur = *ptr;
5621 name = xmlParseStringName(ctxt, &ptr);
5622 if (name == NULL) {
5623 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5624 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5625 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005626 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005627 ctxt->wellFormed = 0;
5628 ctxt->disableSAX = 1;
5629 } else {
5630 if (*ptr == ';') {
5631 ptr++;
5632 /*
5633 * Ask first SAX for entity resolution, otherwise try the
5634 * predefined set.
5635 */
5636 if (ctxt->sax != NULL) {
5637 if (ctxt->sax->getEntity != NULL)
5638 ent = ctxt->sax->getEntity(ctxt->userData, name);
5639 if (ent == NULL)
5640 ent = xmlGetPredefinedEntity(name);
5641 }
5642 /*
5643 * [ WFC: Entity Declared ]
5644 * In a document without any DTD, a document with only an
5645 * internal DTD subset which contains no parameter entity
5646 * references, or a document with "standalone='yes'", the
5647 * Name given in the entity reference must match that in an
5648 * entity declaration, except that well-formed documents
5649 * need not declare any of the following entities: amp, lt,
5650 * gt, apos, quot.
5651 * The declaration of a parameter entity must precede any
5652 * reference to it.
5653 * Similarly, the declaration of a general entity must
5654 * precede any reference to it which appears in a default
5655 * value in an attribute-list declaration. Note that if
5656 * entities are declared in the external subset or in
5657 * external parameter entities, a non-validating processor
5658 * is not obligated to read and process their declarations;
5659 * for such documents, the rule that an entity must be
5660 * declared is a well-formedness constraint only if
5661 * standalone='yes'.
5662 */
5663 if (ent == NULL) {
5664 if ((ctxt->standalone == 1) ||
5665 ((ctxt->hasExternalSubset == 0) &&
5666 (ctxt->hasPErefs == 0))) {
5667 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5668 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5669 ctxt->sax->error(ctxt->userData,
5670 "Entity '%s' not defined\n", name);
5671 ctxt->wellFormed = 0;
5672 ctxt->disableSAX = 1;
5673 } else {
5674 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5675 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5676 ctxt->sax->warning(ctxt->userData,
5677 "Entity '%s' not defined\n", name);
5678 }
5679 }
5680
5681 /*
5682 * [ WFC: Parsed Entity ]
5683 * An entity reference must not contain the name of an
5684 * unparsed entity
5685 */
5686 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5687 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5688 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5689 ctxt->sax->error(ctxt->userData,
5690 "Entity reference to unparsed entity %s\n", name);
5691 ctxt->wellFormed = 0;
5692 ctxt->disableSAX = 1;
5693 }
5694
5695 /*
5696 * [ WFC: No External Entity References ]
5697 * Attribute values cannot contain direct or indirect
5698 * entity references to external entities.
5699 */
5700 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5701 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5702 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5703 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5704 ctxt->sax->error(ctxt->userData,
5705 "Attribute references external entity '%s'\n", name);
5706 ctxt->wellFormed = 0;
5707 ctxt->disableSAX = 1;
5708 }
5709 /*
5710 * [ WFC: No < in Attribute Values ]
5711 * The replacement text of any entity referred to directly or
5712 * indirectly in an attribute value (other than "&lt;") must
5713 * not contain a <.
5714 */
5715 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5716 (ent != NULL) &&
5717 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5718 (ent->content != NULL) &&
5719 (xmlStrchr(ent->content, '<'))) {
5720 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5721 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5722 ctxt->sax->error(ctxt->userData,
5723 "'<' in entity '%s' is not allowed in attributes values\n", name);
5724 ctxt->wellFormed = 0;
5725 ctxt->disableSAX = 1;
5726 }
5727
5728 /*
5729 * Internal check, no parameter entities here ...
5730 */
5731 else {
5732 switch (ent->etype) {
5733 case XML_INTERNAL_PARAMETER_ENTITY:
5734 case XML_EXTERNAL_PARAMETER_ENTITY:
5735 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5736 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5737 ctxt->sax->error(ctxt->userData,
5738 "Attempt to reference the parameter entity '%s'\n", name);
5739 ctxt->wellFormed = 0;
5740 ctxt->disableSAX = 1;
5741 break;
5742 default:
5743 break;
5744 }
5745 }
5746
5747 /*
5748 * [ WFC: No Recursion ]
5749 * A parsed entity must not contain a recursive reference
5750 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005751 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00005752 */
5753
5754 } else {
5755 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5756 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5757 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005758 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005759 ctxt->wellFormed = 0;
5760 ctxt->disableSAX = 1;
5761 }
5762 xmlFree(name);
5763 }
5764 }
5765 *str = ptr;
5766 return(ent);
5767}
5768
5769/**
5770 * xmlParsePEReference:
5771 * @ctxt: an XML parser context
5772 *
5773 * parse PEReference declarations
5774 * The entity content is handled directly by pushing it's content as
5775 * a new input stream.
5776 *
5777 * [69] PEReference ::= '%' Name ';'
5778 *
5779 * [ WFC: No Recursion ]
5780 * A parsed entity must not contain a recursive
5781 * reference to itself, either directly or indirectly.
5782 *
5783 * [ WFC: Entity Declared ]
5784 * In a document without any DTD, a document with only an internal DTD
5785 * subset which contains no parameter entity references, or a document
5786 * with "standalone='yes'", ... ... The declaration of a parameter
5787 * entity must precede any reference to it...
5788 *
5789 * [ VC: Entity Declared ]
5790 * In a document with an external subset or external parameter entities
5791 * with "standalone='no'", ... ... The declaration of a parameter entity
5792 * must precede any reference to it...
5793 *
5794 * [ WFC: In DTD ]
5795 * Parameter-entity references may only appear in the DTD.
5796 * NOTE: misleading but this is handled.
5797 */
5798void
5799xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5800 xmlChar *name;
5801 xmlEntityPtr entity = NULL;
5802 xmlParserInputPtr input;
5803
5804 if (RAW == '%') {
5805 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005806 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005807 if (name == NULL) {
5808 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5809 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5810 ctxt->sax->error(ctxt->userData,
5811 "xmlParsePEReference: no name\n");
5812 ctxt->wellFormed = 0;
5813 ctxt->disableSAX = 1;
5814 } else {
5815 if (RAW == ';') {
5816 NEXT;
5817 if ((ctxt->sax != NULL) &&
5818 (ctxt->sax->getParameterEntity != NULL))
5819 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5820 name);
5821 if (entity == NULL) {
5822 /*
5823 * [ WFC: Entity Declared ]
5824 * In a document without any DTD, a document with only an
5825 * internal DTD subset which contains no parameter entity
5826 * references, or a document with "standalone='yes'", ...
5827 * ... The declaration of a parameter entity must precede
5828 * any reference to it...
5829 */
5830 if ((ctxt->standalone == 1) ||
5831 ((ctxt->hasExternalSubset == 0) &&
5832 (ctxt->hasPErefs == 0))) {
5833 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5834 if ((!ctxt->disableSAX) &&
5835 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5836 ctxt->sax->error(ctxt->userData,
5837 "PEReference: %%%s; not found\n", name);
5838 ctxt->wellFormed = 0;
5839 ctxt->disableSAX = 1;
5840 } else {
5841 /*
5842 * [ VC: Entity Declared ]
5843 * In a document with an external subset or external
5844 * parameter entities with "standalone='no'", ...
5845 * ... The declaration of a parameter entity must precede
5846 * any reference to it...
5847 */
5848 if ((!ctxt->disableSAX) &&
5849 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5850 ctxt->sax->warning(ctxt->userData,
5851 "PEReference: %%%s; not found\n", name);
5852 ctxt->valid = 0;
5853 }
5854 } else {
5855 /*
5856 * Internal checking in case the entity quest barfed
5857 */
5858 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5859 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5860 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5861 ctxt->sax->warning(ctxt->userData,
5862 "Internal: %%%s; is not a parameter entity\n", name);
5863 } else {
5864 /*
5865 * TODO !!!
5866 * handle the extra spaces added before and after
5867 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5868 */
5869 input = xmlNewEntityInputStream(ctxt, entity);
5870 xmlPushInput(ctxt, input);
5871 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5872 (RAW == '<') && (NXT(1) == '?') &&
5873 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5874 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5875 xmlParseTextDecl(ctxt);
5876 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5877 /*
5878 * The XML REC instructs us to stop parsing
5879 * right here
5880 */
5881 ctxt->instate = XML_PARSER_EOF;
5882 xmlFree(name);
5883 return;
5884 }
5885 }
5886 if (ctxt->token == 0)
5887 ctxt->token = ' ';
5888 }
5889 }
5890 ctxt->hasPErefs = 1;
5891 } else {
5892 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5893 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5894 ctxt->sax->error(ctxt->userData,
5895 "xmlParsePEReference: expecting ';'\n");
5896 ctxt->wellFormed = 0;
5897 ctxt->disableSAX = 1;
5898 }
5899 xmlFree(name);
5900 }
5901 }
5902}
5903
5904/**
5905 * xmlParseStringPEReference:
5906 * @ctxt: an XML parser context
5907 * @str: a pointer to an index in the string
5908 *
5909 * parse PEReference declarations
5910 *
5911 * [69] PEReference ::= '%' Name ';'
5912 *
5913 * [ WFC: No Recursion ]
5914 * A parsed entity must not contain a recursive
5915 * reference to itself, either directly or indirectly.
5916 *
5917 * [ WFC: Entity Declared ]
5918 * In a document without any DTD, a document with only an internal DTD
5919 * subset which contains no parameter entity references, or a document
5920 * with "standalone='yes'", ... ... The declaration of a parameter
5921 * entity must precede any reference to it...
5922 *
5923 * [ VC: Entity Declared ]
5924 * In a document with an external subset or external parameter entities
5925 * with "standalone='no'", ... ... The declaration of a parameter entity
5926 * must precede any reference to it...
5927 *
5928 * [ WFC: In DTD ]
5929 * Parameter-entity references may only appear in the DTD.
5930 * NOTE: misleading but this is handled.
5931 *
5932 * Returns the string of the entity content.
5933 * str is updated to the current value of the index
5934 */
5935xmlEntityPtr
5936xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5937 const xmlChar *ptr;
5938 xmlChar cur;
5939 xmlChar *name;
5940 xmlEntityPtr entity = NULL;
5941
5942 if ((str == NULL) || (*str == NULL)) return(NULL);
5943 ptr = *str;
5944 cur = *ptr;
5945 if (cur == '%') {
5946 ptr++;
5947 cur = *ptr;
5948 name = xmlParseStringName(ctxt, &ptr);
5949 if (name == NULL) {
5950 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5951 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5952 ctxt->sax->error(ctxt->userData,
5953 "xmlParseStringPEReference: no name\n");
5954 ctxt->wellFormed = 0;
5955 ctxt->disableSAX = 1;
5956 } else {
5957 cur = *ptr;
5958 if (cur == ';') {
5959 ptr++;
5960 cur = *ptr;
5961 if ((ctxt->sax != NULL) &&
5962 (ctxt->sax->getParameterEntity != NULL))
5963 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5964 name);
5965 if (entity == NULL) {
5966 /*
5967 * [ WFC: Entity Declared ]
5968 * In a document without any DTD, a document with only an
5969 * internal DTD subset which contains no parameter entity
5970 * references, or a document with "standalone='yes'", ...
5971 * ... The declaration of a parameter entity must precede
5972 * any reference to it...
5973 */
5974 if ((ctxt->standalone == 1) ||
5975 ((ctxt->hasExternalSubset == 0) &&
5976 (ctxt->hasPErefs == 0))) {
5977 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5978 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5979 ctxt->sax->error(ctxt->userData,
5980 "PEReference: %%%s; not found\n", name);
5981 ctxt->wellFormed = 0;
5982 ctxt->disableSAX = 1;
5983 } else {
5984 /*
5985 * [ VC: Entity Declared ]
5986 * In a document with an external subset or external
5987 * parameter entities with "standalone='no'", ...
5988 * ... The declaration of a parameter entity must
5989 * precede any reference to it...
5990 */
5991 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5992 ctxt->sax->warning(ctxt->userData,
5993 "PEReference: %%%s; not found\n", name);
5994 ctxt->valid = 0;
5995 }
5996 } else {
5997 /*
5998 * Internal checking in case the entity quest barfed
5999 */
6000 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6001 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6002 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6003 ctxt->sax->warning(ctxt->userData,
6004 "Internal: %%%s; is not a parameter entity\n", name);
6005 }
6006 }
6007 ctxt->hasPErefs = 1;
6008 } else {
6009 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6010 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6011 ctxt->sax->error(ctxt->userData,
6012 "xmlParseStringPEReference: expecting ';'\n");
6013 ctxt->wellFormed = 0;
6014 ctxt->disableSAX = 1;
6015 }
6016 xmlFree(name);
6017 }
6018 }
6019 *str = ptr;
6020 return(entity);
6021}
6022
6023/**
6024 * xmlParseDocTypeDecl:
6025 * @ctxt: an XML parser context
6026 *
6027 * parse a DOCTYPE declaration
6028 *
6029 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6030 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6031 *
6032 * [ VC: Root Element Type ]
6033 * The Name in the document type declaration must match the element
6034 * type of the root element.
6035 */
6036
6037void
6038xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6039 xmlChar *name = NULL;
6040 xmlChar *ExternalID = NULL;
6041 xmlChar *URI = NULL;
6042
6043 /*
6044 * We know that '<!DOCTYPE' has been detected.
6045 */
6046 SKIP(9);
6047
6048 SKIP_BLANKS;
6049
6050 /*
6051 * Parse the DOCTYPE name.
6052 */
6053 name = xmlParseName(ctxt);
6054 if (name == NULL) {
6055 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6056 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6057 ctxt->sax->error(ctxt->userData,
6058 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6059 ctxt->wellFormed = 0;
6060 ctxt->disableSAX = 1;
6061 }
6062 ctxt->intSubName = name;
6063
6064 SKIP_BLANKS;
6065
6066 /*
6067 * Check for SystemID and ExternalID
6068 */
6069 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6070
6071 if ((URI != NULL) || (ExternalID != NULL)) {
6072 ctxt->hasExternalSubset = 1;
6073 }
6074 ctxt->extSubURI = URI;
6075 ctxt->extSubSystem = ExternalID;
6076
6077 SKIP_BLANKS;
6078
6079 /*
6080 * Create and update the internal subset.
6081 */
6082 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6083 (!ctxt->disableSAX))
6084 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6085
6086 /*
6087 * Is there any internal subset declarations ?
6088 * they are handled separately in xmlParseInternalSubset()
6089 */
6090 if (RAW == '[')
6091 return;
6092
6093 /*
6094 * We should be at the end of the DOCTYPE declaration.
6095 */
6096 if (RAW != '>') {
6097 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6098 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006099 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006100 ctxt->wellFormed = 0;
6101 ctxt->disableSAX = 1;
6102 }
6103 NEXT;
6104}
6105
6106/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006107 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006108 * @ctxt: an XML parser context
6109 *
6110 * parse the internal subset declaration
6111 *
6112 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6113 */
6114
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006115static void
Owen Taylor3473f882001-02-23 17:55:21 +00006116xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6117 /*
6118 * Is there any DTD definition ?
6119 */
6120 if (RAW == '[') {
6121 ctxt->instate = XML_PARSER_DTD;
6122 NEXT;
6123 /*
6124 * Parse the succession of Markup declarations and
6125 * PEReferences.
6126 * Subsequence (markupdecl | PEReference | S)*
6127 */
6128 while (RAW != ']') {
6129 const xmlChar *check = CUR_PTR;
6130 int cons = ctxt->input->consumed;
6131
6132 SKIP_BLANKS;
6133 xmlParseMarkupDecl(ctxt);
6134 xmlParsePEReference(ctxt);
6135
6136 /*
6137 * Pop-up of finished entities.
6138 */
6139 while ((RAW == 0) && (ctxt->inputNr > 1))
6140 xmlPopInput(ctxt);
6141
6142 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6143 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6144 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6145 ctxt->sax->error(ctxt->userData,
6146 "xmlParseInternalSubset: error detected in Markup declaration\n");
6147 ctxt->wellFormed = 0;
6148 ctxt->disableSAX = 1;
6149 break;
6150 }
6151 }
6152 if (RAW == ']') {
6153 NEXT;
6154 SKIP_BLANKS;
6155 }
6156 }
6157
6158 /*
6159 * We should be at the end of the DOCTYPE declaration.
6160 */
6161 if (RAW != '>') {
6162 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6163 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006164 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006165 ctxt->wellFormed = 0;
6166 ctxt->disableSAX = 1;
6167 }
6168 NEXT;
6169}
6170
6171/**
6172 * xmlParseAttribute:
6173 * @ctxt: an XML parser context
6174 * @value: a xmlChar ** used to store the value of the attribute
6175 *
6176 * parse an attribute
6177 *
6178 * [41] Attribute ::= Name Eq AttValue
6179 *
6180 * [ WFC: No External Entity References ]
6181 * Attribute values cannot contain direct or indirect entity references
6182 * to external entities.
6183 *
6184 * [ WFC: No < in Attribute Values ]
6185 * The replacement text of any entity referred to directly or indirectly in
6186 * an attribute value (other than "&lt;") must not contain a <.
6187 *
6188 * [ VC: Attribute Value Type ]
6189 * The attribute must have been declared; the value must be of the type
6190 * declared for it.
6191 *
6192 * [25] Eq ::= S? '=' S?
6193 *
6194 * With namespace:
6195 *
6196 * [NS 11] Attribute ::= QName Eq AttValue
6197 *
6198 * Also the case QName == xmlns:??? is handled independently as a namespace
6199 * definition.
6200 *
6201 * Returns the attribute name, and the value in *value.
6202 */
6203
6204xmlChar *
6205xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6206 xmlChar *name, *val;
6207
6208 *value = NULL;
6209 name = xmlParseName(ctxt);
6210 if (name == NULL) {
6211 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6212 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6213 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6214 ctxt->wellFormed = 0;
6215 ctxt->disableSAX = 1;
6216 return(NULL);
6217 }
6218
6219 /*
6220 * read the value
6221 */
6222 SKIP_BLANKS;
6223 if (RAW == '=') {
6224 NEXT;
6225 SKIP_BLANKS;
6226 val = xmlParseAttValue(ctxt);
6227 ctxt->instate = XML_PARSER_CONTENT;
6228 } else {
6229 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6230 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6231 ctxt->sax->error(ctxt->userData,
6232 "Specification mandate value for attribute %s\n", name);
6233 ctxt->wellFormed = 0;
6234 ctxt->disableSAX = 1;
6235 xmlFree(name);
6236 return(NULL);
6237 }
6238
6239 /*
6240 * Check that xml:lang conforms to the specification
6241 * No more registered as an error, just generate a warning now
6242 * since this was deprecated in XML second edition
6243 */
6244 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6245 if (!xmlCheckLanguageID(val)) {
6246 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6247 ctxt->sax->warning(ctxt->userData,
6248 "Malformed value for xml:lang : %s\n", val);
6249 }
6250 }
6251
6252 /*
6253 * Check that xml:space conforms to the specification
6254 */
6255 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6256 if (xmlStrEqual(val, BAD_CAST "default"))
6257 *(ctxt->space) = 0;
6258 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6259 *(ctxt->space) = 1;
6260 else {
6261 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6262 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6263 ctxt->sax->error(ctxt->userData,
6264"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6265 val);
6266 ctxt->wellFormed = 0;
6267 ctxt->disableSAX = 1;
6268 }
6269 }
6270
6271 *value = val;
6272 return(name);
6273}
6274
6275/**
6276 * xmlParseStartTag:
6277 * @ctxt: an XML parser context
6278 *
6279 * parse a start of tag either for rule element or
6280 * EmptyElement. In both case we don't parse the tag closing chars.
6281 *
6282 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6283 *
6284 * [ WFC: Unique Att Spec ]
6285 * No attribute name may appear more than once in the same start-tag or
6286 * empty-element tag.
6287 *
6288 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6289 *
6290 * [ WFC: Unique Att Spec ]
6291 * No attribute name may appear more than once in the same start-tag or
6292 * empty-element tag.
6293 *
6294 * With namespace:
6295 *
6296 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6297 *
6298 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6299 *
6300 * Returns the element name parsed
6301 */
6302
6303xmlChar *
6304xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6305 xmlChar *name;
6306 xmlChar *attname;
6307 xmlChar *attvalue;
6308 const xmlChar **atts = NULL;
6309 int nbatts = 0;
6310 int maxatts = 0;
6311 int i;
6312
6313 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006314 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006315
6316 name = xmlParseName(ctxt);
6317 if (name == NULL) {
6318 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6319 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6320 ctxt->sax->error(ctxt->userData,
6321 "xmlParseStartTag: invalid element name\n");
6322 ctxt->wellFormed = 0;
6323 ctxt->disableSAX = 1;
6324 return(NULL);
6325 }
6326
6327 /*
6328 * Now parse the attributes, it ends up with the ending
6329 *
6330 * (S Attribute)* S?
6331 */
6332 SKIP_BLANKS;
6333 GROW;
6334
Daniel Veillard21a0f912001-02-25 19:54:14 +00006335 while ((RAW != '>') &&
6336 ((RAW != '/') || (NXT(1) != '>')) &&
6337 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006338 const xmlChar *q = CUR_PTR;
6339 int cons = ctxt->input->consumed;
6340
6341 attname = xmlParseAttribute(ctxt, &attvalue);
6342 if ((attname != NULL) && (attvalue != NULL)) {
6343 /*
6344 * [ WFC: Unique Att Spec ]
6345 * No attribute name may appear more than once in the same
6346 * start-tag or empty-element tag.
6347 */
6348 for (i = 0; i < nbatts;i += 2) {
6349 if (xmlStrEqual(atts[i], attname)) {
6350 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6351 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6352 ctxt->sax->error(ctxt->userData,
6353 "Attribute %s redefined\n",
6354 attname);
6355 ctxt->wellFormed = 0;
6356 ctxt->disableSAX = 1;
6357 xmlFree(attname);
6358 xmlFree(attvalue);
6359 goto failed;
6360 }
6361 }
6362
6363 /*
6364 * Add the pair to atts
6365 */
6366 if (atts == NULL) {
6367 maxatts = 10;
6368 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6369 if (atts == NULL) {
6370 xmlGenericError(xmlGenericErrorContext,
6371 "malloc of %ld byte failed\n",
6372 maxatts * (long)sizeof(xmlChar *));
6373 return(NULL);
6374 }
6375 } else if (nbatts + 4 > maxatts) {
6376 maxatts *= 2;
6377 atts = (const xmlChar **) xmlRealloc((void *) atts,
6378 maxatts * sizeof(xmlChar *));
6379 if (atts == NULL) {
6380 xmlGenericError(xmlGenericErrorContext,
6381 "realloc of %ld byte failed\n",
6382 maxatts * (long)sizeof(xmlChar *));
6383 return(NULL);
6384 }
6385 }
6386 atts[nbatts++] = attname;
6387 atts[nbatts++] = attvalue;
6388 atts[nbatts] = NULL;
6389 atts[nbatts + 1] = NULL;
6390 } else {
6391 if (attname != NULL)
6392 xmlFree(attname);
6393 if (attvalue != NULL)
6394 xmlFree(attvalue);
6395 }
6396
6397failed:
6398
6399 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6400 break;
6401 if (!IS_BLANK(RAW)) {
6402 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6403 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6404 ctxt->sax->error(ctxt->userData,
6405 "attributes construct error\n");
6406 ctxt->wellFormed = 0;
6407 ctxt->disableSAX = 1;
6408 }
6409 SKIP_BLANKS;
6410 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6411 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6412 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6413 ctxt->sax->error(ctxt->userData,
6414 "xmlParseStartTag: problem parsing attributes\n");
6415 ctxt->wellFormed = 0;
6416 ctxt->disableSAX = 1;
6417 break;
6418 }
6419 GROW;
6420 }
6421
6422 /*
6423 * SAX: Start of Element !
6424 */
6425 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6426 (!ctxt->disableSAX))
6427 ctxt->sax->startElement(ctxt->userData, name, atts);
6428
6429 if (atts != NULL) {
6430 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6431 xmlFree((void *) atts);
6432 }
6433 return(name);
6434}
6435
6436/**
6437 * xmlParseEndTag:
6438 * @ctxt: an XML parser context
6439 *
6440 * parse an end of tag
6441 *
6442 * [42] ETag ::= '</' Name S? '>'
6443 *
6444 * With namespace
6445 *
6446 * [NS 9] ETag ::= '</' QName S? '>'
6447 */
6448
6449void
6450xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6451 xmlChar *name;
6452 xmlChar *oldname;
6453
6454 GROW;
6455 if ((RAW != '<') || (NXT(1) != '/')) {
6456 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6457 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6458 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6459 ctxt->wellFormed = 0;
6460 ctxt->disableSAX = 1;
6461 return;
6462 }
6463 SKIP(2);
6464
6465 name = xmlParseName(ctxt);
6466
6467 /*
6468 * We should definitely be at the ending "S? '>'" part
6469 */
6470 GROW;
6471 SKIP_BLANKS;
6472 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6473 ctxt->errNo = XML_ERR_GT_REQUIRED;
6474 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6475 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6476 ctxt->wellFormed = 0;
6477 ctxt->disableSAX = 1;
6478 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006479 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006480
6481 /*
6482 * [ WFC: Element Type Match ]
6483 * The Name in an element's end-tag must match the element type in the
6484 * start-tag.
6485 *
6486 */
6487 if ((name == NULL) || (ctxt->name == NULL) ||
6488 (!xmlStrEqual(name, ctxt->name))) {
6489 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6490 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6491 if ((name != NULL) && (ctxt->name != NULL)) {
6492 ctxt->sax->error(ctxt->userData,
6493 "Opening and ending tag mismatch: %s and %s\n",
6494 ctxt->name, name);
6495 } else if (ctxt->name != NULL) {
6496 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006497 "Ending tag error for: %s\n", ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006498 } else {
6499 ctxt->sax->error(ctxt->userData,
6500 "Ending tag error: internal error ???\n");
6501 }
6502
6503 }
6504 ctxt->wellFormed = 0;
6505 ctxt->disableSAX = 1;
6506 }
6507
6508 /*
6509 * SAX: End of Tag
6510 */
6511 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6512 (!ctxt->disableSAX))
6513 ctxt->sax->endElement(ctxt->userData, name);
6514
6515 if (name != NULL)
6516 xmlFree(name);
6517 oldname = namePop(ctxt);
6518 spacePop(ctxt);
6519 if (oldname != NULL) {
6520#ifdef DEBUG_STACK
6521 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6522#endif
6523 xmlFree(oldname);
6524 }
6525 return;
6526}
6527
6528/**
6529 * xmlParseCDSect:
6530 * @ctxt: an XML parser context
6531 *
6532 * Parse escaped pure raw content.
6533 *
6534 * [18] CDSect ::= CDStart CData CDEnd
6535 *
6536 * [19] CDStart ::= '<![CDATA['
6537 *
6538 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6539 *
6540 * [21] CDEnd ::= ']]>'
6541 */
6542void
6543xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6544 xmlChar *buf = NULL;
6545 int len = 0;
6546 int size = XML_PARSER_BUFFER_SIZE;
6547 int r, rl;
6548 int s, sl;
6549 int cur, l;
6550 int count = 0;
6551
6552 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6553 (NXT(2) == '[') && (NXT(3) == 'C') &&
6554 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6555 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6556 (NXT(8) == '[')) {
6557 SKIP(9);
6558 } else
6559 return;
6560
6561 ctxt->instate = XML_PARSER_CDATA_SECTION;
6562 r = CUR_CHAR(rl);
6563 if (!IS_CHAR(r)) {
6564 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6565 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6566 ctxt->sax->error(ctxt->userData,
6567 "CData section not finished\n");
6568 ctxt->wellFormed = 0;
6569 ctxt->disableSAX = 1;
6570 ctxt->instate = XML_PARSER_CONTENT;
6571 return;
6572 }
6573 NEXTL(rl);
6574 s = CUR_CHAR(sl);
6575 if (!IS_CHAR(s)) {
6576 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6577 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6578 ctxt->sax->error(ctxt->userData,
6579 "CData section not finished\n");
6580 ctxt->wellFormed = 0;
6581 ctxt->disableSAX = 1;
6582 ctxt->instate = XML_PARSER_CONTENT;
6583 return;
6584 }
6585 NEXTL(sl);
6586 cur = CUR_CHAR(l);
6587 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6588 if (buf == NULL) {
6589 xmlGenericError(xmlGenericErrorContext,
6590 "malloc of %d byte failed\n", size);
6591 return;
6592 }
6593 while (IS_CHAR(cur) &&
6594 ((r != ']') || (s != ']') || (cur != '>'))) {
6595 if (len + 5 >= size) {
6596 size *= 2;
6597 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6598 if (buf == NULL) {
6599 xmlGenericError(xmlGenericErrorContext,
6600 "realloc of %d byte failed\n", size);
6601 return;
6602 }
6603 }
6604 COPY_BUF(rl,buf,len,r);
6605 r = s;
6606 rl = sl;
6607 s = cur;
6608 sl = l;
6609 count++;
6610 if (count > 50) {
6611 GROW;
6612 count = 0;
6613 }
6614 NEXTL(l);
6615 cur = CUR_CHAR(l);
6616 }
6617 buf[len] = 0;
6618 ctxt->instate = XML_PARSER_CONTENT;
6619 if (cur != '>') {
6620 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6621 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6622 ctxt->sax->error(ctxt->userData,
6623 "CData section not finished\n%.50s\n", buf);
6624 ctxt->wellFormed = 0;
6625 ctxt->disableSAX = 1;
6626 xmlFree(buf);
6627 return;
6628 }
6629 NEXTL(l);
6630
6631 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006632 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00006633 */
6634 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6635 if (ctxt->sax->cdataBlock != NULL)
6636 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006637 else if (ctxt->sax->characters != NULL)
6638 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006639 }
6640 xmlFree(buf);
6641}
6642
6643/**
6644 * xmlParseContent:
6645 * @ctxt: an XML parser context
6646 *
6647 * Parse a content:
6648 *
6649 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6650 */
6651
6652void
6653xmlParseContent(xmlParserCtxtPtr ctxt) {
6654 GROW;
6655 while (((RAW != 0) || (ctxt->token != 0)) &&
6656 ((RAW != '<') || (NXT(1) != '/'))) {
6657 const xmlChar *test = CUR_PTR;
6658 int cons = ctxt->input->consumed;
Daniel Veillard04be4f52001-03-26 21:23:53 +00006659 int tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006660 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006661
6662 /*
6663 * Handle possible processed charrefs.
6664 */
6665 if (ctxt->token != 0) {
6666 xmlParseCharData(ctxt, 0);
6667 }
6668 /*
6669 * First case : a Processing Instruction.
6670 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006671 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006672 xmlParsePI(ctxt);
6673 }
6674
6675 /*
6676 * Second case : a CDSection
6677 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006678 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006679 (NXT(2) == '[') && (NXT(3) == 'C') &&
6680 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6681 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6682 (NXT(8) == '[')) {
6683 xmlParseCDSect(ctxt);
6684 }
6685
6686 /*
6687 * Third case : a comment
6688 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006689 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006690 (NXT(2) == '-') && (NXT(3) == '-')) {
6691 xmlParseComment(ctxt);
6692 ctxt->instate = XML_PARSER_CONTENT;
6693 }
6694
6695 /*
6696 * Fourth case : a sub-element.
6697 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006698 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006699 xmlParseElement(ctxt);
6700 }
6701
6702 /*
6703 * Fifth case : a reference. If if has not been resolved,
6704 * parsing returns it's Name, create the node
6705 */
6706
Daniel Veillard21a0f912001-02-25 19:54:14 +00006707 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006708 xmlParseReference(ctxt);
6709 }
6710
6711 /*
6712 * Last case, text. Note that References are handled directly.
6713 */
6714 else {
6715 xmlParseCharData(ctxt, 0);
6716 }
6717
6718 GROW;
6719 /*
6720 * Pop-up of finished entities.
6721 */
6722 while ((RAW == 0) && (ctxt->inputNr > 1))
6723 xmlPopInput(ctxt);
6724 SHRINK;
6725
6726 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6727 (tok == ctxt->token)) {
6728 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6729 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6730 ctxt->sax->error(ctxt->userData,
6731 "detected an error in element content\n");
6732 ctxt->wellFormed = 0;
6733 ctxt->disableSAX = 1;
6734 ctxt->instate = XML_PARSER_EOF;
6735 break;
6736 }
6737 }
6738}
6739
6740/**
6741 * xmlParseElement:
6742 * @ctxt: an XML parser context
6743 *
6744 * parse an XML element, this is highly recursive
6745 *
6746 * [39] element ::= EmptyElemTag | STag content ETag
6747 *
6748 * [ WFC: Element Type Match ]
6749 * The Name in an element's end-tag must match the element type in the
6750 * start-tag.
6751 *
6752 * [ VC: Element Valid ]
6753 * An element is valid if there is a declaration matching elementdecl
6754 * where the Name matches the element type and one of the following holds:
6755 * - The declaration matches EMPTY and the element has no content.
6756 * - The declaration matches children and the sequence of child elements
6757 * belongs to the language generated by the regular expression in the
6758 * content model, with optional white space (characters matching the
6759 * nonterminal S) between each pair of child elements.
6760 * - The declaration matches Mixed and the content consists of character
6761 * data and child elements whose types match names in the content model.
6762 * - The declaration matches ANY, and the types of any child elements have
6763 * been declared.
6764 */
6765
6766void
6767xmlParseElement(xmlParserCtxtPtr ctxt) {
6768 const xmlChar *openTag = CUR_PTR;
6769 xmlChar *name;
6770 xmlChar *oldname;
6771 xmlParserNodeInfo node_info;
6772 xmlNodePtr ret;
6773
6774 /* Capture start position */
6775 if (ctxt->record_info) {
6776 node_info.begin_pos = ctxt->input->consumed +
6777 (CUR_PTR - ctxt->input->base);
6778 node_info.begin_line = ctxt->input->line;
6779 }
6780
6781 if (ctxt->spaceNr == 0)
6782 spacePush(ctxt, -1);
6783 else
6784 spacePush(ctxt, *ctxt->space);
6785
6786 name = xmlParseStartTag(ctxt);
6787 if (name == NULL) {
6788 spacePop(ctxt);
6789 return;
6790 }
6791 namePush(ctxt, name);
6792 ret = ctxt->node;
6793
6794 /*
6795 * [ VC: Root Element Type ]
6796 * The Name in the document type declaration must match the element
6797 * type of the root element.
6798 */
6799 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6800 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6801 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6802
6803 /*
6804 * Check for an Empty Element.
6805 */
6806 if ((RAW == '/') && (NXT(1) == '>')) {
6807 SKIP(2);
6808 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6809 (!ctxt->disableSAX))
6810 ctxt->sax->endElement(ctxt->userData, name);
6811 oldname = namePop(ctxt);
6812 spacePop(ctxt);
6813 if (oldname != NULL) {
6814#ifdef DEBUG_STACK
6815 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6816#endif
6817 xmlFree(oldname);
6818 }
6819 if ( ret != NULL && ctxt->record_info ) {
6820 node_info.end_pos = ctxt->input->consumed +
6821 (CUR_PTR - ctxt->input->base);
6822 node_info.end_line = ctxt->input->line;
6823 node_info.node = ret;
6824 xmlParserAddNodeInfo(ctxt, &node_info);
6825 }
6826 return;
6827 }
6828 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006829 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006830 } else {
6831 ctxt->errNo = XML_ERR_GT_REQUIRED;
6832 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6833 ctxt->sax->error(ctxt->userData,
6834 "Couldn't find end of Start Tag\n%.30s\n",
6835 openTag);
6836 ctxt->wellFormed = 0;
6837 ctxt->disableSAX = 1;
6838
6839 /*
6840 * end of parsing of this node.
6841 */
6842 nodePop(ctxt);
6843 oldname = namePop(ctxt);
6844 spacePop(ctxt);
6845 if (oldname != NULL) {
6846#ifdef DEBUG_STACK
6847 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6848#endif
6849 xmlFree(oldname);
6850 }
6851
6852 /*
6853 * Capture end position and add node
6854 */
6855 if ( ret != NULL && ctxt->record_info ) {
6856 node_info.end_pos = ctxt->input->consumed +
6857 (CUR_PTR - ctxt->input->base);
6858 node_info.end_line = ctxt->input->line;
6859 node_info.node = ret;
6860 xmlParserAddNodeInfo(ctxt, &node_info);
6861 }
6862 return;
6863 }
6864
6865 /*
6866 * Parse the content of the element:
6867 */
6868 xmlParseContent(ctxt);
6869 if (!IS_CHAR(RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00006870 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00006871 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6872 ctxt->sax->error(ctxt->userData,
6873 "Premature end of data in tag %.30s\n", openTag);
6874 ctxt->wellFormed = 0;
6875 ctxt->disableSAX = 1;
6876
6877 /*
6878 * end of parsing of this node.
6879 */
6880 nodePop(ctxt);
6881 oldname = namePop(ctxt);
6882 spacePop(ctxt);
6883 if (oldname != NULL) {
6884#ifdef DEBUG_STACK
6885 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6886#endif
6887 xmlFree(oldname);
6888 }
6889 return;
6890 }
6891
6892 /*
6893 * parse the end of tag: '</' should be here.
6894 */
6895 xmlParseEndTag(ctxt);
6896
6897 /*
6898 * Capture end position and add node
6899 */
6900 if ( ret != NULL && ctxt->record_info ) {
6901 node_info.end_pos = ctxt->input->consumed +
6902 (CUR_PTR - ctxt->input->base);
6903 node_info.end_line = ctxt->input->line;
6904 node_info.node = ret;
6905 xmlParserAddNodeInfo(ctxt, &node_info);
6906 }
6907}
6908
6909/**
6910 * xmlParseVersionNum:
6911 * @ctxt: an XML parser context
6912 *
6913 * parse the XML version value.
6914 *
6915 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6916 *
6917 * Returns the string giving the XML version number, or NULL
6918 */
6919xmlChar *
6920xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6921 xmlChar *buf = NULL;
6922 int len = 0;
6923 int size = 10;
6924 xmlChar cur;
6925
6926 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6927 if (buf == NULL) {
6928 xmlGenericError(xmlGenericErrorContext,
6929 "malloc of %d byte failed\n", size);
6930 return(NULL);
6931 }
6932 cur = CUR;
6933 while (((cur >= 'a') && (cur <= 'z')) ||
6934 ((cur >= 'A') && (cur <= 'Z')) ||
6935 ((cur >= '0') && (cur <= '9')) ||
6936 (cur == '_') || (cur == '.') ||
6937 (cur == ':') || (cur == '-')) {
6938 if (len + 1 >= size) {
6939 size *= 2;
6940 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6941 if (buf == NULL) {
6942 xmlGenericError(xmlGenericErrorContext,
6943 "realloc of %d byte failed\n", size);
6944 return(NULL);
6945 }
6946 }
6947 buf[len++] = cur;
6948 NEXT;
6949 cur=CUR;
6950 }
6951 buf[len] = 0;
6952 return(buf);
6953}
6954
6955/**
6956 * xmlParseVersionInfo:
6957 * @ctxt: an XML parser context
6958 *
6959 * parse the XML version.
6960 *
6961 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6962 *
6963 * [25] Eq ::= S? '=' S?
6964 *
6965 * Returns the version string, e.g. "1.0"
6966 */
6967
6968xmlChar *
6969xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
6970 xmlChar *version = NULL;
6971 const xmlChar *q;
6972
6973 if ((RAW == 'v') && (NXT(1) == 'e') &&
6974 (NXT(2) == 'r') && (NXT(3) == 's') &&
6975 (NXT(4) == 'i') && (NXT(5) == 'o') &&
6976 (NXT(6) == 'n')) {
6977 SKIP(7);
6978 SKIP_BLANKS;
6979 if (RAW != '=') {
6980 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6981 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6982 ctxt->sax->error(ctxt->userData,
6983 "xmlParseVersionInfo : expected '='\n");
6984 ctxt->wellFormed = 0;
6985 ctxt->disableSAX = 1;
6986 return(NULL);
6987 }
6988 NEXT;
6989 SKIP_BLANKS;
6990 if (RAW == '"') {
6991 NEXT;
6992 q = CUR_PTR;
6993 version = xmlParseVersionNum(ctxt);
6994 if (RAW != '"') {
6995 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6996 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6997 ctxt->sax->error(ctxt->userData,
6998 "String not closed\n%.50s\n", q);
6999 ctxt->wellFormed = 0;
7000 ctxt->disableSAX = 1;
7001 } else
7002 NEXT;
7003 } else if (RAW == '\''){
7004 NEXT;
7005 q = CUR_PTR;
7006 version = xmlParseVersionNum(ctxt);
7007 if (RAW != '\'') {
7008 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7009 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7010 ctxt->sax->error(ctxt->userData,
7011 "String not closed\n%.50s\n", q);
7012 ctxt->wellFormed = 0;
7013 ctxt->disableSAX = 1;
7014 } else
7015 NEXT;
7016 } else {
7017 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7018 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7019 ctxt->sax->error(ctxt->userData,
7020 "xmlParseVersionInfo : expected ' or \"\n");
7021 ctxt->wellFormed = 0;
7022 ctxt->disableSAX = 1;
7023 }
7024 }
7025 return(version);
7026}
7027
7028/**
7029 * xmlParseEncName:
7030 * @ctxt: an XML parser context
7031 *
7032 * parse the XML encoding name
7033 *
7034 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7035 *
7036 * Returns the encoding name value or NULL
7037 */
7038xmlChar *
7039xmlParseEncName(xmlParserCtxtPtr ctxt) {
7040 xmlChar *buf = NULL;
7041 int len = 0;
7042 int size = 10;
7043 xmlChar cur;
7044
7045 cur = CUR;
7046 if (((cur >= 'a') && (cur <= 'z')) ||
7047 ((cur >= 'A') && (cur <= 'Z'))) {
7048 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7049 if (buf == NULL) {
7050 xmlGenericError(xmlGenericErrorContext,
7051 "malloc of %d byte failed\n", size);
7052 return(NULL);
7053 }
7054
7055 buf[len++] = cur;
7056 NEXT;
7057 cur = CUR;
7058 while (((cur >= 'a') && (cur <= 'z')) ||
7059 ((cur >= 'A') && (cur <= 'Z')) ||
7060 ((cur >= '0') && (cur <= '9')) ||
7061 (cur == '.') || (cur == '_') ||
7062 (cur == '-')) {
7063 if (len + 1 >= size) {
7064 size *= 2;
7065 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7066 if (buf == NULL) {
7067 xmlGenericError(xmlGenericErrorContext,
7068 "realloc of %d byte failed\n", size);
7069 return(NULL);
7070 }
7071 }
7072 buf[len++] = cur;
7073 NEXT;
7074 cur = CUR;
7075 if (cur == 0) {
7076 SHRINK;
7077 GROW;
7078 cur = CUR;
7079 }
7080 }
7081 buf[len] = 0;
7082 } else {
7083 ctxt->errNo = XML_ERR_ENCODING_NAME;
7084 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7085 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7086 ctxt->wellFormed = 0;
7087 ctxt->disableSAX = 1;
7088 }
7089 return(buf);
7090}
7091
7092/**
7093 * xmlParseEncodingDecl:
7094 * @ctxt: an XML parser context
7095 *
7096 * parse the XML encoding declaration
7097 *
7098 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7099 *
7100 * this setups the conversion filters.
7101 *
7102 * Returns the encoding value or NULL
7103 */
7104
7105xmlChar *
7106xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7107 xmlChar *encoding = NULL;
7108 const xmlChar *q;
7109
7110 SKIP_BLANKS;
7111 if ((RAW == 'e') && (NXT(1) == 'n') &&
7112 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7113 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7114 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7115 SKIP(8);
7116 SKIP_BLANKS;
7117 if (RAW != '=') {
7118 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7119 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7120 ctxt->sax->error(ctxt->userData,
7121 "xmlParseEncodingDecl : expected '='\n");
7122 ctxt->wellFormed = 0;
7123 ctxt->disableSAX = 1;
7124 return(NULL);
7125 }
7126 NEXT;
7127 SKIP_BLANKS;
7128 if (RAW == '"') {
7129 NEXT;
7130 q = CUR_PTR;
7131 encoding = xmlParseEncName(ctxt);
7132 if (RAW != '"') {
7133 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7134 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7135 ctxt->sax->error(ctxt->userData,
7136 "String not closed\n%.50s\n", q);
7137 ctxt->wellFormed = 0;
7138 ctxt->disableSAX = 1;
7139 } else
7140 NEXT;
7141 } else if (RAW == '\''){
7142 NEXT;
7143 q = CUR_PTR;
7144 encoding = xmlParseEncName(ctxt);
7145 if (RAW != '\'') {
7146 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7147 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7148 ctxt->sax->error(ctxt->userData,
7149 "String not closed\n%.50s\n", q);
7150 ctxt->wellFormed = 0;
7151 ctxt->disableSAX = 1;
7152 } else
7153 NEXT;
7154 } else if (RAW == '"'){
7155 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7156 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7157 ctxt->sax->error(ctxt->userData,
7158 "xmlParseEncodingDecl : expected ' or \"\n");
7159 ctxt->wellFormed = 0;
7160 ctxt->disableSAX = 1;
7161 }
7162 if (encoding != NULL) {
7163 xmlCharEncoding enc;
7164 xmlCharEncodingHandlerPtr handler;
7165
7166 if (ctxt->input->encoding != NULL)
7167 xmlFree((xmlChar *) ctxt->input->encoding);
7168 ctxt->input->encoding = encoding;
7169
7170 enc = xmlParseCharEncoding((const char *) encoding);
7171 /*
7172 * registered set of known encodings
7173 */
7174 if (enc != XML_CHAR_ENCODING_ERROR) {
7175 xmlSwitchEncoding(ctxt, enc);
7176 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7177 xmlFree(encoding);
7178 return(NULL);
7179 }
7180 } else {
7181 /*
7182 * fallback for unknown encodings
7183 */
7184 handler = xmlFindCharEncodingHandler((const char *) encoding);
7185 if (handler != NULL) {
7186 xmlSwitchToEncoding(ctxt, handler);
7187 } else {
7188 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7189 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7190 ctxt->sax->error(ctxt->userData,
7191 "Unsupported encoding %s\n", encoding);
7192 return(NULL);
7193 }
7194 }
7195 }
7196 }
7197 return(encoding);
7198}
7199
7200/**
7201 * xmlParseSDDecl:
7202 * @ctxt: an XML parser context
7203 *
7204 * parse the XML standalone declaration
7205 *
7206 * [32] SDDecl ::= S 'standalone' Eq
7207 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7208 *
7209 * [ VC: Standalone Document Declaration ]
7210 * TODO The standalone document declaration must have the value "no"
7211 * if any external markup declarations contain declarations of:
7212 * - attributes with default values, if elements to which these
7213 * attributes apply appear in the document without specifications
7214 * of values for these attributes, or
7215 * - entities (other than amp, lt, gt, apos, quot), if references
7216 * to those entities appear in the document, or
7217 * - attributes with values subject to normalization, where the
7218 * attribute appears in the document with a value which will change
7219 * as a result of normalization, or
7220 * - element types with element content, if white space occurs directly
7221 * within any instance of those types.
7222 *
7223 * Returns 1 if standalone, 0 otherwise
7224 */
7225
7226int
7227xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7228 int standalone = -1;
7229
7230 SKIP_BLANKS;
7231 if ((RAW == 's') && (NXT(1) == 't') &&
7232 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7233 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7234 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7235 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7236 SKIP(10);
7237 SKIP_BLANKS;
7238 if (RAW != '=') {
7239 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7240 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7241 ctxt->sax->error(ctxt->userData,
7242 "XML standalone declaration : expected '='\n");
7243 ctxt->wellFormed = 0;
7244 ctxt->disableSAX = 1;
7245 return(standalone);
7246 }
7247 NEXT;
7248 SKIP_BLANKS;
7249 if (RAW == '\''){
7250 NEXT;
7251 if ((RAW == 'n') && (NXT(1) == 'o')) {
7252 standalone = 0;
7253 SKIP(2);
7254 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7255 (NXT(2) == 's')) {
7256 standalone = 1;
7257 SKIP(3);
7258 } else {
7259 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7260 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7261 ctxt->sax->error(ctxt->userData,
7262 "standalone accepts only 'yes' or 'no'\n");
7263 ctxt->wellFormed = 0;
7264 ctxt->disableSAX = 1;
7265 }
7266 if (RAW != '\'') {
7267 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7268 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7269 ctxt->sax->error(ctxt->userData, "String not closed\n");
7270 ctxt->wellFormed = 0;
7271 ctxt->disableSAX = 1;
7272 } else
7273 NEXT;
7274 } else if (RAW == '"'){
7275 NEXT;
7276 if ((RAW == 'n') && (NXT(1) == 'o')) {
7277 standalone = 0;
7278 SKIP(2);
7279 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7280 (NXT(2) == 's')) {
7281 standalone = 1;
7282 SKIP(3);
7283 } else {
7284 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7285 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7286 ctxt->sax->error(ctxt->userData,
7287 "standalone accepts only 'yes' or 'no'\n");
7288 ctxt->wellFormed = 0;
7289 ctxt->disableSAX = 1;
7290 }
7291 if (RAW != '"') {
7292 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7293 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7294 ctxt->sax->error(ctxt->userData, "String not closed\n");
7295 ctxt->wellFormed = 0;
7296 ctxt->disableSAX = 1;
7297 } else
7298 NEXT;
7299 } else {
7300 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7301 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7302 ctxt->sax->error(ctxt->userData,
7303 "Standalone value not found\n");
7304 ctxt->wellFormed = 0;
7305 ctxt->disableSAX = 1;
7306 }
7307 }
7308 return(standalone);
7309}
7310
7311/**
7312 * xmlParseXMLDecl:
7313 * @ctxt: an XML parser context
7314 *
7315 * parse an XML declaration header
7316 *
7317 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7318 */
7319
7320void
7321xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7322 xmlChar *version;
7323
7324 /*
7325 * We know that '<?xml' is here.
7326 */
7327 SKIP(5);
7328
7329 if (!IS_BLANK(RAW)) {
7330 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7331 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7332 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7333 ctxt->wellFormed = 0;
7334 ctxt->disableSAX = 1;
7335 }
7336 SKIP_BLANKS;
7337
7338 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007339 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007340 */
7341 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007342 if (version == NULL) {
7343 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7344 ctxt->sax->error(ctxt->userData,
7345 "Malformed declaration expecting version\n");
7346 ctxt->wellFormed = 0;
7347 ctxt->disableSAX = 1;
7348 } else {
7349 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7350 /*
7351 * TODO: Blueberry should be detected here
7352 */
7353 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7354 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7355 version);
7356 }
7357 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007358 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007359 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007360 }
Owen Taylor3473f882001-02-23 17:55:21 +00007361
7362 /*
7363 * We may have the encoding declaration
7364 */
7365 if (!IS_BLANK(RAW)) {
7366 if ((RAW == '?') && (NXT(1) == '>')) {
7367 SKIP(2);
7368 return;
7369 }
7370 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7371 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7372 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7373 ctxt->wellFormed = 0;
7374 ctxt->disableSAX = 1;
7375 }
7376 xmlParseEncodingDecl(ctxt);
7377 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7378 /*
7379 * The XML REC instructs us to stop parsing right here
7380 */
7381 return;
7382 }
7383
7384 /*
7385 * We may have the standalone status.
7386 */
7387 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7388 if ((RAW == '?') && (NXT(1) == '>')) {
7389 SKIP(2);
7390 return;
7391 }
7392 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7393 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7394 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7395 ctxt->wellFormed = 0;
7396 ctxt->disableSAX = 1;
7397 }
7398 SKIP_BLANKS;
7399 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7400
7401 SKIP_BLANKS;
7402 if ((RAW == '?') && (NXT(1) == '>')) {
7403 SKIP(2);
7404 } else if (RAW == '>') {
7405 /* Deprecated old WD ... */
7406 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7407 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7408 ctxt->sax->error(ctxt->userData,
7409 "XML declaration must end-up with '?>'\n");
7410 ctxt->wellFormed = 0;
7411 ctxt->disableSAX = 1;
7412 NEXT;
7413 } else {
7414 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7415 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7416 ctxt->sax->error(ctxt->userData,
7417 "parsing XML declaration: '?>' expected\n");
7418 ctxt->wellFormed = 0;
7419 ctxt->disableSAX = 1;
7420 MOVETO_ENDTAG(CUR_PTR);
7421 NEXT;
7422 }
7423}
7424
7425/**
7426 * xmlParseMisc:
7427 * @ctxt: an XML parser context
7428 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007429 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007430 *
7431 * [27] Misc ::= Comment | PI | S
7432 */
7433
7434void
7435xmlParseMisc(xmlParserCtxtPtr ctxt) {
7436 while (((RAW == '<') && (NXT(1) == '?')) ||
7437 ((RAW == '<') && (NXT(1) == '!') &&
7438 (NXT(2) == '-') && (NXT(3) == '-')) ||
7439 IS_BLANK(CUR)) {
7440 if ((RAW == '<') && (NXT(1) == '?')) {
7441 xmlParsePI(ctxt);
7442 } else if (IS_BLANK(CUR)) {
7443 NEXT;
7444 } else
7445 xmlParseComment(ctxt);
7446 }
7447}
7448
7449/**
7450 * xmlParseDocument:
7451 * @ctxt: an XML parser context
7452 *
7453 * parse an XML document (and build a tree if using the standard SAX
7454 * interface).
7455 *
7456 * [1] document ::= prolog element Misc*
7457 *
7458 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7459 *
7460 * Returns 0, -1 in case of error. the parser context is augmented
7461 * as a result of the parsing.
7462 */
7463
7464int
7465xmlParseDocument(xmlParserCtxtPtr ctxt) {
7466 xmlChar start[4];
7467 xmlCharEncoding enc;
7468
7469 xmlInitParser();
7470
7471 GROW;
7472
7473 /*
7474 * SAX: beginning of the document processing.
7475 */
7476 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7477 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7478
Daniel Veillard50f34372001-08-03 12:06:36 +00007479 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007480 /*
7481 * Get the 4 first bytes and decode the charset
7482 * if enc != XML_CHAR_ENCODING_NONE
7483 * plug some encoding conversion routines.
7484 */
7485 start[0] = RAW;
7486 start[1] = NXT(1);
7487 start[2] = NXT(2);
7488 start[3] = NXT(3);
7489 enc = xmlDetectCharEncoding(start, 4);
7490 if (enc != XML_CHAR_ENCODING_NONE) {
7491 xmlSwitchEncoding(ctxt, enc);
7492 }
Owen Taylor3473f882001-02-23 17:55:21 +00007493 }
7494
7495
7496 if (CUR == 0) {
7497 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7498 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7499 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7500 ctxt->wellFormed = 0;
7501 ctxt->disableSAX = 1;
7502 }
7503
7504 /*
7505 * Check for the XMLDecl in the Prolog.
7506 */
7507 GROW;
7508 if ((RAW == '<') && (NXT(1) == '?') &&
7509 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7510 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7511
7512 /*
7513 * Note that we will switch encoding on the fly.
7514 */
7515 xmlParseXMLDecl(ctxt);
7516 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7517 /*
7518 * The XML REC instructs us to stop parsing right here
7519 */
7520 return(-1);
7521 }
7522 ctxt->standalone = ctxt->input->standalone;
7523 SKIP_BLANKS;
7524 } else {
7525 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7526 }
7527 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7528 ctxt->sax->startDocument(ctxt->userData);
7529
7530 /*
7531 * The Misc part of the Prolog
7532 */
7533 GROW;
7534 xmlParseMisc(ctxt);
7535
7536 /*
7537 * Then possibly doc type declaration(s) and more Misc
7538 * (doctypedecl Misc*)?
7539 */
7540 GROW;
7541 if ((RAW == '<') && (NXT(1) == '!') &&
7542 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7543 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7544 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7545 (NXT(8) == 'E')) {
7546
7547 ctxt->inSubset = 1;
7548 xmlParseDocTypeDecl(ctxt);
7549 if (RAW == '[') {
7550 ctxt->instate = XML_PARSER_DTD;
7551 xmlParseInternalSubset(ctxt);
7552 }
7553
7554 /*
7555 * Create and update the external subset.
7556 */
7557 ctxt->inSubset = 2;
7558 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7559 (!ctxt->disableSAX))
7560 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7561 ctxt->extSubSystem, ctxt->extSubURI);
7562 ctxt->inSubset = 0;
7563
7564
7565 ctxt->instate = XML_PARSER_PROLOG;
7566 xmlParseMisc(ctxt);
7567 }
7568
7569 /*
7570 * Time to start parsing the tree itself
7571 */
7572 GROW;
7573 if (RAW != '<') {
7574 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7575 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7576 ctxt->sax->error(ctxt->userData,
7577 "Start tag expected, '<' not found\n");
7578 ctxt->wellFormed = 0;
7579 ctxt->disableSAX = 1;
7580 ctxt->instate = XML_PARSER_EOF;
7581 } else {
7582 ctxt->instate = XML_PARSER_CONTENT;
7583 xmlParseElement(ctxt);
7584 ctxt->instate = XML_PARSER_EPILOG;
7585
7586
7587 /*
7588 * The Misc part at the end
7589 */
7590 xmlParseMisc(ctxt);
7591
7592 if (RAW != 0) {
7593 ctxt->errNo = XML_ERR_DOCUMENT_END;
7594 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7595 ctxt->sax->error(ctxt->userData,
7596 "Extra content at the end of the document\n");
7597 ctxt->wellFormed = 0;
7598 ctxt->disableSAX = 1;
7599 }
7600 ctxt->instate = XML_PARSER_EOF;
7601 }
7602
7603 /*
7604 * SAX: end of the document processing.
7605 */
7606 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7607 (!ctxt->disableSAX))
7608 ctxt->sax->endDocument(ctxt->userData);
7609
7610 if (! ctxt->wellFormed) return(-1);
7611 return(0);
7612}
7613
7614/**
7615 * xmlParseExtParsedEnt:
7616 * @ctxt: an XML parser context
7617 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007618 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00007619 * An external general parsed entity is well-formed if it matches the
7620 * production labeled extParsedEnt.
7621 *
7622 * [78] extParsedEnt ::= TextDecl? content
7623 *
7624 * Returns 0, -1 in case of error. the parser context is augmented
7625 * as a result of the parsing.
7626 */
7627
7628int
7629xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7630 xmlChar start[4];
7631 xmlCharEncoding enc;
7632
7633 xmlDefaultSAXHandlerInit();
7634
7635 GROW;
7636
7637 /*
7638 * SAX: beginning of the document processing.
7639 */
7640 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7641 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7642
7643 /*
7644 * Get the 4 first bytes and decode the charset
7645 * if enc != XML_CHAR_ENCODING_NONE
7646 * plug some encoding conversion routines.
7647 */
7648 start[0] = RAW;
7649 start[1] = NXT(1);
7650 start[2] = NXT(2);
7651 start[3] = NXT(3);
7652 enc = xmlDetectCharEncoding(start, 4);
7653 if (enc != XML_CHAR_ENCODING_NONE) {
7654 xmlSwitchEncoding(ctxt, enc);
7655 }
7656
7657
7658 if (CUR == 0) {
7659 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7660 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7661 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7662 ctxt->wellFormed = 0;
7663 ctxt->disableSAX = 1;
7664 }
7665
7666 /*
7667 * Check for the XMLDecl in the Prolog.
7668 */
7669 GROW;
7670 if ((RAW == '<') && (NXT(1) == '?') &&
7671 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7672 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7673
7674 /*
7675 * Note that we will switch encoding on the fly.
7676 */
7677 xmlParseXMLDecl(ctxt);
7678 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7679 /*
7680 * The XML REC instructs us to stop parsing right here
7681 */
7682 return(-1);
7683 }
7684 SKIP_BLANKS;
7685 } else {
7686 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7687 }
7688 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7689 ctxt->sax->startDocument(ctxt->userData);
7690
7691 /*
7692 * Doing validity checking on chunk doesn't make sense
7693 */
7694 ctxt->instate = XML_PARSER_CONTENT;
7695 ctxt->validate = 0;
7696 ctxt->loadsubset = 0;
7697 ctxt->depth = 0;
7698
7699 xmlParseContent(ctxt);
7700
7701 if ((RAW == '<') && (NXT(1) == '/')) {
7702 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7703 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7704 ctxt->sax->error(ctxt->userData,
7705 "chunk is not well balanced\n");
7706 ctxt->wellFormed = 0;
7707 ctxt->disableSAX = 1;
7708 } else if (RAW != 0) {
7709 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7710 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7711 ctxt->sax->error(ctxt->userData,
7712 "extra content at the end of well balanced chunk\n");
7713 ctxt->wellFormed = 0;
7714 ctxt->disableSAX = 1;
7715 }
7716
7717 /*
7718 * SAX: end of the document processing.
7719 */
7720 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7721 (!ctxt->disableSAX))
7722 ctxt->sax->endDocument(ctxt->userData);
7723
7724 if (! ctxt->wellFormed) return(-1);
7725 return(0);
7726}
7727
7728/************************************************************************
7729 * *
7730 * Progressive parsing interfaces *
7731 * *
7732 ************************************************************************/
7733
7734/**
7735 * xmlParseLookupSequence:
7736 * @ctxt: an XML parser context
7737 * @first: the first char to lookup
7738 * @next: the next char to lookup or zero
7739 * @third: the next char to lookup or zero
7740 *
7741 * Try to find if a sequence (first, next, third) or just (first next) or
7742 * (first) is available in the input stream.
7743 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7744 * to avoid rescanning sequences of bytes, it DOES change the state of the
7745 * parser, do not use liberally.
7746 *
7747 * Returns the index to the current parsing point if the full sequence
7748 * is available, -1 otherwise.
7749 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007750static int
Owen Taylor3473f882001-02-23 17:55:21 +00007751xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7752 xmlChar next, xmlChar third) {
7753 int base, len;
7754 xmlParserInputPtr in;
7755 const xmlChar *buf;
7756
7757 in = ctxt->input;
7758 if (in == NULL) return(-1);
7759 base = in->cur - in->base;
7760 if (base < 0) return(-1);
7761 if (ctxt->checkIndex > base)
7762 base = ctxt->checkIndex;
7763 if (in->buf == NULL) {
7764 buf = in->base;
7765 len = in->length;
7766 } else {
7767 buf = in->buf->buffer->content;
7768 len = in->buf->buffer->use;
7769 }
7770 /* take into account the sequence length */
7771 if (third) len -= 2;
7772 else if (next) len --;
7773 for (;base < len;base++) {
7774 if (buf[base] == first) {
7775 if (third != 0) {
7776 if ((buf[base + 1] != next) ||
7777 (buf[base + 2] != third)) continue;
7778 } else if (next != 0) {
7779 if (buf[base + 1] != next) continue;
7780 }
7781 ctxt->checkIndex = 0;
7782#ifdef DEBUG_PUSH
7783 if (next == 0)
7784 xmlGenericError(xmlGenericErrorContext,
7785 "PP: lookup '%c' found at %d\n",
7786 first, base);
7787 else if (third == 0)
7788 xmlGenericError(xmlGenericErrorContext,
7789 "PP: lookup '%c%c' found at %d\n",
7790 first, next, base);
7791 else
7792 xmlGenericError(xmlGenericErrorContext,
7793 "PP: lookup '%c%c%c' found at %d\n",
7794 first, next, third, base);
7795#endif
7796 return(base - (in->cur - in->base));
7797 }
7798 }
7799 ctxt->checkIndex = base;
7800#ifdef DEBUG_PUSH
7801 if (next == 0)
7802 xmlGenericError(xmlGenericErrorContext,
7803 "PP: lookup '%c' failed\n", first);
7804 else if (third == 0)
7805 xmlGenericError(xmlGenericErrorContext,
7806 "PP: lookup '%c%c' failed\n", first, next);
7807 else
7808 xmlGenericError(xmlGenericErrorContext,
7809 "PP: lookup '%c%c%c' failed\n", first, next, third);
7810#endif
7811 return(-1);
7812}
7813
7814/**
7815 * xmlParseTryOrFinish:
7816 * @ctxt: an XML parser context
7817 * @terminate: last chunk indicator
7818 *
7819 * Try to progress on parsing
7820 *
7821 * Returns zero if no parsing was possible
7822 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007823static int
Owen Taylor3473f882001-02-23 17:55:21 +00007824xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7825 int ret = 0;
7826 int avail;
7827 xmlChar cur, next;
7828
7829#ifdef DEBUG_PUSH
7830 switch (ctxt->instate) {
7831 case XML_PARSER_EOF:
7832 xmlGenericError(xmlGenericErrorContext,
7833 "PP: try EOF\n"); break;
7834 case XML_PARSER_START:
7835 xmlGenericError(xmlGenericErrorContext,
7836 "PP: try START\n"); break;
7837 case XML_PARSER_MISC:
7838 xmlGenericError(xmlGenericErrorContext,
7839 "PP: try MISC\n");break;
7840 case XML_PARSER_COMMENT:
7841 xmlGenericError(xmlGenericErrorContext,
7842 "PP: try COMMENT\n");break;
7843 case XML_PARSER_PROLOG:
7844 xmlGenericError(xmlGenericErrorContext,
7845 "PP: try PROLOG\n");break;
7846 case XML_PARSER_START_TAG:
7847 xmlGenericError(xmlGenericErrorContext,
7848 "PP: try START_TAG\n");break;
7849 case XML_PARSER_CONTENT:
7850 xmlGenericError(xmlGenericErrorContext,
7851 "PP: try CONTENT\n");break;
7852 case XML_PARSER_CDATA_SECTION:
7853 xmlGenericError(xmlGenericErrorContext,
7854 "PP: try CDATA_SECTION\n");break;
7855 case XML_PARSER_END_TAG:
7856 xmlGenericError(xmlGenericErrorContext,
7857 "PP: try END_TAG\n");break;
7858 case XML_PARSER_ENTITY_DECL:
7859 xmlGenericError(xmlGenericErrorContext,
7860 "PP: try ENTITY_DECL\n");break;
7861 case XML_PARSER_ENTITY_VALUE:
7862 xmlGenericError(xmlGenericErrorContext,
7863 "PP: try ENTITY_VALUE\n");break;
7864 case XML_PARSER_ATTRIBUTE_VALUE:
7865 xmlGenericError(xmlGenericErrorContext,
7866 "PP: try ATTRIBUTE_VALUE\n");break;
7867 case XML_PARSER_DTD:
7868 xmlGenericError(xmlGenericErrorContext,
7869 "PP: try DTD\n");break;
7870 case XML_PARSER_EPILOG:
7871 xmlGenericError(xmlGenericErrorContext,
7872 "PP: try EPILOG\n");break;
7873 case XML_PARSER_PI:
7874 xmlGenericError(xmlGenericErrorContext,
7875 "PP: try PI\n");break;
7876 case XML_PARSER_IGNORE:
7877 xmlGenericError(xmlGenericErrorContext,
7878 "PP: try IGNORE\n");break;
7879 }
7880#endif
7881
7882 while (1) {
7883 /*
7884 * Pop-up of finished entities.
7885 */
7886 while ((RAW == 0) && (ctxt->inputNr > 1))
7887 xmlPopInput(ctxt);
7888
7889 if (ctxt->input ==NULL) break;
7890 if (ctxt->input->buf == NULL)
7891 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7892 else
7893 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7894 if (avail < 1)
7895 goto done;
7896 switch (ctxt->instate) {
7897 case XML_PARSER_EOF:
7898 /*
7899 * Document parsing is done !
7900 */
7901 goto done;
7902 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00007903 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
7904 xmlChar start[4];
7905 xmlCharEncoding enc;
7906
7907 /*
7908 * Very first chars read from the document flow.
7909 */
7910 if (avail < 4)
7911 goto done;
7912
7913 /*
7914 * Get the 4 first bytes and decode the charset
7915 * if enc != XML_CHAR_ENCODING_NONE
7916 * plug some encoding conversion routines.
7917 */
7918 start[0] = RAW;
7919 start[1] = NXT(1);
7920 start[2] = NXT(2);
7921 start[3] = NXT(3);
7922 enc = xmlDetectCharEncoding(start, 4);
7923 if (enc != XML_CHAR_ENCODING_NONE) {
7924 xmlSwitchEncoding(ctxt, enc);
7925 }
7926 break;
7927 }
Owen Taylor3473f882001-02-23 17:55:21 +00007928
7929 cur = ctxt->input->cur[0];
7930 next = ctxt->input->cur[1];
7931 if (cur == 0) {
7932 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7933 ctxt->sax->setDocumentLocator(ctxt->userData,
7934 &xmlDefaultSAXLocator);
7935 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7936 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7937 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7938 ctxt->wellFormed = 0;
7939 ctxt->disableSAX = 1;
7940 ctxt->instate = XML_PARSER_EOF;
7941#ifdef DEBUG_PUSH
7942 xmlGenericError(xmlGenericErrorContext,
7943 "PP: entering EOF\n");
7944#endif
7945 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7946 ctxt->sax->endDocument(ctxt->userData);
7947 goto done;
7948 }
7949 if ((cur == '<') && (next == '?')) {
7950 /* PI or XML decl */
7951 if (avail < 5) return(ret);
7952 if ((!terminate) &&
7953 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7954 return(ret);
7955 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7956 ctxt->sax->setDocumentLocator(ctxt->userData,
7957 &xmlDefaultSAXLocator);
7958 if ((ctxt->input->cur[2] == 'x') &&
7959 (ctxt->input->cur[3] == 'm') &&
7960 (ctxt->input->cur[4] == 'l') &&
7961 (IS_BLANK(ctxt->input->cur[5]))) {
7962 ret += 5;
7963#ifdef DEBUG_PUSH
7964 xmlGenericError(xmlGenericErrorContext,
7965 "PP: Parsing XML Decl\n");
7966#endif
7967 xmlParseXMLDecl(ctxt);
7968 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7969 /*
7970 * The XML REC instructs us to stop parsing right
7971 * here
7972 */
7973 ctxt->instate = XML_PARSER_EOF;
7974 return(0);
7975 }
7976 ctxt->standalone = ctxt->input->standalone;
7977 if ((ctxt->encoding == NULL) &&
7978 (ctxt->input->encoding != NULL))
7979 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
7980 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7981 (!ctxt->disableSAX))
7982 ctxt->sax->startDocument(ctxt->userData);
7983 ctxt->instate = XML_PARSER_MISC;
7984#ifdef DEBUG_PUSH
7985 xmlGenericError(xmlGenericErrorContext,
7986 "PP: entering MISC\n");
7987#endif
7988 } else {
7989 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7990 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7991 (!ctxt->disableSAX))
7992 ctxt->sax->startDocument(ctxt->userData);
7993 ctxt->instate = XML_PARSER_MISC;
7994#ifdef DEBUG_PUSH
7995 xmlGenericError(xmlGenericErrorContext,
7996 "PP: entering MISC\n");
7997#endif
7998 }
7999 } else {
8000 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8001 ctxt->sax->setDocumentLocator(ctxt->userData,
8002 &xmlDefaultSAXLocator);
8003 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8004 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8005 (!ctxt->disableSAX))
8006 ctxt->sax->startDocument(ctxt->userData);
8007 ctxt->instate = XML_PARSER_MISC;
8008#ifdef DEBUG_PUSH
8009 xmlGenericError(xmlGenericErrorContext,
8010 "PP: entering MISC\n");
8011#endif
8012 }
8013 break;
8014 case XML_PARSER_MISC:
8015 SKIP_BLANKS;
8016 if (ctxt->input->buf == NULL)
8017 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8018 else
8019 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8020 if (avail < 2)
8021 goto done;
8022 cur = ctxt->input->cur[0];
8023 next = ctxt->input->cur[1];
8024 if ((cur == '<') && (next == '?')) {
8025 if ((!terminate) &&
8026 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8027 goto done;
8028#ifdef DEBUG_PUSH
8029 xmlGenericError(xmlGenericErrorContext,
8030 "PP: Parsing PI\n");
8031#endif
8032 xmlParsePI(ctxt);
8033 } else if ((cur == '<') && (next == '!') &&
8034 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8035 if ((!terminate) &&
8036 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8037 goto done;
8038#ifdef DEBUG_PUSH
8039 xmlGenericError(xmlGenericErrorContext,
8040 "PP: Parsing Comment\n");
8041#endif
8042 xmlParseComment(ctxt);
8043 ctxt->instate = XML_PARSER_MISC;
8044 } else if ((cur == '<') && (next == '!') &&
8045 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8046 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8047 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8048 (ctxt->input->cur[8] == 'E')) {
8049 if ((!terminate) &&
8050 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8051 goto done;
8052#ifdef DEBUG_PUSH
8053 xmlGenericError(xmlGenericErrorContext,
8054 "PP: Parsing internal subset\n");
8055#endif
8056 ctxt->inSubset = 1;
8057 xmlParseDocTypeDecl(ctxt);
8058 if (RAW == '[') {
8059 ctxt->instate = XML_PARSER_DTD;
8060#ifdef DEBUG_PUSH
8061 xmlGenericError(xmlGenericErrorContext,
8062 "PP: entering DTD\n");
8063#endif
8064 } else {
8065 /*
8066 * Create and update the external subset.
8067 */
8068 ctxt->inSubset = 2;
8069 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8070 (ctxt->sax->externalSubset != NULL))
8071 ctxt->sax->externalSubset(ctxt->userData,
8072 ctxt->intSubName, ctxt->extSubSystem,
8073 ctxt->extSubURI);
8074 ctxt->inSubset = 0;
8075 ctxt->instate = XML_PARSER_PROLOG;
8076#ifdef DEBUG_PUSH
8077 xmlGenericError(xmlGenericErrorContext,
8078 "PP: entering PROLOG\n");
8079#endif
8080 }
8081 } else if ((cur == '<') && (next == '!') &&
8082 (avail < 9)) {
8083 goto done;
8084 } else {
8085 ctxt->instate = XML_PARSER_START_TAG;
8086#ifdef DEBUG_PUSH
8087 xmlGenericError(xmlGenericErrorContext,
8088 "PP: entering START_TAG\n");
8089#endif
8090 }
8091 break;
8092 case XML_PARSER_IGNORE:
8093 xmlGenericError(xmlGenericErrorContext,
8094 "PP: internal error, state == IGNORE");
8095 ctxt->instate = XML_PARSER_DTD;
8096#ifdef DEBUG_PUSH
8097 xmlGenericError(xmlGenericErrorContext,
8098 "PP: entering DTD\n");
8099#endif
8100 break;
8101 case XML_PARSER_PROLOG:
8102 SKIP_BLANKS;
8103 if (ctxt->input->buf == NULL)
8104 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8105 else
8106 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8107 if (avail < 2)
8108 goto done;
8109 cur = ctxt->input->cur[0];
8110 next = ctxt->input->cur[1];
8111 if ((cur == '<') && (next == '?')) {
8112 if ((!terminate) &&
8113 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8114 goto done;
8115#ifdef DEBUG_PUSH
8116 xmlGenericError(xmlGenericErrorContext,
8117 "PP: Parsing PI\n");
8118#endif
8119 xmlParsePI(ctxt);
8120 } else if ((cur == '<') && (next == '!') &&
8121 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8122 if ((!terminate) &&
8123 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8124 goto done;
8125#ifdef DEBUG_PUSH
8126 xmlGenericError(xmlGenericErrorContext,
8127 "PP: Parsing Comment\n");
8128#endif
8129 xmlParseComment(ctxt);
8130 ctxt->instate = XML_PARSER_PROLOG;
8131 } else if ((cur == '<') && (next == '!') &&
8132 (avail < 4)) {
8133 goto done;
8134 } else {
8135 ctxt->instate = XML_PARSER_START_TAG;
8136#ifdef DEBUG_PUSH
8137 xmlGenericError(xmlGenericErrorContext,
8138 "PP: entering START_TAG\n");
8139#endif
8140 }
8141 break;
8142 case XML_PARSER_EPILOG:
8143 SKIP_BLANKS;
8144 if (ctxt->input->buf == NULL)
8145 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8146 else
8147 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8148 if (avail < 2)
8149 goto done;
8150 cur = ctxt->input->cur[0];
8151 next = ctxt->input->cur[1];
8152 if ((cur == '<') && (next == '?')) {
8153 if ((!terminate) &&
8154 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8155 goto done;
8156#ifdef DEBUG_PUSH
8157 xmlGenericError(xmlGenericErrorContext,
8158 "PP: Parsing PI\n");
8159#endif
8160 xmlParsePI(ctxt);
8161 ctxt->instate = XML_PARSER_EPILOG;
8162 } else if ((cur == '<') && (next == '!') &&
8163 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8164 if ((!terminate) &&
8165 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8166 goto done;
8167#ifdef DEBUG_PUSH
8168 xmlGenericError(xmlGenericErrorContext,
8169 "PP: Parsing Comment\n");
8170#endif
8171 xmlParseComment(ctxt);
8172 ctxt->instate = XML_PARSER_EPILOG;
8173 } else if ((cur == '<') && (next == '!') &&
8174 (avail < 4)) {
8175 goto done;
8176 } else {
8177 ctxt->errNo = XML_ERR_DOCUMENT_END;
8178 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8179 ctxt->sax->error(ctxt->userData,
8180 "Extra content at the end of the document\n");
8181 ctxt->wellFormed = 0;
8182 ctxt->disableSAX = 1;
8183 ctxt->instate = XML_PARSER_EOF;
8184#ifdef DEBUG_PUSH
8185 xmlGenericError(xmlGenericErrorContext,
8186 "PP: entering EOF\n");
8187#endif
8188 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8189 (!ctxt->disableSAX))
8190 ctxt->sax->endDocument(ctxt->userData);
8191 goto done;
8192 }
8193 break;
8194 case XML_PARSER_START_TAG: {
8195 xmlChar *name, *oldname;
8196
8197 if ((avail < 2) && (ctxt->inputNr == 1))
8198 goto done;
8199 cur = ctxt->input->cur[0];
8200 if (cur != '<') {
8201 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8202 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8203 ctxt->sax->error(ctxt->userData,
8204 "Start tag expect, '<' not found\n");
8205 ctxt->wellFormed = 0;
8206 ctxt->disableSAX = 1;
8207 ctxt->instate = XML_PARSER_EOF;
8208#ifdef DEBUG_PUSH
8209 xmlGenericError(xmlGenericErrorContext,
8210 "PP: entering EOF\n");
8211#endif
8212 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8213 (!ctxt->disableSAX))
8214 ctxt->sax->endDocument(ctxt->userData);
8215 goto done;
8216 }
8217 if ((!terminate) &&
8218 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8219 goto done;
8220 if (ctxt->spaceNr == 0)
8221 spacePush(ctxt, -1);
8222 else
8223 spacePush(ctxt, *ctxt->space);
8224 name = xmlParseStartTag(ctxt);
8225 if (name == NULL) {
8226 spacePop(ctxt);
8227 ctxt->instate = XML_PARSER_EOF;
8228#ifdef DEBUG_PUSH
8229 xmlGenericError(xmlGenericErrorContext,
8230 "PP: entering EOF\n");
8231#endif
8232 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8233 (!ctxt->disableSAX))
8234 ctxt->sax->endDocument(ctxt->userData);
8235 goto done;
8236 }
8237 namePush(ctxt, xmlStrdup(name));
8238
8239 /*
8240 * [ VC: Root Element Type ]
8241 * The Name in the document type declaration must match
8242 * the element type of the root element.
8243 */
8244 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8245 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8246 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8247
8248 /*
8249 * Check for an Empty Element.
8250 */
8251 if ((RAW == '/') && (NXT(1) == '>')) {
8252 SKIP(2);
8253 if ((ctxt->sax != NULL) &&
8254 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8255 ctxt->sax->endElement(ctxt->userData, name);
8256 xmlFree(name);
8257 oldname = namePop(ctxt);
8258 spacePop(ctxt);
8259 if (oldname != NULL) {
8260#ifdef DEBUG_STACK
8261 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8262#endif
8263 xmlFree(oldname);
8264 }
8265 if (ctxt->name == NULL) {
8266 ctxt->instate = XML_PARSER_EPILOG;
8267#ifdef DEBUG_PUSH
8268 xmlGenericError(xmlGenericErrorContext,
8269 "PP: entering EPILOG\n");
8270#endif
8271 } else {
8272 ctxt->instate = XML_PARSER_CONTENT;
8273#ifdef DEBUG_PUSH
8274 xmlGenericError(xmlGenericErrorContext,
8275 "PP: entering CONTENT\n");
8276#endif
8277 }
8278 break;
8279 }
8280 if (RAW == '>') {
8281 NEXT;
8282 } else {
8283 ctxt->errNo = XML_ERR_GT_REQUIRED;
8284 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8285 ctxt->sax->error(ctxt->userData,
8286 "Couldn't find end of Start Tag %s\n",
8287 name);
8288 ctxt->wellFormed = 0;
8289 ctxt->disableSAX = 1;
8290
8291 /*
8292 * end of parsing of this node.
8293 */
8294 nodePop(ctxt);
8295 oldname = namePop(ctxt);
8296 spacePop(ctxt);
8297 if (oldname != NULL) {
8298#ifdef DEBUG_STACK
8299 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8300#endif
8301 xmlFree(oldname);
8302 }
8303 }
8304 xmlFree(name);
8305 ctxt->instate = XML_PARSER_CONTENT;
8306#ifdef DEBUG_PUSH
8307 xmlGenericError(xmlGenericErrorContext,
8308 "PP: entering CONTENT\n");
8309#endif
8310 break;
8311 }
8312 case XML_PARSER_CONTENT: {
8313 const xmlChar *test;
8314 int cons;
Daniel Veillard04be4f52001-03-26 21:23:53 +00008315 int tok;
Owen Taylor3473f882001-02-23 17:55:21 +00008316
8317 /*
8318 * Handle preparsed entities and charRef
8319 */
8320 if (ctxt->token != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008321 xmlChar current[2] = { 0 , 0 } ;
Owen Taylor3473f882001-02-23 17:55:21 +00008322
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008323 current[0] = (xmlChar) ctxt->token;
Owen Taylor3473f882001-02-23 17:55:21 +00008324 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8325 (ctxt->sax->characters != NULL))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008326 ctxt->sax->characters(ctxt->userData, current, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00008327 ctxt->token = 0;
8328 }
8329 if ((avail < 2) && (ctxt->inputNr == 1))
8330 goto done;
8331 cur = ctxt->input->cur[0];
8332 next = ctxt->input->cur[1];
8333
8334 test = CUR_PTR;
8335 cons = ctxt->input->consumed;
8336 tok = ctxt->token;
8337 if ((cur == '<') && (next == '?')) {
8338 if ((!terminate) &&
8339 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8340 goto done;
8341#ifdef DEBUG_PUSH
8342 xmlGenericError(xmlGenericErrorContext,
8343 "PP: Parsing PI\n");
8344#endif
8345 xmlParsePI(ctxt);
8346 } else if ((cur == '<') && (next == '!') &&
8347 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8348 if ((!terminate) &&
8349 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8350 goto done;
8351#ifdef DEBUG_PUSH
8352 xmlGenericError(xmlGenericErrorContext,
8353 "PP: Parsing Comment\n");
8354#endif
8355 xmlParseComment(ctxt);
8356 ctxt->instate = XML_PARSER_CONTENT;
8357 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8358 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8359 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8360 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8361 (ctxt->input->cur[8] == '[')) {
8362 SKIP(9);
8363 ctxt->instate = XML_PARSER_CDATA_SECTION;
8364#ifdef DEBUG_PUSH
8365 xmlGenericError(xmlGenericErrorContext,
8366 "PP: entering CDATA_SECTION\n");
8367#endif
8368 break;
8369 } else if ((cur == '<') && (next == '!') &&
8370 (avail < 9)) {
8371 goto done;
8372 } else if ((cur == '<') && (next == '/')) {
8373 ctxt->instate = XML_PARSER_END_TAG;
8374#ifdef DEBUG_PUSH
8375 xmlGenericError(xmlGenericErrorContext,
8376 "PP: entering END_TAG\n");
8377#endif
8378 break;
8379 } else if (cur == '<') {
8380 ctxt->instate = XML_PARSER_START_TAG;
8381#ifdef DEBUG_PUSH
8382 xmlGenericError(xmlGenericErrorContext,
8383 "PP: entering START_TAG\n");
8384#endif
8385 break;
8386 } else if (cur == '&') {
8387 if ((!terminate) &&
8388 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8389 goto done;
8390#ifdef DEBUG_PUSH
8391 xmlGenericError(xmlGenericErrorContext,
8392 "PP: Parsing Reference\n");
8393#endif
8394 xmlParseReference(ctxt);
8395 } else {
8396 /* TODO Avoid the extra copy, handle directly !!! */
8397 /*
8398 * Goal of the following test is:
8399 * - minimize calls to the SAX 'character' callback
8400 * when they are mergeable
8401 * - handle an problem for isBlank when we only parse
8402 * a sequence of blank chars and the next one is
8403 * not available to check against '<' presence.
8404 * - tries to homogenize the differences in SAX
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008405 * callbacks between the push and pull versions
Owen Taylor3473f882001-02-23 17:55:21 +00008406 * of the parser.
8407 */
8408 if ((ctxt->inputNr == 1) &&
8409 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8410 if ((!terminate) &&
8411 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8412 goto done;
8413 }
8414 ctxt->checkIndex = 0;
8415#ifdef DEBUG_PUSH
8416 xmlGenericError(xmlGenericErrorContext,
8417 "PP: Parsing char data\n");
8418#endif
8419 xmlParseCharData(ctxt, 0);
8420 }
8421 /*
8422 * Pop-up of finished entities.
8423 */
8424 while ((RAW == 0) && (ctxt->inputNr > 1))
8425 xmlPopInput(ctxt);
8426 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8427 (tok == ctxt->token)) {
8428 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8429 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8430 ctxt->sax->error(ctxt->userData,
8431 "detected an error in element content\n");
8432 ctxt->wellFormed = 0;
8433 ctxt->disableSAX = 1;
8434 ctxt->instate = XML_PARSER_EOF;
8435 break;
8436 }
8437 break;
8438 }
8439 case XML_PARSER_CDATA_SECTION: {
8440 /*
8441 * The Push mode need to have the SAX callback for
8442 * cdataBlock merge back contiguous callbacks.
8443 */
8444 int base;
8445
8446 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8447 if (base < 0) {
8448 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8449 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8450 if (ctxt->sax->cdataBlock != NULL)
8451 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8452 XML_PARSER_BIG_BUFFER_SIZE);
8453 }
8454 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8455 ctxt->checkIndex = 0;
8456 }
8457 goto done;
8458 } else {
8459 if ((ctxt->sax != NULL) && (base > 0) &&
8460 (!ctxt->disableSAX)) {
8461 if (ctxt->sax->cdataBlock != NULL)
8462 ctxt->sax->cdataBlock(ctxt->userData,
8463 ctxt->input->cur, base);
8464 }
8465 SKIP(base + 3);
8466 ctxt->checkIndex = 0;
8467 ctxt->instate = XML_PARSER_CONTENT;
8468#ifdef DEBUG_PUSH
8469 xmlGenericError(xmlGenericErrorContext,
8470 "PP: entering CONTENT\n");
8471#endif
8472 }
8473 break;
8474 }
8475 case XML_PARSER_END_TAG:
8476 if (avail < 2)
8477 goto done;
8478 if ((!terminate) &&
8479 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8480 goto done;
8481 xmlParseEndTag(ctxt);
8482 if (ctxt->name == NULL) {
8483 ctxt->instate = XML_PARSER_EPILOG;
8484#ifdef DEBUG_PUSH
8485 xmlGenericError(xmlGenericErrorContext,
8486 "PP: entering EPILOG\n");
8487#endif
8488 } else {
8489 ctxt->instate = XML_PARSER_CONTENT;
8490#ifdef DEBUG_PUSH
8491 xmlGenericError(xmlGenericErrorContext,
8492 "PP: entering CONTENT\n");
8493#endif
8494 }
8495 break;
8496 case XML_PARSER_DTD: {
8497 /*
8498 * Sorry but progressive parsing of the internal subset
8499 * is not expected to be supported. We first check that
8500 * the full content of the internal subset is available and
8501 * the parsing is launched only at that point.
8502 * Internal subset ends up with "']' S? '>'" in an unescaped
8503 * section and not in a ']]>' sequence which are conditional
8504 * sections (whoever argued to keep that crap in XML deserve
8505 * a place in hell !).
8506 */
8507 int base, i;
8508 xmlChar *buf;
8509 xmlChar quote = 0;
8510
8511 base = ctxt->input->cur - ctxt->input->base;
8512 if (base < 0) return(0);
8513 if (ctxt->checkIndex > base)
8514 base = ctxt->checkIndex;
8515 buf = ctxt->input->buf->buffer->content;
8516 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8517 base++) {
8518 if (quote != 0) {
8519 if (buf[base] == quote)
8520 quote = 0;
8521 continue;
8522 }
8523 if (buf[base] == '"') {
8524 quote = '"';
8525 continue;
8526 }
8527 if (buf[base] == '\'') {
8528 quote = '\'';
8529 continue;
8530 }
8531 if (buf[base] == ']') {
8532 if ((unsigned int) base +1 >=
8533 ctxt->input->buf->buffer->use)
8534 break;
8535 if (buf[base + 1] == ']') {
8536 /* conditional crap, skip both ']' ! */
8537 base++;
8538 continue;
8539 }
8540 for (i = 0;
8541 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8542 i++) {
8543 if (buf[base + i] == '>')
8544 goto found_end_int_subset;
8545 }
8546 break;
8547 }
8548 }
8549 /*
8550 * We didn't found the end of the Internal subset
8551 */
8552 if (quote == 0)
8553 ctxt->checkIndex = base;
8554#ifdef DEBUG_PUSH
8555 if (next == 0)
8556 xmlGenericError(xmlGenericErrorContext,
8557 "PP: lookup of int subset end filed\n");
8558#endif
8559 goto done;
8560
8561found_end_int_subset:
8562 xmlParseInternalSubset(ctxt);
8563 ctxt->inSubset = 2;
8564 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8565 (ctxt->sax->externalSubset != NULL))
8566 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8567 ctxt->extSubSystem, ctxt->extSubURI);
8568 ctxt->inSubset = 0;
8569 ctxt->instate = XML_PARSER_PROLOG;
8570 ctxt->checkIndex = 0;
8571#ifdef DEBUG_PUSH
8572 xmlGenericError(xmlGenericErrorContext,
8573 "PP: entering PROLOG\n");
8574#endif
8575 break;
8576 }
8577 case XML_PARSER_COMMENT:
8578 xmlGenericError(xmlGenericErrorContext,
8579 "PP: internal error, state == COMMENT\n");
8580 ctxt->instate = XML_PARSER_CONTENT;
8581#ifdef DEBUG_PUSH
8582 xmlGenericError(xmlGenericErrorContext,
8583 "PP: entering CONTENT\n");
8584#endif
8585 break;
8586 case XML_PARSER_PI:
8587 xmlGenericError(xmlGenericErrorContext,
8588 "PP: internal error, state == PI\n");
8589 ctxt->instate = XML_PARSER_CONTENT;
8590#ifdef DEBUG_PUSH
8591 xmlGenericError(xmlGenericErrorContext,
8592 "PP: entering CONTENT\n");
8593#endif
8594 break;
8595 case XML_PARSER_ENTITY_DECL:
8596 xmlGenericError(xmlGenericErrorContext,
8597 "PP: internal error, state == ENTITY_DECL\n");
8598 ctxt->instate = XML_PARSER_DTD;
8599#ifdef DEBUG_PUSH
8600 xmlGenericError(xmlGenericErrorContext,
8601 "PP: entering DTD\n");
8602#endif
8603 break;
8604 case XML_PARSER_ENTITY_VALUE:
8605 xmlGenericError(xmlGenericErrorContext,
8606 "PP: internal error, state == ENTITY_VALUE\n");
8607 ctxt->instate = XML_PARSER_CONTENT;
8608#ifdef DEBUG_PUSH
8609 xmlGenericError(xmlGenericErrorContext,
8610 "PP: entering DTD\n");
8611#endif
8612 break;
8613 case XML_PARSER_ATTRIBUTE_VALUE:
8614 xmlGenericError(xmlGenericErrorContext,
8615 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8616 ctxt->instate = XML_PARSER_START_TAG;
8617#ifdef DEBUG_PUSH
8618 xmlGenericError(xmlGenericErrorContext,
8619 "PP: entering START_TAG\n");
8620#endif
8621 break;
8622 case XML_PARSER_SYSTEM_LITERAL:
8623 xmlGenericError(xmlGenericErrorContext,
8624 "PP: internal error, state == SYSTEM_LITERAL\n");
8625 ctxt->instate = XML_PARSER_START_TAG;
8626#ifdef DEBUG_PUSH
8627 xmlGenericError(xmlGenericErrorContext,
8628 "PP: entering START_TAG\n");
8629#endif
8630 break;
8631 }
8632 }
8633done:
8634#ifdef DEBUG_PUSH
8635 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8636#endif
8637 return(ret);
8638}
8639
8640/**
Owen Taylor3473f882001-02-23 17:55:21 +00008641 * xmlParseChunk:
8642 * @ctxt: an XML parser context
8643 * @chunk: an char array
8644 * @size: the size in byte of the chunk
8645 * @terminate: last chunk indicator
8646 *
8647 * Parse a Chunk of memory
8648 *
8649 * Returns zero if no error, the xmlParserErrors otherwise.
8650 */
8651int
8652xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8653 int terminate) {
8654 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8655 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8656 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8657 int cur = ctxt->input->cur - ctxt->input->base;
8658
8659 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8660 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8661 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008662 ctxt->input->end =
8663 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008664#ifdef DEBUG_PUSH
8665 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8666#endif
8667
8668 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8669 xmlParseTryOrFinish(ctxt, terminate);
8670 } else if (ctxt->instate != XML_PARSER_EOF) {
8671 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8672 xmlParserInputBufferPtr in = ctxt->input->buf;
8673 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8674 (in->raw != NULL)) {
8675 int nbchars;
8676
8677 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8678 if (nbchars < 0) {
8679 xmlGenericError(xmlGenericErrorContext,
8680 "xmlParseChunk: encoder error\n");
8681 return(XML_ERR_INVALID_ENCODING);
8682 }
8683 }
8684 }
8685 }
8686 xmlParseTryOrFinish(ctxt, terminate);
8687 if (terminate) {
8688 /*
8689 * Check for termination
8690 */
8691 if ((ctxt->instate != XML_PARSER_EOF) &&
8692 (ctxt->instate != XML_PARSER_EPILOG)) {
8693 ctxt->errNo = XML_ERR_DOCUMENT_END;
8694 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8695 ctxt->sax->error(ctxt->userData,
8696 "Extra content at the end of the document\n");
8697 ctxt->wellFormed = 0;
8698 ctxt->disableSAX = 1;
8699 }
8700 if (ctxt->instate != XML_PARSER_EOF) {
8701 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8702 (!ctxt->disableSAX))
8703 ctxt->sax->endDocument(ctxt->userData);
8704 }
8705 ctxt->instate = XML_PARSER_EOF;
8706 }
8707 return((xmlParserErrors) ctxt->errNo);
8708}
8709
/************************************************************************
 *                                                                      *
 *                I/O front end functions to the parser                 *
 *                                                                      *
 ************************************************************************/
8715
8716/**
8717 * xmlStopParser:
8718 * @ctxt: an XML parser context
8719 *
8720 * Blocks further parser processing
8721 */
8722void
8723xmlStopParser(xmlParserCtxtPtr ctxt) {
8724 ctxt->instate = XML_PARSER_EOF;
8725 if (ctxt->input != NULL)
8726 ctxt->input->cur = BAD_CAST"";
8727}
8728
8729/**
8730 * xmlCreatePushParserCtxt:
8731 * @sax: a SAX handler
8732 * @user_data: The user data returned on SAX callbacks
8733 * @chunk: a pointer to an array of chars
8734 * @size: number of chars in the array
8735 * @filename: an optional file name or URI
8736 *
8737 * Create a parser context for using the XML parser in push mode
8738 * To allow content encoding detection, @size should be >= 4
8739 * The value of @filename is used for fetching external entities
8740 * and error/warning reports.
8741 *
8742 * Returns the new parser context or NULL
8743 */
8744xmlParserCtxtPtr
8745xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8746 const char *chunk, int size, const char *filename) {
8747 xmlParserCtxtPtr ctxt;
8748 xmlParserInputPtr inputStream;
8749 xmlParserInputBufferPtr buf;
8750 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8751
8752 /*
8753 * plug some encoding conversion routines
8754 */
8755 if ((chunk != NULL) && (size >= 4))
8756 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8757
8758 buf = xmlAllocParserInputBuffer(enc);
8759 if (buf == NULL) return(NULL);
8760
8761 ctxt = xmlNewParserCtxt();
8762 if (ctxt == NULL) {
8763 xmlFree(buf);
8764 return(NULL);
8765 }
8766 if (sax != NULL) {
8767 if (ctxt->sax != &xmlDefaultSAXHandler)
8768 xmlFree(ctxt->sax);
8769 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8770 if (ctxt->sax == NULL) {
8771 xmlFree(buf);
8772 xmlFree(ctxt);
8773 return(NULL);
8774 }
8775 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8776 if (user_data != NULL)
8777 ctxt->userData = user_data;
8778 }
8779 if (filename == NULL) {
8780 ctxt->directory = NULL;
8781 } else {
8782 ctxt->directory = xmlParserGetDirectory(filename);
8783 }
8784
8785 inputStream = xmlNewInputStream(ctxt);
8786 if (inputStream == NULL) {
8787 xmlFreeParserCtxt(ctxt);
8788 return(NULL);
8789 }
8790
8791 if (filename == NULL)
8792 inputStream->filename = NULL;
8793 else
8794 inputStream->filename = xmlMemStrdup(filename);
8795 inputStream->buf = buf;
8796 inputStream->base = inputStream->buf->buffer->content;
8797 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008798 inputStream->end =
8799 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008800
8801 inputPush(ctxt, inputStream);
8802
8803 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8804 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00008805 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8806 int cur = ctxt->input->cur - ctxt->input->base;
8807
Owen Taylor3473f882001-02-23 17:55:21 +00008808 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00008809
8810 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8811 ctxt->input->cur = ctxt->input->base + cur;
8812 ctxt->input->end =
8813 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008814#ifdef DEBUG_PUSH
8815 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8816#endif
8817 }
8818
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008819 if (enc != XML_CHAR_ENCODING_NONE) {
8820 xmlSwitchEncoding(ctxt, enc);
8821 }
8822
Owen Taylor3473f882001-02-23 17:55:21 +00008823 return(ctxt);
8824}
8825
8826/**
8827 * xmlCreateIOParserCtxt:
8828 * @sax: a SAX handler
8829 * @user_data: The user data returned on SAX callbacks
8830 * @ioread: an I/O read function
8831 * @ioclose: an I/O close function
8832 * @ioctx: an I/O handler
8833 * @enc: the charset encoding if known
8834 *
8835 * Create a parser context for using the XML parser with an existing
8836 * I/O stream
8837 *
8838 * Returns the new parser context or NULL
8839 */
8840xmlParserCtxtPtr
8841xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8842 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8843 void *ioctx, xmlCharEncoding enc) {
8844 xmlParserCtxtPtr ctxt;
8845 xmlParserInputPtr inputStream;
8846 xmlParserInputBufferPtr buf;
8847
8848 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8849 if (buf == NULL) return(NULL);
8850
8851 ctxt = xmlNewParserCtxt();
8852 if (ctxt == NULL) {
8853 xmlFree(buf);
8854 return(NULL);
8855 }
8856 if (sax != NULL) {
8857 if (ctxt->sax != &xmlDefaultSAXHandler)
8858 xmlFree(ctxt->sax);
8859 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8860 if (ctxt->sax == NULL) {
8861 xmlFree(buf);
8862 xmlFree(ctxt);
8863 return(NULL);
8864 }
8865 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8866 if (user_data != NULL)
8867 ctxt->userData = user_data;
8868 }
8869
8870 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8871 if (inputStream == NULL) {
8872 xmlFreeParserCtxt(ctxt);
8873 return(NULL);
8874 }
8875 inputPush(ctxt, inputStream);
8876
8877 return(ctxt);
8878}
8879
/************************************************************************
 *                                                                      *
 *                    Front ends when parsing a DTD                     *
 *                                                                      *
 ************************************************************************/
8885
8886/**
8887 * xmlIOParseDTD:
8888 * @sax: the SAX handler block or NULL
8889 * @input: an Input Buffer
8890 * @enc: the charset encoding if known
8891 *
8892 * Load and parse a DTD
8893 *
8894 * Returns the resulting xmlDtdPtr or NULL in case of error.
8895 * @input will be freed at parsing end.
8896 */
8897
8898xmlDtdPtr
8899xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8900 xmlCharEncoding enc) {
8901 xmlDtdPtr ret = NULL;
8902 xmlParserCtxtPtr ctxt;
8903 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00008904 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00008905
8906 if (input == NULL)
8907 return(NULL);
8908
8909 ctxt = xmlNewParserCtxt();
8910 if (ctxt == NULL) {
8911 return(NULL);
8912 }
8913
8914 /*
8915 * Set-up the SAX context
8916 */
8917 if (sax != NULL) {
8918 if (ctxt->sax != NULL)
8919 xmlFree(ctxt->sax);
8920 ctxt->sax = sax;
8921 ctxt->userData = NULL;
8922 }
8923
8924 /*
8925 * generate a parser input from the I/O handler
8926 */
8927
8928 pinput = xmlNewIOInputStream(ctxt, input, enc);
8929 if (pinput == NULL) {
8930 if (sax != NULL) ctxt->sax = NULL;
8931 xmlFreeParserCtxt(ctxt);
8932 return(NULL);
8933 }
8934
8935 /*
8936 * plug some encoding conversion routines here.
8937 */
8938 xmlPushInput(ctxt, pinput);
8939
8940 pinput->filename = NULL;
8941 pinput->line = 1;
8942 pinput->col = 1;
8943 pinput->base = ctxt->input->cur;
8944 pinput->cur = ctxt->input->cur;
8945 pinput->free = NULL;
8946
8947 /*
8948 * let's parse that entity knowing it's an external subset.
8949 */
8950 ctxt->inSubset = 2;
8951 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8952 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8953 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00008954
8955 if (enc == XML_CHAR_ENCODING_NONE) {
8956 /*
8957 * Get the 4 first bytes and decode the charset
8958 * if enc != XML_CHAR_ENCODING_NONE
8959 * plug some encoding conversion routines.
8960 */
8961 start[0] = RAW;
8962 start[1] = NXT(1);
8963 start[2] = NXT(2);
8964 start[3] = NXT(3);
8965 enc = xmlDetectCharEncoding(start, 4);
8966 if (enc != XML_CHAR_ENCODING_NONE) {
8967 xmlSwitchEncoding(ctxt, enc);
8968 }
8969 }
8970
Owen Taylor3473f882001-02-23 17:55:21 +00008971 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8972
8973 if (ctxt->myDoc != NULL) {
8974 if (ctxt->wellFormed) {
8975 ret = ctxt->myDoc->extSubset;
8976 ctxt->myDoc->extSubset = NULL;
8977 } else {
8978 ret = NULL;
8979 }
8980 xmlFreeDoc(ctxt->myDoc);
8981 ctxt->myDoc = NULL;
8982 }
8983 if (sax != NULL) ctxt->sax = NULL;
8984 xmlFreeParserCtxt(ctxt);
8985
8986 return(ret);
8987}
8988
8989/**
8990 * xmlSAXParseDTD:
8991 * @sax: the SAX handler block
8992 * @ExternalID: a NAME* containing the External ID of the DTD
8993 * @SystemID: a NAME* containing the URL to the DTD
8994 *
8995 * Load and parse an external subset.
8996 *
8997 * Returns the resulting xmlDtdPtr or NULL in case of error.
8998 */
8999
9000xmlDtdPtr
9001xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9002 const xmlChar *SystemID) {
9003 xmlDtdPtr ret = NULL;
9004 xmlParserCtxtPtr ctxt;
9005 xmlParserInputPtr input = NULL;
9006 xmlCharEncoding enc;
9007
9008 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9009
9010 ctxt = xmlNewParserCtxt();
9011 if (ctxt == NULL) {
9012 return(NULL);
9013 }
9014
9015 /*
9016 * Set-up the SAX context
9017 */
9018 if (sax != NULL) {
9019 if (ctxt->sax != NULL)
9020 xmlFree(ctxt->sax);
9021 ctxt->sax = sax;
9022 ctxt->userData = NULL;
9023 }
9024
9025 /*
9026 * Ask the Entity resolver to load the damn thing
9027 */
9028
9029 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9030 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9031 if (input == NULL) {
9032 if (sax != NULL) ctxt->sax = NULL;
9033 xmlFreeParserCtxt(ctxt);
9034 return(NULL);
9035 }
9036
9037 /*
9038 * plug some encoding conversion routines here.
9039 */
9040 xmlPushInput(ctxt, input);
9041 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9042 xmlSwitchEncoding(ctxt, enc);
9043
9044 if (input->filename == NULL)
9045 input->filename = (char *) xmlStrdup(SystemID);
9046 input->line = 1;
9047 input->col = 1;
9048 input->base = ctxt->input->cur;
9049 input->cur = ctxt->input->cur;
9050 input->free = NULL;
9051
9052 /*
9053 * let's parse that entity knowing it's an external subset.
9054 */
9055 ctxt->inSubset = 2;
9056 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9057 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9058 ExternalID, SystemID);
9059 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9060
9061 if (ctxt->myDoc != NULL) {
9062 if (ctxt->wellFormed) {
9063 ret = ctxt->myDoc->extSubset;
9064 ctxt->myDoc->extSubset = NULL;
9065 } else {
9066 ret = NULL;
9067 }
9068 xmlFreeDoc(ctxt->myDoc);
9069 ctxt->myDoc = NULL;
9070 }
9071 if (sax != NULL) ctxt->sax = NULL;
9072 xmlFreeParserCtxt(ctxt);
9073
9074 return(ret);
9075}
9076
9077/**
9078 * xmlParseDTD:
9079 * @ExternalID: a NAME* containing the External ID of the DTD
9080 * @SystemID: a NAME* containing the URL to the DTD
9081 *
9082 * Load and parse an external subset.
9083 *
9084 * Returns the resulting xmlDtdPtr or NULL in case of error.
9085 */
9086
9087xmlDtdPtr
9088xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9089 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9090}
9091
/************************************************************************
 *                                                                      *
 *                  Front ends when parsing an Entity                   *
 *                                                                      *
 ************************************************************************/
9097
9098/**
Owen Taylor3473f882001-02-23 17:55:21 +00009099 * xmlParseCtxtExternalEntity:
9100 * @ctx: the existing parsing context
9101 * @URL: the URL for the entity to load
9102 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009103 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009104 *
9105 * Parse an external general entity within an existing parsing context
9106 * An external general parsed entity is well-formed if it matches the
9107 * production labeled extParsedEnt.
9108 *
9109 * [78] extParsedEnt ::= TextDecl? content
9110 *
9111 * Returns 0 if the entity is well formed, -1 in case of args problem and
9112 * the parser error code otherwise
9113 */
9114
9115int
9116xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009117 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009118 xmlParserCtxtPtr ctxt;
9119 xmlDocPtr newDoc;
9120 xmlSAXHandlerPtr oldsax = NULL;
9121 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009122 xmlChar start[4];
9123 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009124
9125 if (ctx->depth > 40) {
9126 return(XML_ERR_ENTITY_LOOP);
9127 }
9128
Daniel Veillardcda96922001-08-21 10:56:31 +00009129 if (lst != NULL)
9130 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009131 if ((URL == NULL) && (ID == NULL))
9132 return(-1);
9133 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9134 return(-1);
9135
9136
9137 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9138 if (ctxt == NULL) return(-1);
9139 ctxt->userData = ctxt;
9140 oldsax = ctxt->sax;
9141 ctxt->sax = ctx->sax;
9142 newDoc = xmlNewDoc(BAD_CAST "1.0");
9143 if (newDoc == NULL) {
9144 xmlFreeParserCtxt(ctxt);
9145 return(-1);
9146 }
9147 if (ctx->myDoc != NULL) {
9148 newDoc->intSubset = ctx->myDoc->intSubset;
9149 newDoc->extSubset = ctx->myDoc->extSubset;
9150 }
9151 if (ctx->myDoc->URL != NULL) {
9152 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9153 }
9154 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9155 if (newDoc->children == NULL) {
9156 ctxt->sax = oldsax;
9157 xmlFreeParserCtxt(ctxt);
9158 newDoc->intSubset = NULL;
9159 newDoc->extSubset = NULL;
9160 xmlFreeDoc(newDoc);
9161 return(-1);
9162 }
9163 nodePush(ctxt, newDoc->children);
9164 if (ctx->myDoc == NULL) {
9165 ctxt->myDoc = newDoc;
9166 } else {
9167 ctxt->myDoc = ctx->myDoc;
9168 newDoc->children->doc = ctx->myDoc;
9169 }
9170
Daniel Veillard87a764e2001-06-20 17:41:10 +00009171 /*
9172 * Get the 4 first bytes and decode the charset
9173 * if enc != XML_CHAR_ENCODING_NONE
9174 * plug some encoding conversion routines.
9175 */
9176 GROW
9177 start[0] = RAW;
9178 start[1] = NXT(1);
9179 start[2] = NXT(2);
9180 start[3] = NXT(3);
9181 enc = xmlDetectCharEncoding(start, 4);
9182 if (enc != XML_CHAR_ENCODING_NONE) {
9183 xmlSwitchEncoding(ctxt, enc);
9184 }
9185
Owen Taylor3473f882001-02-23 17:55:21 +00009186 /*
9187 * Parse a possible text declaration first
9188 */
Owen Taylor3473f882001-02-23 17:55:21 +00009189 if ((RAW == '<') && (NXT(1) == '?') &&
9190 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9191 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9192 xmlParseTextDecl(ctxt);
9193 }
9194
9195 /*
9196 * Doing validity checking on chunk doesn't make sense
9197 */
9198 ctxt->instate = XML_PARSER_CONTENT;
9199 ctxt->validate = ctx->validate;
9200 ctxt->loadsubset = ctx->loadsubset;
9201 ctxt->depth = ctx->depth + 1;
9202 ctxt->replaceEntities = ctx->replaceEntities;
9203 if (ctxt->validate) {
9204 ctxt->vctxt.error = ctx->vctxt.error;
9205 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009206 } else {
9207 ctxt->vctxt.error = NULL;
9208 ctxt->vctxt.warning = NULL;
9209 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009210 ctxt->vctxt.nodeTab = NULL;
9211 ctxt->vctxt.nodeNr = 0;
9212 ctxt->vctxt.nodeMax = 0;
9213 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009214
9215 xmlParseContent(ctxt);
9216
9217 if ((RAW == '<') && (NXT(1) == '/')) {
9218 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9219 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9220 ctxt->sax->error(ctxt->userData,
9221 "chunk is not well balanced\n");
9222 ctxt->wellFormed = 0;
9223 ctxt->disableSAX = 1;
9224 } else if (RAW != 0) {
9225 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9226 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9227 ctxt->sax->error(ctxt->userData,
9228 "extra content at the end of well balanced chunk\n");
9229 ctxt->wellFormed = 0;
9230 ctxt->disableSAX = 1;
9231 }
9232 if (ctxt->node != newDoc->children) {
9233 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9234 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9235 ctxt->sax->error(ctxt->userData,
9236 "chunk is not well balanced\n");
9237 ctxt->wellFormed = 0;
9238 ctxt->disableSAX = 1;
9239 }
9240
9241 if (!ctxt->wellFormed) {
9242 if (ctxt->errNo == 0)
9243 ret = 1;
9244 else
9245 ret = ctxt->errNo;
9246 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009247 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009248 xmlNodePtr cur;
9249
9250 /*
9251 * Return the newly created nodeset after unlinking it from
9252 * they pseudo parent.
9253 */
9254 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009255 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009256 while (cur != NULL) {
9257 cur->parent = NULL;
9258 cur = cur->next;
9259 }
9260 newDoc->children->children = NULL;
9261 }
9262 ret = 0;
9263 }
9264 ctxt->sax = oldsax;
9265 xmlFreeParserCtxt(ctxt);
9266 newDoc->intSubset = NULL;
9267 newDoc->extSubset = NULL;
9268 xmlFreeDoc(newDoc);
9269
9270 return(ret);
9271}
9272
9273/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009274 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009275 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009276 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009277 * @sax: the SAX handler bloc (possibly NULL)
9278 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9279 * @depth: Used for loop detection, use 0
9280 * @URL: the URL for the entity to load
9281 * @ID: the System ID for the entity to load
9282 * @list: the return value for the set of parsed nodes
9283 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009284 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009285 *
9286 * Returns 0 if the entity is well formed, -1 in case of args problem and
9287 * the parser error code otherwise
9288 */
9289
Daniel Veillard257d9102001-05-08 10:41:44 +00009290static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009291xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9292 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009293 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009294 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009295 xmlParserCtxtPtr ctxt;
9296 xmlDocPtr newDoc;
9297 xmlSAXHandlerPtr oldsax = NULL;
9298 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009299 xmlChar start[4];
9300 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009301
9302 if (depth > 40) {
9303 return(XML_ERR_ENTITY_LOOP);
9304 }
9305
9306
9307
9308 if (list != NULL)
9309 *list = NULL;
9310 if ((URL == NULL) && (ID == NULL))
9311 return(-1);
9312 if (doc == NULL) /* @@ relax but check for dereferences */
9313 return(-1);
9314
9315
9316 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9317 if (ctxt == NULL) return(-1);
9318 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009319 if (oldctxt != NULL) {
9320 ctxt->_private = oldctxt->_private;
9321 ctxt->loadsubset = oldctxt->loadsubset;
9322 ctxt->validate = oldctxt->validate;
9323 ctxt->external = oldctxt->external;
9324 } else {
9325 /*
9326 * Doing validity checking on chunk without context
9327 * doesn't make sense
9328 */
9329 ctxt->_private = NULL;
9330 ctxt->validate = 0;
9331 ctxt->external = 2;
9332 ctxt->loadsubset = 0;
9333 }
Owen Taylor3473f882001-02-23 17:55:21 +00009334 if (sax != NULL) {
9335 oldsax = ctxt->sax;
9336 ctxt->sax = sax;
9337 if (user_data != NULL)
9338 ctxt->userData = user_data;
9339 }
9340 newDoc = xmlNewDoc(BAD_CAST "1.0");
9341 if (newDoc == NULL) {
9342 xmlFreeParserCtxt(ctxt);
9343 return(-1);
9344 }
9345 if (doc != NULL) {
9346 newDoc->intSubset = doc->intSubset;
9347 newDoc->extSubset = doc->extSubset;
9348 }
9349 if (doc->URL != NULL) {
9350 newDoc->URL = xmlStrdup(doc->URL);
9351 }
9352 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9353 if (newDoc->children == NULL) {
9354 if (sax != NULL)
9355 ctxt->sax = oldsax;
9356 xmlFreeParserCtxt(ctxt);
9357 newDoc->intSubset = NULL;
9358 newDoc->extSubset = NULL;
9359 xmlFreeDoc(newDoc);
9360 return(-1);
9361 }
9362 nodePush(ctxt, newDoc->children);
9363 if (doc == NULL) {
9364 ctxt->myDoc = newDoc;
9365 } else {
9366 ctxt->myDoc = doc;
9367 newDoc->children->doc = doc;
9368 }
9369
Daniel Veillard87a764e2001-06-20 17:41:10 +00009370 /*
9371 * Get the 4 first bytes and decode the charset
9372 * if enc != XML_CHAR_ENCODING_NONE
9373 * plug some encoding conversion routines.
9374 */
9375 GROW;
9376 start[0] = RAW;
9377 start[1] = NXT(1);
9378 start[2] = NXT(2);
9379 start[3] = NXT(3);
9380 enc = xmlDetectCharEncoding(start, 4);
9381 if (enc != XML_CHAR_ENCODING_NONE) {
9382 xmlSwitchEncoding(ctxt, enc);
9383 }
9384
Owen Taylor3473f882001-02-23 17:55:21 +00009385 /*
9386 * Parse a possible text declaration first
9387 */
Owen Taylor3473f882001-02-23 17:55:21 +00009388 if ((RAW == '<') && (NXT(1) == '?') &&
9389 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9390 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9391 xmlParseTextDecl(ctxt);
9392 }
9393
Owen Taylor3473f882001-02-23 17:55:21 +00009394 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009395 ctxt->depth = depth;
9396
9397 xmlParseContent(ctxt);
9398
9399 if ((RAW == '<') && (NXT(1) == '/')) {
9400 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9401 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9402 ctxt->sax->error(ctxt->userData,
9403 "chunk is not well balanced\n");
9404 ctxt->wellFormed = 0;
9405 ctxt->disableSAX = 1;
9406 } else if (RAW != 0) {
9407 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9408 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9409 ctxt->sax->error(ctxt->userData,
9410 "extra content at the end of well balanced chunk\n");
9411 ctxt->wellFormed = 0;
9412 ctxt->disableSAX = 1;
9413 }
9414 if (ctxt->node != newDoc->children) {
9415 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9416 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9417 ctxt->sax->error(ctxt->userData,
9418 "chunk is not well balanced\n");
9419 ctxt->wellFormed = 0;
9420 ctxt->disableSAX = 1;
9421 }
9422
9423 if (!ctxt->wellFormed) {
9424 if (ctxt->errNo == 0)
9425 ret = 1;
9426 else
9427 ret = ctxt->errNo;
9428 } else {
9429 if (list != NULL) {
9430 xmlNodePtr cur;
9431
9432 /*
9433 * Return the newly created nodeset after unlinking it from
9434 * they pseudo parent.
9435 */
9436 cur = newDoc->children->children;
9437 *list = cur;
9438 while (cur != NULL) {
9439 cur->parent = NULL;
9440 cur = cur->next;
9441 }
9442 newDoc->children->children = NULL;
9443 }
9444 ret = 0;
9445 }
9446 if (sax != NULL)
9447 ctxt->sax = oldsax;
9448 xmlFreeParserCtxt(ctxt);
9449 newDoc->intSubset = NULL;
9450 newDoc->extSubset = NULL;
9451 xmlFreeDoc(newDoc);
9452
9453 return(ret);
9454}
9455
9456/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009457 * xmlParseExternalEntity:
9458 * @doc: the document the chunk pertains to
9459 * @sax: the SAX handler bloc (possibly NULL)
9460 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9461 * @depth: Used for loop detection, use 0
9462 * @URL: the URL for the entity to load
9463 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009464 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009465 *
9466 * Parse an external general entity
9467 * An external general parsed entity is well-formed if it matches the
9468 * production labeled extParsedEnt.
9469 *
9470 * [78] extParsedEnt ::= TextDecl? content
9471 *
9472 * Returns 0 if the entity is well formed, -1 in case of args problem and
9473 * the parser error code otherwise
9474 */
9475
9476int
9477xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009478 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009479 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009480 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009481}
9482
9483/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009484 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009485 * @doc: the document the chunk pertains to
9486 * @sax: the SAX handler bloc (possibly NULL)
9487 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9488 * @depth: Used for loop detection, use 0
9489 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009490 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009491 *
9492 * Parse a well-balanced chunk of an XML document
9493 * called by the parser
9494 * The allowed sequence for the Well Balanced Chunk is the one defined by
9495 * the content production in the XML grammar:
9496 *
9497 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9498 *
9499 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9500 * the parser error code otherwise
9501 */
9502
9503int
9504xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009505 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009506 xmlParserCtxtPtr ctxt;
9507 xmlDocPtr newDoc;
9508 xmlSAXHandlerPtr oldsax = NULL;
9509 int size;
9510 int ret = 0;
9511
9512 if (depth > 40) {
9513 return(XML_ERR_ENTITY_LOOP);
9514 }
9515
9516
Daniel Veillardcda96922001-08-21 10:56:31 +00009517 if (lst != NULL)
9518 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009519 if (string == NULL)
9520 return(-1);
9521
9522 size = xmlStrlen(string);
9523
9524 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9525 if (ctxt == NULL) return(-1);
9526 ctxt->userData = ctxt;
9527 if (sax != NULL) {
9528 oldsax = ctxt->sax;
9529 ctxt->sax = sax;
9530 if (user_data != NULL)
9531 ctxt->userData = user_data;
9532 }
9533 newDoc = xmlNewDoc(BAD_CAST "1.0");
9534 if (newDoc == NULL) {
9535 xmlFreeParserCtxt(ctxt);
9536 return(-1);
9537 }
9538 if (doc != NULL) {
9539 newDoc->intSubset = doc->intSubset;
9540 newDoc->extSubset = doc->extSubset;
9541 }
9542 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9543 if (newDoc->children == NULL) {
9544 if (sax != NULL)
9545 ctxt->sax = oldsax;
9546 xmlFreeParserCtxt(ctxt);
9547 newDoc->intSubset = NULL;
9548 newDoc->extSubset = NULL;
9549 xmlFreeDoc(newDoc);
9550 return(-1);
9551 }
9552 nodePush(ctxt, newDoc->children);
9553 if (doc == NULL) {
9554 ctxt->myDoc = newDoc;
9555 } else {
9556 ctxt->myDoc = doc;
9557 newDoc->children->doc = doc;
9558 }
9559 ctxt->instate = XML_PARSER_CONTENT;
9560 ctxt->depth = depth;
9561
9562 /*
9563 * Doing validity checking on chunk doesn't make sense
9564 */
9565 ctxt->validate = 0;
9566 ctxt->loadsubset = 0;
9567
9568 xmlParseContent(ctxt);
9569
9570 if ((RAW == '<') && (NXT(1) == '/')) {
9571 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9572 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9573 ctxt->sax->error(ctxt->userData,
9574 "chunk is not well balanced\n");
9575 ctxt->wellFormed = 0;
9576 ctxt->disableSAX = 1;
9577 } else if (RAW != 0) {
9578 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9579 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9580 ctxt->sax->error(ctxt->userData,
9581 "extra content at the end of well balanced chunk\n");
9582 ctxt->wellFormed = 0;
9583 ctxt->disableSAX = 1;
9584 }
9585 if (ctxt->node != newDoc->children) {
9586 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9587 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9588 ctxt->sax->error(ctxt->userData,
9589 "chunk is not well balanced\n");
9590 ctxt->wellFormed = 0;
9591 ctxt->disableSAX = 1;
9592 }
9593
9594 if (!ctxt->wellFormed) {
9595 if (ctxt->errNo == 0)
9596 ret = 1;
9597 else
9598 ret = ctxt->errNo;
9599 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009600 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009601 xmlNodePtr cur;
9602
9603 /*
9604 * Return the newly created nodeset after unlinking it from
9605 * they pseudo parent.
9606 */
9607 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009608 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009609 while (cur != NULL) {
9610 cur->parent = NULL;
9611 cur = cur->next;
9612 }
9613 newDoc->children->children = NULL;
9614 }
9615 ret = 0;
9616 }
9617 if (sax != NULL)
9618 ctxt->sax = oldsax;
9619 xmlFreeParserCtxt(ctxt);
9620 newDoc->intSubset = NULL;
9621 newDoc->extSubset = NULL;
9622 xmlFreeDoc(newDoc);
9623
9624 return(ret);
9625}
9626
9627/**
9628 * xmlSAXParseEntity:
9629 * @sax: the SAX handler block
9630 * @filename: the filename
9631 *
9632 * parse an XML external entity out of context and build a tree.
9633 * It use the given SAX function block to handle the parsing callback.
9634 * If sax is NULL, fallback to the default DOM tree building routines.
9635 *
9636 * [78] extParsedEnt ::= TextDecl? content
9637 *
9638 * This correspond to a "Well Balanced" chunk
9639 *
9640 * Returns the resulting document tree
9641 */
9642
9643xmlDocPtr
9644xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9645 xmlDocPtr ret;
9646 xmlParserCtxtPtr ctxt;
9647 char *directory = NULL;
9648
9649 ctxt = xmlCreateFileParserCtxt(filename);
9650 if (ctxt == NULL) {
9651 return(NULL);
9652 }
9653 if (sax != NULL) {
9654 if (ctxt->sax != NULL)
9655 xmlFree(ctxt->sax);
9656 ctxt->sax = sax;
9657 ctxt->userData = NULL;
9658 }
9659
9660 if ((ctxt->directory == NULL) && (directory == NULL))
9661 directory = xmlParserGetDirectory(filename);
9662
9663 xmlParseExtParsedEnt(ctxt);
9664
9665 if (ctxt->wellFormed)
9666 ret = ctxt->myDoc;
9667 else {
9668 ret = NULL;
9669 xmlFreeDoc(ctxt->myDoc);
9670 ctxt->myDoc = NULL;
9671 }
9672 if (sax != NULL)
9673 ctxt->sax = NULL;
9674 xmlFreeParserCtxt(ctxt);
9675
9676 return(ret);
9677}
9678
9679/**
9680 * xmlParseEntity:
9681 * @filename: the filename
9682 *
9683 * parse an XML external entity out of context and build a tree.
9684 *
9685 * [78] extParsedEnt ::= TextDecl? content
9686 *
9687 * This correspond to a "Well Balanced" chunk
9688 *
9689 * Returns the resulting document tree
9690 */
9691
9692xmlDocPtr
9693xmlParseEntity(const char *filename) {
9694 return(xmlSAXParseEntity(NULL, filename));
9695}
9696
9697/**
9698 * xmlCreateEntityParserCtxt:
9699 * @URL: the entity URL
9700 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009701 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +00009702 *
9703 * Create a parser context for an external entity
9704 * Automatic support for ZLIB/Compress compressed document is provided
9705 * by default if found at compile-time.
9706 *
9707 * Returns the new parser context or NULL
9708 */
9709xmlParserCtxtPtr
9710xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9711 const xmlChar *base) {
9712 xmlParserCtxtPtr ctxt;
9713 xmlParserInputPtr inputStream;
9714 char *directory = NULL;
9715 xmlChar *uri;
9716
9717 ctxt = xmlNewParserCtxt();
9718 if (ctxt == NULL) {
9719 return(NULL);
9720 }
9721
9722 uri = xmlBuildURI(URL, base);
9723
9724 if (uri == NULL) {
9725 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9726 if (inputStream == NULL) {
9727 xmlFreeParserCtxt(ctxt);
9728 return(NULL);
9729 }
9730
9731 inputPush(ctxt, inputStream);
9732
9733 if ((ctxt->directory == NULL) && (directory == NULL))
9734 directory = xmlParserGetDirectory((char *)URL);
9735 if ((ctxt->directory == NULL) && (directory != NULL))
9736 ctxt->directory = directory;
9737 } else {
9738 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9739 if (inputStream == NULL) {
9740 xmlFree(uri);
9741 xmlFreeParserCtxt(ctxt);
9742 return(NULL);
9743 }
9744
9745 inputPush(ctxt, inputStream);
9746
9747 if ((ctxt->directory == NULL) && (directory == NULL))
9748 directory = xmlParserGetDirectory((char *)uri);
9749 if ((ctxt->directory == NULL) && (directory != NULL))
9750 ctxt->directory = directory;
9751 xmlFree(uri);
9752 }
9753
9754 return(ctxt);
9755}
9756
9757/************************************************************************
9758 * *
9759 * Front ends when parsing from a file *
9760 * *
9761 ************************************************************************/
9762
9763/**
9764 * xmlCreateFileParserCtxt:
9765 * @filename: the filename
9766 *
9767 * Create a parser context for a file content.
9768 * Automatic support for ZLIB/Compress compressed document is provided
9769 * by default if found at compile-time.
9770 *
9771 * Returns the new parser context or NULL
9772 */
9773xmlParserCtxtPtr
9774xmlCreateFileParserCtxt(const char *filename)
9775{
9776 xmlParserCtxtPtr ctxt;
9777 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +00009778 char *directory = NULL;
9779
Owen Taylor3473f882001-02-23 17:55:21 +00009780 ctxt = xmlNewParserCtxt();
9781 if (ctxt == NULL) {
9782 if (xmlDefaultSAXHandler.error != NULL) {
9783 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9784 }
9785 return(NULL);
9786 }
9787
Daniel Veillard9f7b84b2001-08-23 15:31:19 +00009788 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009789 if (inputStream == NULL) {
9790 xmlFreeParserCtxt(ctxt);
9791 return(NULL);
9792 }
9793
Owen Taylor3473f882001-02-23 17:55:21 +00009794 inputPush(ctxt, inputStream);
9795 if ((ctxt->directory == NULL) && (directory == NULL))
9796 directory = xmlParserGetDirectory(filename);
9797 if ((ctxt->directory == NULL) && (directory != NULL))
9798 ctxt->directory = directory;
9799
9800 return(ctxt);
9801}
9802
9803/**
Daniel Veillarda293c322001-10-02 13:54:14 +00009804 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +00009805 * @sax: the SAX handler block
9806 * @filename: the filename
9807 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9808 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +00009809 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +00009810 *
9811 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9812 * compressed document is provided by default if found at compile-time.
9813 * It use the given SAX function block to handle the parsing callback.
9814 * If sax is NULL, fallback to the default DOM tree building routines.
9815 *
Daniel Veillarda293c322001-10-02 13:54:14 +00009816 * User data (void *) is stored within the parser context, so it is
9817 * available nearly everywhere in libxml.
9818 *
Owen Taylor3473f882001-02-23 17:55:21 +00009819 * Returns the resulting document tree
9820 */
9821
9822xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +00009823xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
9824 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +00009825 xmlDocPtr ret;
9826 xmlParserCtxtPtr ctxt;
9827 char *directory = NULL;
9828
Daniel Veillard635ef722001-10-29 11:48:19 +00009829 xmlInitParser();
9830
Owen Taylor3473f882001-02-23 17:55:21 +00009831 ctxt = xmlCreateFileParserCtxt(filename);
9832 if (ctxt == NULL) {
9833 return(NULL);
9834 }
9835 if (sax != NULL) {
9836 if (ctxt->sax != NULL)
9837 xmlFree(ctxt->sax);
9838 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009839 }
Daniel Veillarda293c322001-10-02 13:54:14 +00009840 if (data!=NULL) {
9841 ctxt->_private=data;
9842 }
Owen Taylor3473f882001-02-23 17:55:21 +00009843
9844 if ((ctxt->directory == NULL) && (directory == NULL))
9845 directory = xmlParserGetDirectory(filename);
9846 if ((ctxt->directory == NULL) && (directory != NULL))
9847 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9848
9849 xmlParseDocument(ctxt);
9850
9851 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9852 else {
9853 ret = NULL;
9854 xmlFreeDoc(ctxt->myDoc);
9855 ctxt->myDoc = NULL;
9856 }
9857 if (sax != NULL)
9858 ctxt->sax = NULL;
9859 xmlFreeParserCtxt(ctxt);
9860
9861 return(ret);
9862}
9863
9864/**
Daniel Veillarda293c322001-10-02 13:54:14 +00009865 * xmlSAXParseFile:
9866 * @sax: the SAX handler block
9867 * @filename: the filename
9868 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9869 * documents
9870 *
9871 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9872 * compressed document is provided by default if found at compile-time.
9873 * It use the given SAX function block to handle the parsing callback.
9874 * If sax is NULL, fallback to the default DOM tree building routines.
9875 *
9876 * Returns the resulting document tree
9877 */
9878
9879xmlDocPtr
9880xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9881 int recovery) {
9882 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
9883}
9884
9885/**
Owen Taylor3473f882001-02-23 17:55:21 +00009886 * xmlRecoverDoc:
9887 * @cur: a pointer to an array of xmlChar
9888 *
9889 * parse an XML in-memory document and build a tree.
9890 * In the case the document is not Well Formed, a tree is built anyway
9891 *
9892 * Returns the resulting document tree
9893 */
9894
9895xmlDocPtr
9896xmlRecoverDoc(xmlChar *cur) {
9897 return(xmlSAXParseDoc(NULL, cur, 1));
9898}
9899
9900/**
9901 * xmlParseFile:
9902 * @filename: the filename
9903 *
9904 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9905 * compressed document is provided by default if found at compile-time.
9906 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +00009907 * Returns the resulting document tree if the file was wellformed,
9908 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +00009909 */
9910
9911xmlDocPtr
9912xmlParseFile(const char *filename) {
9913 return(xmlSAXParseFile(NULL, filename, 0));
9914}
9915
9916/**
9917 * xmlRecoverFile:
9918 * @filename: the filename
9919 *
9920 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9921 * compressed document is provided by default if found at compile-time.
9922 * In the case the document is not Well Formed, a tree is built anyway
9923 *
9924 * Returns the resulting document tree
9925 */
9926
9927xmlDocPtr
9928xmlRecoverFile(const char *filename) {
9929 return(xmlSAXParseFile(NULL, filename, 1));
9930}
9931
9932
9933/**
9934 * xmlSetupParserForBuffer:
9935 * @ctxt: an XML parser context
9936 * @buffer: a xmlChar * buffer
9937 * @filename: a file name
9938 *
9939 * Setup the parser context to parse a new buffer; Clears any prior
9940 * contents from the parser context. The buffer parameter must not be
9941 * NULL, but the filename parameter can be
9942 */
9943void
9944xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9945 const char* filename)
9946{
9947 xmlParserInputPtr input;
9948
9949 input = xmlNewInputStream(ctxt);
9950 if (input == NULL) {
9951 perror("malloc");
9952 xmlFree(ctxt);
9953 return;
9954 }
9955
9956 xmlClearParserCtxt(ctxt);
9957 if (filename != NULL)
9958 input->filename = xmlMemStrdup(filename);
9959 input->base = buffer;
9960 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009961 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +00009962 inputPush(ctxt, input);
9963}
9964
9965/**
9966 * xmlSAXUserParseFile:
9967 * @sax: a SAX handler
9968 * @user_data: The user data returned on SAX callbacks
9969 * @filename: a file name
9970 *
9971 * parse an XML file and call the given SAX handler routines.
9972 * Automatic support for ZLIB/Compress compressed document is provided
9973 *
9974 * Returns 0 in case of success or a error number otherwise
9975 */
9976int
9977xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9978 const char *filename) {
9979 int ret = 0;
9980 xmlParserCtxtPtr ctxt;
9981
9982 ctxt = xmlCreateFileParserCtxt(filename);
9983 if (ctxt == NULL) return -1;
9984 if (ctxt->sax != &xmlDefaultSAXHandler)
9985 xmlFree(ctxt->sax);
9986 ctxt->sax = sax;
9987 if (user_data != NULL)
9988 ctxt->userData = user_data;
9989
9990 xmlParseDocument(ctxt);
9991
9992 if (ctxt->wellFormed)
9993 ret = 0;
9994 else {
9995 if (ctxt->errNo != 0)
9996 ret = ctxt->errNo;
9997 else
9998 ret = -1;
9999 }
10000 if (sax != NULL)
10001 ctxt->sax = NULL;
10002 xmlFreeParserCtxt(ctxt);
10003
10004 return ret;
10005}
10006
10007/************************************************************************
10008 * *
10009 * Front ends when parsing from memory *
10010 * *
10011 ************************************************************************/
10012
10013/**
10014 * xmlCreateMemoryParserCtxt:
10015 * @buffer: a pointer to a char array
10016 * @size: the size of the array
10017 *
10018 * Create a parser context for an XML in-memory document.
10019 *
10020 * Returns the new parser context or NULL
10021 */
10022xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010023xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010024 xmlParserCtxtPtr ctxt;
10025 xmlParserInputPtr input;
10026 xmlParserInputBufferPtr buf;
10027
10028 if (buffer == NULL)
10029 return(NULL);
10030 if (size <= 0)
10031 return(NULL);
10032
10033 ctxt = xmlNewParserCtxt();
10034 if (ctxt == NULL)
10035 return(NULL);
10036
10037 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
10038 if (buf == NULL) return(NULL);
10039
10040 input = xmlNewInputStream(ctxt);
10041 if (input == NULL) {
10042 xmlFreeParserCtxt(ctxt);
10043 return(NULL);
10044 }
10045
10046 input->filename = NULL;
10047 input->buf = buf;
10048 input->base = input->buf->buffer->content;
10049 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010050 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010051
10052 inputPush(ctxt, input);
10053 return(ctxt);
10054}
10055
10056/**
10057 * xmlSAXParseMemory:
10058 * @sax: the SAX handler block
10059 * @buffer: an pointer to a char array
10060 * @size: the size of the array
10061 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10062 * documents
10063 *
10064 * parse an XML in-memory block and use the given SAX function block
10065 * to handle the parsing callback. If sax is NULL, fallback to the default
10066 * DOM tree building routines.
10067 *
10068 * Returns the resulting document tree
10069 */
10070xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010071xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10072 int size, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000010073 xmlDocPtr ret;
10074 xmlParserCtxtPtr ctxt;
10075
10076 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10077 if (ctxt == NULL) return(NULL);
10078 if (sax != NULL) {
10079 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010080 }
10081
10082 xmlParseDocument(ctxt);
10083
10084 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10085 else {
10086 ret = NULL;
10087 xmlFreeDoc(ctxt->myDoc);
10088 ctxt->myDoc = NULL;
10089 }
10090 if (sax != NULL)
10091 ctxt->sax = NULL;
10092 xmlFreeParserCtxt(ctxt);
10093
10094 return(ret);
10095}
10096
10097/**
10098 * xmlParseMemory:
10099 * @buffer: an pointer to a char array
10100 * @size: the size of the array
10101 *
10102 * parse an XML in-memory block and build a tree.
10103 *
10104 * Returns the resulting document tree
10105 */
10106
Daniel Veillard50822cb2001-07-26 20:05:51 +000010107xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010108 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10109}
10110
10111/**
10112 * xmlRecoverMemory:
10113 * @buffer: an pointer to a char array
10114 * @size: the size of the array
10115 *
10116 * parse an XML in-memory block and build a tree.
10117 * In the case the document is not Well Formed, a tree is built anyway
10118 *
10119 * Returns the resulting document tree
10120 */
10121
Daniel Veillard50822cb2001-07-26 20:05:51 +000010122xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010123 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10124}
10125
10126/**
10127 * xmlSAXUserParseMemory:
10128 * @sax: a SAX handler
10129 * @user_data: The user data returned on SAX callbacks
10130 * @buffer: an in-memory XML document input
10131 * @size: the length of the XML document in bytes
10132 *
10133 * A better SAX parsing routine.
10134 * parse an XML in-memory buffer and call the given SAX handler routines.
10135 *
10136 * Returns 0 in case of success or a error number otherwise
10137 */
10138int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010139 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010140 int ret = 0;
10141 xmlParserCtxtPtr ctxt;
10142 xmlSAXHandlerPtr oldsax = NULL;
10143
10144 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10145 if (ctxt == NULL) return -1;
10146 if (sax != NULL) {
10147 oldsax = ctxt->sax;
10148 ctxt->sax = sax;
10149 }
Daniel Veillard30211a02001-04-26 09:33:18 +000010150 if (user_data != NULL)
10151 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010152
10153 xmlParseDocument(ctxt);
10154
10155 if (ctxt->wellFormed)
10156 ret = 0;
10157 else {
10158 if (ctxt->errNo != 0)
10159 ret = ctxt->errNo;
10160 else
10161 ret = -1;
10162 }
10163 if (sax != NULL) {
10164 ctxt->sax = oldsax;
10165 }
10166 xmlFreeParserCtxt(ctxt);
10167
10168 return ret;
10169}
10170
10171/**
10172 * xmlCreateDocParserCtxt:
10173 * @cur: a pointer to an array of xmlChar
10174 *
10175 * Creates a parser context for an XML in-memory document.
10176 *
10177 * Returns the new parser context or NULL
10178 */
10179xmlParserCtxtPtr
10180xmlCreateDocParserCtxt(xmlChar *cur) {
10181 int len;
10182
10183 if (cur == NULL)
10184 return(NULL);
10185 len = xmlStrlen(cur);
10186 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10187}
10188
10189/**
10190 * xmlSAXParseDoc:
10191 * @sax: the SAX handler block
10192 * @cur: a pointer to an array of xmlChar
10193 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10194 * documents
10195 *
10196 * parse an XML in-memory document and build a tree.
10197 * It use the given SAX function block to handle the parsing callback.
10198 * If sax is NULL, fallback to the default DOM tree building routines.
10199 *
10200 * Returns the resulting document tree
10201 */
10202
10203xmlDocPtr
10204xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10205 xmlDocPtr ret;
10206 xmlParserCtxtPtr ctxt;
10207
10208 if (cur == NULL) return(NULL);
10209
10210
10211 ctxt = xmlCreateDocParserCtxt(cur);
10212 if (ctxt == NULL) return(NULL);
10213 if (sax != NULL) {
10214 ctxt->sax = sax;
10215 ctxt->userData = NULL;
10216 }
10217
10218 xmlParseDocument(ctxt);
10219 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10220 else {
10221 ret = NULL;
10222 xmlFreeDoc(ctxt->myDoc);
10223 ctxt->myDoc = NULL;
10224 }
10225 if (sax != NULL)
10226 ctxt->sax = NULL;
10227 xmlFreeParserCtxt(ctxt);
10228
10229 return(ret);
10230}
10231
10232/**
10233 * xmlParseDoc:
10234 * @cur: a pointer to an array of xmlChar
10235 *
10236 * parse an XML in-memory document and build a tree.
10237 *
10238 * Returns the resulting document tree
10239 */
10240
10241xmlDocPtr
10242xmlParseDoc(xmlChar *cur) {
10243 return(xmlSAXParseDoc(NULL, cur, 0));
10244}
10245
/************************************************************************
 *									*
 *	Specific functions to keep track of entity references		*
 *	and used by the XSLT debugger					*
 *									*
 ************************************************************************/

/*
 * Callback invoked by xmlAddEntityReference(). NULL, the initial value,
 * means no callback is registered; it is replaced through
 * xmlSetEntityReferenceFunc().
 */
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
10254
10255/**
10256 * xmlAddEntityReference:
10257 * @ent : A valid entity
10258 * @firstNode : A valid first node for children of entity
10259 * @lastNode : A valid last node of children entity
10260 *
10261 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
10262 */
10263static void
10264xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
10265 xmlNodePtr lastNode)
10266{
10267 if (xmlEntityRefFunc != NULL) {
10268 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
10269 }
10270}
10271
10272
/**
 * xmlSetEntityReferenceFunc:
 * @func:  the callback to register; NULL leaves no callback registered
 *
 * Set the function to call back when an entity reference has been made.
 * The pointer is stored in a file-level variable and replaces whatever
 * callback was registered before.
 */
void
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
{
    xmlEntityRefFunc = func;
}
Owen Taylor3473f882001-02-23 17:55:21 +000010284
/************************************************************************
 *									*
 *				Miscellaneous				*
 *									*
 ************************************************************************/

#ifdef LIBXML_XPATH_ENABLED
#include <libxml/xpath.h>
#endif

/*
 * Compared against xmlGenericError in xmlInitParser().
 * NOTE(review): declared locally instead of coming from a header --
 * confirm that no included header already exports this prototype.
 */
extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
/* Set to 1 at the end of xmlInitParser(), reset to 0 by xmlCleanupParser(). */
static int xmlParserInitialized = 0;
10297
10298/**
10299 * xmlInitParser:
10300 *
10301 * Initialization function for the XML parser.
10302 * This is not reentrant. Call once before processing in case of
10303 * use in multithreaded programs.
10304 */
10305
10306void
10307xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000010308 if (xmlParserInitialized != 0)
10309 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010310
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010311 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
10312 (xmlGenericError == NULL))
10313 initGenericErrorDefaultFunc(NULL);
Daniel Veillardd0463562001-10-13 09:15:48 +000010314 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000010315 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000010316 xmlInitCharEncodingHandlers();
10317 xmlInitializePredefinedEntities();
10318 xmlDefaultSAXHandlerInit();
10319 xmlRegisterDefaultInputCallbacks();
10320 xmlRegisterDefaultOutputCallbacks();
10321#ifdef LIBXML_HTML_ENABLED
10322 htmlInitAutoClose();
10323 htmlDefaultSAXHandlerInit();
10324#endif
10325#ifdef LIBXML_XPATH_ENABLED
10326 xmlXPathInit();
10327#endif
10328 xmlParserInitialized = 1;
10329}
10330
/**
 * xmlCleanupParser:
 *
 * Cleanup function for the XML parser. It tries to reclaim all
 * parsing related global memory allocated for the parser processing.
 * It doesn't deallocate any document related memory. Calling this
 * function should not prevent reusing the parser.
 *
 * The catalog cleanup step is only compiled in when
 * LIBXML_CATALOG_ENABLED is defined.
 */

void
xmlCleanupParser(void) {
    xmlCleanupCharEncodingHandlers();
    xmlCleanupPredefinedEntities();
#ifdef LIBXML_CATALOG_ENABLED
    xmlCatalogCleanup();
#endif
    xmlCleanupThreads();
    /* clear the flag so a later xmlInitParser() runs its full sequence again */
    xmlParserInitialized = 0;
}