blob: 9162d9007279bdc49f36114eb418722f4fd601d0 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Bjorn Reese70a9da52001-04-21 16:57:29 +000033#include "libxml.h"
34
Daniel Veillard3c5ed912002-01-08 10:36:16 +000035#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000036#define XML_DIR_SEP '\\'
37#else
Owen Taylor3473f882001-02-23 17:55:21 +000038#define XML_DIR_SEP '/'
39#endif
40
Owen Taylor3473f882001-02-23 17:55:21 +000041#include <stdlib.h>
42#include <string.h>
43#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000044#include <libxml/threads.h>
45#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046#include <libxml/tree.h>
47#include <libxml/parser.h>
48#include <libxml/parserInternals.h>
49#include <libxml/valid.h>
50#include <libxml/entities.h>
51#include <libxml/xmlerror.h>
52#include <libxml/encoding.h>
53#include <libxml/xmlIO.h>
54#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000055#ifdef LIBXML_CATALOG_ENABLED
56#include <libxml/catalog.h>
57#endif
Owen Taylor3473f882001-02-23 17:55:21 +000058
59#ifdef HAVE_CTYPE_H
60#include <ctype.h>
61#endif
62#ifdef HAVE_STDLIB_H
63#include <stdlib.h>
64#endif
65#ifdef HAVE_SYS_STAT_H
66#include <sys/stat.h>
67#endif
68#ifdef HAVE_FCNTL_H
69#include <fcntl.h>
70#endif
71#ifdef HAVE_UNISTD_H
72#include <unistd.h>
73#endif
74#ifdef HAVE_ZLIB_H
75#include <zlib.h>
76#endif
77
78
Daniel Veillard21a0f912001-02-25 19:54:14 +000079#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000080#define XML_PARSER_BUFFER_SIZE 100
81
/*
 * List of the "xml"-prefixed processing instruction targets allowed by
 * the W3C specifications.
 *
 * The table is terminated by a NULL entry. Both the strings and the
 * table itself are read-only.
 */

static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
90
91/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +000092xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
93 const xmlChar **str);
94
Daniel Veillard257d9102001-05-08 10:41:44 +000095static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +000096xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
97 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000098 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000099 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000100
Daniel Veillard8107a222002-01-13 14:10:10 +0000101static void
102xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
103 xmlNodePtr lastNode);
104
Owen Taylor3473f882001-02-23 17:55:21 +0000105/************************************************************************
106 * *
107 * Parser stacks related functions and macros *
108 * *
109 ************************************************************************/
110
111xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
112 const xmlChar ** str);
113
/*
 * Generic generator for the stacks kept in the parser context.
 *
 * PUSH_AND_POP(scope, type, name) expands to two functions working on
 * the ctxt->name##Tab array, its ctxt->name##Nr (used entries) and
 * ctxt->name##Max (allocated entries) counters, and the ctxt->name
 * shortcut to the top of the stack:
 *
 *   scope int name##Push(xmlParserCtxtPtr ctxt, type value)
 *       Stores @value on top of the stack, doubling the table first when
 *       it is full. Returns the index used for @value, or 0 when the
 *       table could not be grown. Note that 0 is also the index returned
 *       by the very first successful push.
 *       On allocation failure the existing table, its capacity and the
 *       current top are left untouched, so the context stays usable.
 *
 *   scope type name##Pop(xmlParserCtxtPtr ctxt)
 *       Removes the top entry and returns it, or returns 0 when the
 *       stack is empty. ctxt->name is updated to the new top, or NULL
 *       when the stack becomes empty. The vacated slot is cleared.
 */

#define PUSH_AND_POP(scope, type, name)                                 \
scope int name##Push(xmlParserCtxtPtr ctxt, type value) {               \
    if (ctxt->name##Nr >= ctxt->name##Max) {                            \
        int newMax = ctxt->name##Max * 2;                               \
        type *newTab = (type *) xmlRealloc(ctxt->name##Tab,             \
                         newMax * sizeof(ctxt->name##Tab[0]));          \
        if (newTab == NULL) {                                           \
            xmlGenericError(xmlGenericErrorContext,                     \
                            "realloc failed !\n");                      \
            return(0);                                                  \
        }                                                               \
        ctxt->name##Tab = newTab;                                       \
        ctxt->name##Max = newMax;                                       \
    }                                                                   \
    ctxt->name##Tab[ctxt->name##Nr] = value;                            \
    ctxt->name = value;                                                 \
    return(ctxt->name##Nr++);                                           \
}                                                                       \
scope type name##Pop(xmlParserCtxtPtr ctxt) {                           \
    type ret;                                                           \
    if (ctxt->name##Nr <= 0) return(0);                                 \
    ctxt->name##Nr--;                                                   \
    if (ctxt->name##Nr > 0)                                             \
        ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1];               \
    else                                                                \
        ctxt->name = NULL;                                              \
    ret = ctxt->name##Tab[ctxt->name##Nr];                              \
    ctxt->name##Tab[ctxt->name##Nr] = 0;                                \
    return(ret);                                                        \
}
146
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000147/**
148 * inputPop:
149 * @ctxt: an XML parser context
150 *
151 * Pops the top parser input from the input stack
152 *
153 * Returns the input just removed
154 */
155/**
156 * inputPush:
157 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000158 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000159 *
160 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000161 *
162 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000163 */
164/**
165 * namePop:
166 * @ctxt: an XML parser context
167 *
168 * Pops the top element name from the name stack
169 *
170 * Returns the name just removed
171 */
172/**
173 * namePush:
174 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000175 * @value: the element name
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000176 *
177 * Pushes a new element name on top of the name stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000178 *
179 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000180 */
181/**
182 * nodePop:
183 * @ctxt: an XML parser context
184 *
185 * Pops the top element node from the node stack
186 *
187 * Returns the node just removed
188 */
189/**
190 * nodePush:
191 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000192 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000193 *
194 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000195 *
196 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000197 */
Owen Taylor3473f882001-02-23 17:55:21 +0000198/*
199 * Those macros actually generate the functions
200 */
201PUSH_AND_POP(extern, xmlParserInputPtr, input)
202PUSH_AND_POP(extern, xmlNodePtr, node)
203PUSH_AND_POP(extern, xmlChar*, name)
204
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000205static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000206 if (ctxt->spaceNr >= ctxt->spaceMax) {
207 ctxt->spaceMax *= 2;
208 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
209 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
210 if (ctxt->spaceTab == NULL) {
211 xmlGenericError(xmlGenericErrorContext,
212 "realloc failed !\n");
213 return(0);
214 }
215 }
216 ctxt->spaceTab[ctxt->spaceNr] = val;
217 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
218 return(ctxt->spaceNr++);
219}
220
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000221static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000222 int ret;
223 if (ctxt->spaceNr <= 0) return(0);
224 ctxt->spaceNr--;
225 if (ctxt->spaceNr > 0)
226 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
227 else
228 ctxt->space = NULL;
229 ret = ctxt->spaceTab[ctxt->spaceNr];
230 ctxt->spaceTab[ctxt->spaceNr] = -1;
231 return(ret);
232}
233
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. They all expect a variable named ctxt to be in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the pending token (ctxt->token) if there is one,
 *           otherwise the current xmlChar of the input buffer.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but only ever yields a value read from the input
 *           buffer: it evaluates to -1 while a token is pending.
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser. The argument is evaluated twice.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXT1   Skip exactly one xmlChar and refill the input if the end of
 *           the buffer was reached.
 *   NEXTL(l) Skip l xmlChar in the input buffer
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *
 * SHRINK, GROW, NEXT1 and COPY_BUF deliberately keep their historical
 * "bare statement" shape instead of do { } while (0): at least one call
 * site in this file writes GROW without a trailing semicolon. Do not use
 * them as the unbraced body of an if that has an else branch.
 */

#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val),ctxt->input->cur += (val);                   \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)

#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input);                                  \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  }

#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {   \
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                       \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {                                                         \
        ctxt->input->cur++;                                             \
        ctxt->nbChars++;                                                \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->token = 0; ctxt->input->cur += (l);                           \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

#define COPY_BUF(l,b,i,v)                                               \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);                           \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +0000320
321/**
322 * xmlSkipBlankChars:
323 * @ctxt: the XML parser context
324 *
325 * skip all blanks character found at that point in the input streams.
326 * It pops up finished entities in the process if allowable at that point.
327 *
328 * Returns the number of space chars skipped
329 */
330
331int
332xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000333 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000334
Daniel Veillard02141ea2001-04-30 11:46:40 +0000335 if (ctxt->token != 0) {
336 if (!IS_BLANK(ctxt->token))
337 return(0);
338 ctxt->token = 0;
339 res++;
340 }
Owen Taylor3473f882001-02-23 17:55:21 +0000341 /*
342 * It's Okay to use CUR/NEXT here since all the blanks are on
343 * the ASCII range.
344 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000345 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
346 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000347 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000348 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000349 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000350 cur = ctxt->input->cur;
351 while (IS_BLANK(*cur)) {
352 if (*cur == '\n') {
353 ctxt->input->line++; ctxt->input->col = 1;
354 }
355 cur++;
356 res++;
357 if (*cur == 0) {
358 ctxt->input->cur = cur;
359 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
360 cur = ctxt->input->cur;
361 }
362 }
363 ctxt->input->cur = cur;
364 } else {
365 int cur;
366 do {
367 cur = CUR;
368 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
369 NEXT;
370 cur = CUR;
371 res++;
372 }
373 while ((cur == 0) && (ctxt->inputNr > 1) &&
374 (ctxt->instate != XML_PARSER_COMMENT)) {
375 xmlPopInput(ctxt);
376 cur = CUR;
377 }
378 /*
379 * Need to handle support of entities branching here
380 */
381 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
382 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
383 }
Owen Taylor3473f882001-02-23 17:55:21 +0000384 return(res);
385}
386
387/************************************************************************
388 * *
389 * Commodity functions to handle entities *
390 * *
391 ************************************************************************/
392
393/**
394 * xmlPopInput:
395 * @ctxt: an XML parser context
396 *
397 * xmlPopInput: the current input pointed by ctxt->input came to an end
398 * pop it and return the next char.
399 *
400 * Returns the current xmlChar in the parser context
401 */
402xmlChar
403xmlPopInput(xmlParserCtxtPtr ctxt) {
404 if (ctxt->inputNr == 1) return(0); /* End of main Input */
405 if (xmlParserDebugEntities)
406 xmlGenericError(xmlGenericErrorContext,
407 "Popping input %d\n", ctxt->inputNr);
408 xmlFreeInputStream(inputPop(ctxt));
409 if ((*ctxt->input->cur == 0) &&
410 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
411 return(xmlPopInput(ctxt));
412 return(CUR);
413}
414
415/**
416 * xmlPushInput:
417 * @ctxt: an XML parser context
418 * @input: an XML parser input fragment (entity, XML fragment ...).
419 *
420 * xmlPushInput: switch to a new input stream which is stacked on top
421 * of the previous one(s).
422 */
423void
424xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
425 if (input == NULL) return;
426
427 if (xmlParserDebugEntities) {
428 if ((ctxt->input != NULL) && (ctxt->input->filename))
429 xmlGenericError(xmlGenericErrorContext,
430 "%s(%d): ", ctxt->input->filename,
431 ctxt->input->line);
432 xmlGenericError(xmlGenericErrorContext,
433 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
434 }
435 inputPush(ctxt, input);
436 GROW;
437}
438
439/**
440 * xmlParseCharRef:
441 * @ctxt: an XML parser context
442 *
443 * parse Reference declarations
444 *
445 * [66] CharRef ::= '&#' [0-9]+ ';' |
446 * '&#x' [0-9a-fA-F]+ ';'
447 *
448 * [ WFC: Legal Character ]
449 * Characters referred to using character references must match the
450 * production for Char.
451 *
452 * Returns the value parsed (as an int), 0 in case of error
453 */
454int
455xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000456 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000457 int count = 0;
458
459 if (ctxt->token != 0) {
460 val = ctxt->token;
461 ctxt->token = 0;
462 return(val);
463 }
464 /*
465 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
466 */
467 if ((RAW == '&') && (NXT(1) == '#') &&
468 (NXT(2) == 'x')) {
469 SKIP(3);
470 GROW;
471 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000472 if (count++ > 20) {
473 count = 0;
474 GROW;
475 }
476 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000477 val = val * 16 + (CUR - '0');
478 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
479 val = val * 16 + (CUR - 'a') + 10;
480 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
481 val = val * 16 + (CUR - 'A') + 10;
482 else {
483 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
484 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
485 ctxt->sax->error(ctxt->userData,
486 "xmlParseCharRef: invalid hexadecimal value\n");
487 ctxt->wellFormed = 0;
488 ctxt->disableSAX = 1;
489 val = 0;
490 break;
491 }
492 NEXT;
493 count++;
494 }
495 if (RAW == ';') {
496 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
497 ctxt->nbChars ++;
498 ctxt->input->cur++;
499 }
500 } else if ((RAW == '&') && (NXT(1) == '#')) {
501 SKIP(2);
502 GROW;
503 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000504 if (count++ > 20) {
505 count = 0;
506 GROW;
507 }
508 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000509 val = val * 10 + (CUR - '0');
510 else {
511 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
512 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
513 ctxt->sax->error(ctxt->userData,
514 "xmlParseCharRef: invalid decimal value\n");
515 ctxt->wellFormed = 0;
516 ctxt->disableSAX = 1;
517 val = 0;
518 break;
519 }
520 NEXT;
521 count++;
522 }
523 if (RAW == ';') {
524 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
525 ctxt->nbChars ++;
526 ctxt->input->cur++;
527 }
528 } else {
529 ctxt->errNo = XML_ERR_INVALID_CHARREF;
530 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
531 ctxt->sax->error(ctxt->userData,
532 "xmlParseCharRef: invalid value\n");
533 ctxt->wellFormed = 0;
534 ctxt->disableSAX = 1;
535 }
536
537 /*
538 * [ WFC: Legal Character ]
539 * Characters referred to using character references must match the
540 * production for Char.
541 */
542 if (IS_CHAR(val)) {
543 return(val);
544 } else {
545 ctxt->errNo = XML_ERR_INVALID_CHAR;
546 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000547 ctxt->sax->error(ctxt->userData,
548 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000549 val);
550 ctxt->wellFormed = 0;
551 ctxt->disableSAX = 1;
552 }
553 return(0);
554}
555
556/**
557 * xmlParseStringCharRef:
558 * @ctxt: an XML parser context
559 * @str: a pointer to an index in the string
560 *
561 * parse Reference declarations, variant parsing from a string rather
562 * than an an input flow.
563 *
564 * [66] CharRef ::= '&#' [0-9]+ ';' |
565 * '&#x' [0-9a-fA-F]+ ';'
566 *
567 * [ WFC: Legal Character ]
568 * Characters referred to using character references must match the
569 * production for Char.
570 *
571 * Returns the value parsed (as an int), 0 in case of error, str will be
572 * updated to the current value of the index
573 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000574static int
Owen Taylor3473f882001-02-23 17:55:21 +0000575xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
576 const xmlChar *ptr;
577 xmlChar cur;
578 int val = 0;
579
580 if ((str == NULL) || (*str == NULL)) return(0);
581 ptr = *str;
582 cur = *ptr;
583 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
584 ptr += 3;
585 cur = *ptr;
586 while (cur != ';') { /* Non input consuming loop */
587 if ((cur >= '0') && (cur <= '9'))
588 val = val * 16 + (cur - '0');
589 else if ((cur >= 'a') && (cur <= 'f'))
590 val = val * 16 + (cur - 'a') + 10;
591 else if ((cur >= 'A') && (cur <= 'F'))
592 val = val * 16 + (cur - 'A') + 10;
593 else {
594 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
595 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
596 ctxt->sax->error(ctxt->userData,
597 "xmlParseStringCharRef: invalid hexadecimal value\n");
598 ctxt->wellFormed = 0;
599 ctxt->disableSAX = 1;
600 val = 0;
601 break;
602 }
603 ptr++;
604 cur = *ptr;
605 }
606 if (cur == ';')
607 ptr++;
608 } else if ((cur == '&') && (ptr[1] == '#')){
609 ptr += 2;
610 cur = *ptr;
611 while (cur != ';') { /* Non input consuming loops */
612 if ((cur >= '0') && (cur <= '9'))
613 val = val * 10 + (cur - '0');
614 else {
615 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
616 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
617 ctxt->sax->error(ctxt->userData,
618 "xmlParseStringCharRef: invalid decimal value\n");
619 ctxt->wellFormed = 0;
620 ctxt->disableSAX = 1;
621 val = 0;
622 break;
623 }
624 ptr++;
625 cur = *ptr;
626 }
627 if (cur == ';')
628 ptr++;
629 } else {
630 ctxt->errNo = XML_ERR_INVALID_CHARREF;
631 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
632 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000633 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000634 ctxt->wellFormed = 0;
635 ctxt->disableSAX = 1;
636 return(0);
637 }
638 *str = ptr;
639
640 /*
641 * [ WFC: Legal Character ]
642 * Characters referred to using character references must match the
643 * production for Char.
644 */
645 if (IS_CHAR(val)) {
646 return(val);
647 } else {
648 ctxt->errNo = XML_ERR_INVALID_CHAR;
649 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
650 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000651 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000652 ctxt->wellFormed = 0;
653 ctxt->disableSAX = 1;
654 }
655 return(0);
656}
657
658/**
659 * xmlParserHandlePEReference:
660 * @ctxt: the parser context
661 *
662 * [69] PEReference ::= '%' Name ';'
663 *
664 * [ WFC: No Recursion ]
665 * A parsed entity must not contain a recursive
666 * reference to itself, either directly or indirectly.
667 *
668 * [ WFC: Entity Declared ]
669 * In a document without any DTD, a document with only an internal DTD
670 * subset which contains no parameter entity references, or a document
671 * with "standalone='yes'", ... ... The declaration of a parameter
672 * entity must precede any reference to it...
673 *
674 * [ VC: Entity Declared ]
675 * In a document with an external subset or external parameter entities
676 * with "standalone='no'", ... ... The declaration of a parameter entity
677 * must precede any reference to it...
678 *
679 * [ WFC: In DTD ]
680 * Parameter-entity references may only appear in the DTD.
681 * NOTE: misleading but this is handled.
682 *
683 * A PEReference may have been detected in the current input stream
684 * the handling is done accordingly to
685 * http://www.w3.org/TR/REC-xml#entproc
686 * i.e.
687 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000688 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000689 */
690void
691xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
692 xmlChar *name;
693 xmlEntityPtr entity = NULL;
694 xmlParserInputPtr input;
695
696 if (ctxt->token != 0) {
697 return;
698 }
699 if (RAW != '%') return;
700 switch(ctxt->instate) {
701 case XML_PARSER_CDATA_SECTION:
702 return;
703 case XML_PARSER_COMMENT:
704 return;
705 case XML_PARSER_START_TAG:
706 return;
707 case XML_PARSER_END_TAG:
708 return;
709 case XML_PARSER_EOF:
710 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
711 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
712 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
713 ctxt->wellFormed = 0;
714 ctxt->disableSAX = 1;
715 return;
716 case XML_PARSER_PROLOG:
717 case XML_PARSER_START:
718 case XML_PARSER_MISC:
719 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
720 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
721 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
722 ctxt->wellFormed = 0;
723 ctxt->disableSAX = 1;
724 return;
725 case XML_PARSER_ENTITY_DECL:
726 case XML_PARSER_CONTENT:
727 case XML_PARSER_ATTRIBUTE_VALUE:
728 case XML_PARSER_PI:
729 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +0000730 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +0000731 /* we just ignore it there */
732 return;
733 case XML_PARSER_EPILOG:
734 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
735 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
736 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
737 ctxt->wellFormed = 0;
738 ctxt->disableSAX = 1;
739 return;
740 case XML_PARSER_ENTITY_VALUE:
741 /*
742 * NOTE: in the case of entity values, we don't do the
743 * substitution here since we need the literal
744 * entity value to be able to save the internal
745 * subset of the document.
746 * This will be handled by xmlStringDecodeEntities
747 */
748 return;
749 case XML_PARSER_DTD:
750 /*
751 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
752 * In the internal DTD subset, parameter-entity references
753 * can occur only where markup declarations can occur, not
754 * within markup declarations.
755 * In that case this is handled in xmlParseMarkupDecl
756 */
757 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
758 return;
759 break;
760 case XML_PARSER_IGNORE:
761 return;
762 }
763
764 NEXT;
765 name = xmlParseName(ctxt);
766 if (xmlParserDebugEntities)
767 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000768 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000769 if (name == NULL) {
770 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
771 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000772 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000773 ctxt->wellFormed = 0;
774 ctxt->disableSAX = 1;
775 } else {
776 if (RAW == ';') {
777 NEXT;
778 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
779 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
780 if (entity == NULL) {
781
782 /*
783 * [ WFC: Entity Declared ]
784 * In a document without any DTD, a document with only an
785 * internal DTD subset which contains no parameter entity
786 * references, or a document with "standalone='yes'", ...
787 * ... The declaration of a parameter entity must precede
788 * any reference to it...
789 */
790 if ((ctxt->standalone == 1) ||
791 ((ctxt->hasExternalSubset == 0) &&
792 (ctxt->hasPErefs == 0))) {
793 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
794 ctxt->sax->error(ctxt->userData,
795 "PEReference: %%%s; not found\n", name);
796 ctxt->wellFormed = 0;
797 ctxt->disableSAX = 1;
798 } else {
799 /*
800 * [ VC: Entity Declared ]
801 * In a document with an external subset or external
802 * parameter entities with "standalone='no'", ...
803 * ... The declaration of a parameter entity must precede
804 * any reference to it...
805 */
806 if ((!ctxt->disableSAX) &&
807 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
808 ctxt->vctxt.error(ctxt->vctxt.userData,
809 "PEReference: %%%s; not found\n", name);
810 } else if ((!ctxt->disableSAX) &&
811 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
812 ctxt->sax->warning(ctxt->userData,
813 "PEReference: %%%s; not found\n", name);
814 ctxt->valid = 0;
815 }
816 } else {
817 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
818 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000819 xmlChar start[4];
820 xmlCharEncoding enc;
821
Owen Taylor3473f882001-02-23 17:55:21 +0000822 /*
823 * handle the extra spaces added before and after
824 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000825 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000826 */
827 input = xmlNewEntityInputStream(ctxt, entity);
828 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000829
830 /*
831 * Get the 4 first bytes and decode the charset
832 * if enc != XML_CHAR_ENCODING_NONE
833 * plug some encoding conversion routines.
834 */
835 GROW
836 start[0] = RAW;
837 start[1] = NXT(1);
838 start[2] = NXT(2);
839 start[3] = NXT(3);
840 enc = xmlDetectCharEncoding(start, 4);
841 if (enc != XML_CHAR_ENCODING_NONE) {
842 xmlSwitchEncoding(ctxt, enc);
843 }
844
Owen Taylor3473f882001-02-23 17:55:21 +0000845 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
846 (RAW == '<') && (NXT(1) == '?') &&
847 (NXT(2) == 'x') && (NXT(3) == 'm') &&
848 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
849 xmlParseTextDecl(ctxt);
850 }
851 if (ctxt->token == 0)
852 ctxt->token = ' ';
853 } else {
854 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
855 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000856 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000857 name);
858 ctxt->wellFormed = 0;
859 ctxt->disableSAX = 1;
860 }
861 }
862 } else {
863 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
864 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
865 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000866 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000867 ctxt->wellFormed = 0;
868 ctxt->disableSAX = 1;
869 }
870 xmlFree(name);
871 }
872}
873
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates @buffer to that size. It must be
 * used inside a function returning a pointer: when the reallocation
 * fails the previous buffer is released, so that it does not leak, and
 * the enclosing function returns NULL.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *growBuffer_tmp;                                            \
    buffer##_size *= 2;                                                 \
    growBuffer_tmp = (xmlChar *)                                        \
                xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));    \
    if (growBuffer_tmp == NULL) {                                       \
        perror("realloc failed");                                       \
        xmlFree(buffer);                                                \
        return(NULL);                                                   \
    }                                                                   \
    buffer = growBuffer_tmp;                                            \
}
886
887/**
888 * xmlStringDecodeEntities:
889 * @ctxt: the parser context
890 * @str: the input string
891 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
892 * @end: an end marker xmlChar, 0 if none
893 * @end2: an end marker xmlChar, 0 if none
894 * @end3: an end marker xmlChar, 0 if none
895 *
 * Takes an entity string content and processes it to do the adequate
 * substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +0000897 *
898 * [67] Reference ::= EntityRef | CharRef
899 *
900 * [69] PEReference ::= '%' Name ';'
901 *
902 * Returns A newly allocated string with the substitution done. The caller
903 * must deallocate it !
904 */
905xmlChar *
906xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
907 xmlChar end, xmlChar end2, xmlChar end3) {
908 xmlChar *buffer = NULL;
909 int buffer_size = 0;
910
911 xmlChar *current = NULL;
912 xmlEntityPtr ent;
913 int c,l;
914 int nbchars = 0;
915
916 if (str == NULL)
917 return(NULL);
918
919 if (ctxt->depth > 40) {
920 ctxt->errNo = XML_ERR_ENTITY_LOOP;
921 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
922 ctxt->sax->error(ctxt->userData,
923 "Detected entity reference loop\n");
924 ctxt->wellFormed = 0;
925 ctxt->disableSAX = 1;
926 return(NULL);
927 }
928
929 /*
930 * allocate a translation buffer.
931 */
932 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
933 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
934 if (buffer == NULL) {
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000935 perror("xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +0000936 return(NULL);
937 }
938
939 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000940 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +0000941 * we are operating on already parsed values.
942 */
943 c = CUR_SCHAR(str, l);
944 while ((c != 0) && (c != end) && /* non input consuming loop */
945 (c != end2) && (c != end3)) {
946
947 if (c == 0) break;
948 if ((c == '&') && (str[1] == '#')) {
949 int val = xmlParseStringCharRef(ctxt, &str);
950 if (val != 0) {
951 COPY_BUF(0,buffer,nbchars,val);
952 }
953 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
954 if (xmlParserDebugEntities)
955 xmlGenericError(xmlGenericErrorContext,
956 "String decoding Entity Reference: %.30s\n",
957 str);
958 ent = xmlParseStringEntityRef(ctxt, &str);
959 if ((ent != NULL) &&
960 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
961 if (ent->content != NULL) {
962 COPY_BUF(0,buffer,nbchars,ent->content[0]);
963 } else {
964 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
965 ctxt->sax->error(ctxt->userData,
966 "internal error entity has no content\n");
967 }
968 } else if ((ent != NULL) && (ent->content != NULL)) {
969 xmlChar *rep;
970
971 ctxt->depth++;
972 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
973 0, 0, 0);
974 ctxt->depth--;
975 if (rep != NULL) {
976 current = rep;
977 while (*current != 0) { /* non input consuming loop */
978 buffer[nbchars++] = *current++;
979 if (nbchars >
980 buffer_size - XML_PARSER_BUFFER_SIZE) {
981 growBuffer(buffer);
982 }
983 }
984 xmlFree(rep);
985 }
986 } else if (ent != NULL) {
987 int i = xmlStrlen(ent->name);
988 const xmlChar *cur = ent->name;
989
990 buffer[nbchars++] = '&';
991 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
992 growBuffer(buffer);
993 }
994 for (;i > 0;i--)
995 buffer[nbchars++] = *cur++;
996 buffer[nbchars++] = ';';
997 }
998 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
999 if (xmlParserDebugEntities)
1000 xmlGenericError(xmlGenericErrorContext,
1001 "String decoding PE Reference: %.30s\n", str);
1002 ent = xmlParseStringPEReference(ctxt, &str);
1003 if (ent != NULL) {
1004 xmlChar *rep;
1005
1006 ctxt->depth++;
1007 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1008 0, 0, 0);
1009 ctxt->depth--;
1010 if (rep != NULL) {
1011 current = rep;
1012 while (*current != 0) { /* non input consuming loop */
1013 buffer[nbchars++] = *current++;
1014 if (nbchars >
1015 buffer_size - XML_PARSER_BUFFER_SIZE) {
1016 growBuffer(buffer);
1017 }
1018 }
1019 xmlFree(rep);
1020 }
1021 }
1022 } else {
1023 COPY_BUF(l,buffer,nbchars,c);
1024 str += l;
1025 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1026 growBuffer(buffer);
1027 }
1028 }
1029 c = CUR_SCHAR(str, l);
1030 }
1031 buffer[nbchars++] = 0;
1032 return(buffer);
1033}
1034
1035
1036/************************************************************************
1037 * *
1038 * Commodity functions to handle xmlChars *
1039 * *
1040 ************************************************************************/
1041
1042/**
1043 * xmlStrndup:
1044 * @cur: the input xmlChar *
1045 * @len: the len of @cur
1046 *
1047 * a strndup for array of xmlChar's
1048 *
1049 * Returns a new xmlChar * or NULL
1050 */
1051xmlChar *
1052xmlStrndup(const xmlChar *cur, int len) {
1053 xmlChar *ret;
1054
1055 if ((cur == NULL) || (len < 0)) return(NULL);
1056 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1057 if (ret == NULL) {
1058 xmlGenericError(xmlGenericErrorContext,
1059 "malloc of %ld byte failed\n",
1060 (len + 1) * (long)sizeof(xmlChar));
1061 return(NULL);
1062 }
1063 memcpy(ret, cur, len * sizeof(xmlChar));
1064 ret[len] = 0;
1065 return(ret);
1066}
1067
1068/**
1069 * xmlStrdup:
1070 * @cur: the input xmlChar *
1071 *
1072 * a strdup for array of xmlChar's. Since they are supposed to be
1073 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1074 * a termination mark of '0'.
1075 *
1076 * Returns a new xmlChar * or NULL
1077 */
1078xmlChar *
1079xmlStrdup(const xmlChar *cur) {
1080 const xmlChar *p = cur;
1081
1082 if (cur == NULL) return(NULL);
1083 while (*p != 0) p++; /* non input consuming */
1084 return(xmlStrndup(cur, p - cur));
1085}
1086
1087/**
1088 * xmlCharStrndup:
1089 * @cur: the input char *
1090 * @len: the len of @cur
1091 *
1092 * a strndup for char's to xmlChar's
1093 *
1094 * Returns a new xmlChar * or NULL
1095 */
1096
1097xmlChar *
1098xmlCharStrndup(const char *cur, int len) {
1099 int i;
1100 xmlChar *ret;
1101
1102 if ((cur == NULL) || (len < 0)) return(NULL);
1103 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1104 if (ret == NULL) {
1105 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1106 (len + 1) * (long)sizeof(xmlChar));
1107 return(NULL);
1108 }
1109 for (i = 0;i < len;i++)
1110 ret[i] = (xmlChar) cur[i];
1111 ret[len] = 0;
1112 return(ret);
1113}
1114
1115/**
1116 * xmlCharStrdup:
1117 * @cur: the input char *
1118 * @len: the len of @cur
1119 *
1120 * a strdup for char's to xmlChar's
1121 *
1122 * Returns a new xmlChar * or NULL
1123 */
1124
1125xmlChar *
1126xmlCharStrdup(const char *cur) {
1127 const char *p = cur;
1128
1129 if (cur == NULL) return(NULL);
1130 while (*p != '\0') p++; /* non input consuming */
1131 return(xmlCharStrndup(cur, p - cur));
1132}
1133
1134/**
1135 * xmlStrcmp:
1136 * @str1: the first xmlChar *
1137 * @str2: the second xmlChar *
1138 *
1139 * a strcmp for xmlChar's
1140 *
1141 * Returns the integer result of the comparison
1142 */
1143
1144int
1145xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1146 register int tmp;
1147
1148 if (str1 == str2) return(0);
1149 if (str1 == NULL) return(-1);
1150 if (str2 == NULL) return(1);
1151 do {
1152 tmp = *str1++ - *str2;
1153 if (tmp != 0) return(tmp);
1154 } while (*str2++ != 0);
1155 return 0;
1156}
1157
1158/**
1159 * xmlStrEqual:
1160 * @str1: the first xmlChar *
1161 * @str2: the second xmlChar *
1162 *
1163 * Check if both string are equal of have same content
1164 * Should be a bit more readable and faster than xmlStrEqual()
1165 *
1166 * Returns 1 if they are equal, 0 if they are different
1167 */
1168
1169int
1170xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1171 if (str1 == str2) return(1);
1172 if (str1 == NULL) return(0);
1173 if (str2 == NULL) return(0);
1174 do {
1175 if (*str1++ != *str2) return(0);
1176 } while (*str2++);
1177 return(1);
1178}
1179
1180/**
1181 * xmlStrncmp:
1182 * @str1: the first xmlChar *
1183 * @str2: the second xmlChar *
1184 * @len: the max comparison length
1185 *
1186 * a strncmp for xmlChar's
1187 *
1188 * Returns the integer result of the comparison
1189 */
1190
1191int
1192xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1193 register int tmp;
1194
1195 if (len <= 0) return(0);
1196 if (str1 == str2) return(0);
1197 if (str1 == NULL) return(-1);
1198 if (str2 == NULL) return(1);
1199 do {
1200 tmp = *str1++ - *str2;
1201 if (tmp != 0 || --len == 0) return(tmp);
1202 } while (*str2++ != 0);
1203 return 0;
1204}
1205
Daniel Veillardb44025c2001-10-11 22:55:55 +00001206static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001207 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1208 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1209 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1210 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1211 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1212 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1213 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1214 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1215 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1216 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1217 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1218 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1219 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1220 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1221 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1222 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1223 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1224 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1225 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1226 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1227 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1228 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1229 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1230 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1231 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1232 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1233 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1234 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1235 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1236 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1237 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1238 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1239};
1240
1241/**
1242 * xmlStrcasecmp:
1243 * @str1: the first xmlChar *
1244 * @str2: the second xmlChar *
1245 *
1246 * a strcasecmp for xmlChar's
1247 *
1248 * Returns the integer result of the comparison
1249 */
1250
1251int
1252xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1253 register int tmp;
1254
1255 if (str1 == str2) return(0);
1256 if (str1 == NULL) return(-1);
1257 if (str2 == NULL) return(1);
1258 do {
1259 tmp = casemap[*str1++] - casemap[*str2];
1260 if (tmp != 0) return(tmp);
1261 } while (*str2++ != 0);
1262 return 0;
1263}
1264
1265/**
1266 * xmlStrncasecmp:
1267 * @str1: the first xmlChar *
1268 * @str2: the second xmlChar *
1269 * @len: the max comparison length
1270 *
1271 * a strncasecmp for xmlChar's
1272 *
1273 * Returns the integer result of the comparison
1274 */
1275
1276int
1277xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1278 register int tmp;
1279
1280 if (len <= 0) return(0);
1281 if (str1 == str2) return(0);
1282 if (str1 == NULL) return(-1);
1283 if (str2 == NULL) return(1);
1284 do {
1285 tmp = casemap[*str1++] - casemap[*str2];
1286 if (tmp != 0 || --len == 0) return(tmp);
1287 } while (*str2++ != 0);
1288 return 0;
1289}
1290
1291/**
1292 * xmlStrchr:
1293 * @str: the xmlChar * array
1294 * @val: the xmlChar to search
1295 *
1296 * a strchr for xmlChar's
1297 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001298 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001299 */
1300
1301const xmlChar *
1302xmlStrchr(const xmlChar *str, xmlChar val) {
1303 if (str == NULL) return(NULL);
1304 while (*str != 0) { /* non input consuming */
1305 if (*str == val) return((xmlChar *) str);
1306 str++;
1307 }
1308 return(NULL);
1309}
1310
1311/**
1312 * xmlStrstr:
1313 * @str: the xmlChar * array (haystack)
1314 * @val: the xmlChar to search (needle)
1315 *
1316 * a strstr for xmlChar's
1317 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001318 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001319 */
1320
1321const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001322xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001323 int n;
1324
1325 if (str == NULL) return(NULL);
1326 if (val == NULL) return(NULL);
1327 n = xmlStrlen(val);
1328
1329 if (n == 0) return(str);
1330 while (*str != 0) { /* non input consuming */
1331 if (*str == *val) {
1332 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1333 }
1334 str++;
1335 }
1336 return(NULL);
1337}
1338
1339/**
1340 * xmlStrcasestr:
1341 * @str: the xmlChar * array (haystack)
1342 * @val: the xmlChar to search (needle)
1343 *
1344 * a case-ignoring strstr for xmlChar's
1345 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001346 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001347 */
1348
1349const xmlChar *
1350xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1351 int n;
1352
1353 if (str == NULL) return(NULL);
1354 if (val == NULL) return(NULL);
1355 n = xmlStrlen(val);
1356
1357 if (n == 0) return(str);
1358 while (*str != 0) { /* non input consuming */
1359 if (casemap[*str] == casemap[*val])
1360 if (!xmlStrncasecmp(str, val, n)) return(str);
1361 str++;
1362 }
1363 return(NULL);
1364}
1365
1366/**
1367 * xmlStrsub:
1368 * @str: the xmlChar * array (haystack)
1369 * @start: the index of the first char (zero based)
1370 * @len: the length of the substring
1371 *
1372 * Extract a substring of a given string
1373 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001374 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001375 */
1376
1377xmlChar *
1378xmlStrsub(const xmlChar *str, int start, int len) {
1379 int i;
1380
1381 if (str == NULL) return(NULL);
1382 if (start < 0) return(NULL);
1383 if (len < 0) return(NULL);
1384
1385 for (i = 0;i < start;i++) {
1386 if (*str == 0) return(NULL);
1387 str++;
1388 }
1389 if (*str == 0) return(NULL);
1390 return(xmlStrndup(str, len));
1391}
1392
1393/**
1394 * xmlStrlen:
1395 * @str: the xmlChar * array
1396 *
1397 * length of a xmlChar's string
1398 *
1399 * Returns the number of xmlChar contained in the ARRAY.
1400 */
1401
1402int
1403xmlStrlen(const xmlChar *str) {
1404 int len = 0;
1405
1406 if (str == NULL) return(0);
1407 while (*str != 0) { /* non input consuming */
1408 str++;
1409 len++;
1410 }
1411 return(len);
1412}
1413
1414/**
1415 * xmlStrncat:
1416 * @cur: the original xmlChar * array
1417 * @add: the xmlChar * array added
1418 * @len: the length of @add
1419 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001420 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001421 * first bytes of @add.
1422 *
1423 * Returns a new xmlChar *, the original @cur is reallocated if needed
1424 * and should not be freed
1425 */
1426
1427xmlChar *
1428xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1429 int size;
1430 xmlChar *ret;
1431
1432 if ((add == NULL) || (len == 0))
1433 return(cur);
1434 if (cur == NULL)
1435 return(xmlStrndup(add, len));
1436
1437 size = xmlStrlen(cur);
1438 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1439 if (ret == NULL) {
1440 xmlGenericError(xmlGenericErrorContext,
1441 "xmlStrncat: realloc of %ld byte failed\n",
1442 (size + len + 1) * (long)sizeof(xmlChar));
1443 return(cur);
1444 }
1445 memcpy(&ret[size], add, len * sizeof(xmlChar));
1446 ret[size + len] = 0;
1447 return(ret);
1448}
1449
1450/**
1451 * xmlStrcat:
1452 * @cur: the original xmlChar * array
1453 * @add: the xmlChar * array added
1454 *
1455 * a strcat for array of xmlChar's. Since they are supposed to be
1456 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1457 * a termination mark of '0'.
1458 *
1459 * Returns a new xmlChar * containing the concatenated string.
1460 */
1461xmlChar *
1462xmlStrcat(xmlChar *cur, const xmlChar *add) {
1463 const xmlChar *p = add;
1464
1465 if (add == NULL) return(cur);
1466 if (cur == NULL)
1467 return(xmlStrdup(add));
1468
1469 while (*p != 0) p++; /* non input consuming */
1470 return(xmlStrncat(cur, add, p - add));
1471}
1472
1473/************************************************************************
1474 * *
1475 * Commodity functions, cleanup needed ? *
1476 * *
1477 ************************************************************************/
1478
1479/**
1480 * areBlanks:
1481 * @ctxt: an XML parser context
1482 * @str: a xmlChar *
1483 * @len: the size of @str
1484 *
1485 * Is this a sequence of blank chars that one can ignore ?
1486 *
1487 * Returns 1 if ignorable 0 otherwise.
1488 */
1489
1490static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1491 int i, ret;
1492 xmlNodePtr lastChild;
1493
Daniel Veillard05c13a22001-09-09 08:38:09 +00001494 /*
1495 * Don't spend time trying to differentiate them, the same callback is
1496 * used !
1497 */
1498 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001499 return(0);
1500
Owen Taylor3473f882001-02-23 17:55:21 +00001501 /*
1502 * Check for xml:space value.
1503 */
1504 if (*(ctxt->space) == 1)
1505 return(0);
1506
1507 /*
1508 * Check that the string is made of blanks
1509 */
1510 for (i = 0;i < len;i++)
1511 if (!(IS_BLANK(str[i]))) return(0);
1512
1513 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001514 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001515 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001516 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001517 if (ctxt->myDoc != NULL) {
1518 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1519 if (ret == 0) return(1);
1520 if (ret == 1) return(0);
1521 }
1522
1523 /*
1524 * Otherwise, heuristic :-\
1525 */
Owen Taylor3473f882001-02-23 17:55:21 +00001526 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001527 if ((ctxt->node->children == NULL) &&
1528 (RAW == '<') && (NXT(1) == '/')) return(0);
1529
1530 lastChild = xmlGetLastChild(ctxt->node);
1531 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001532 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1533 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001534 } else if (xmlNodeIsText(lastChild))
1535 return(0);
1536 else if ((ctxt->node->children != NULL) &&
1537 (xmlNodeIsText(ctxt->node->children)))
1538 return(0);
1539 return(1);
1540}
1541
Owen Taylor3473f882001-02-23 17:55:21 +00001542/************************************************************************
1543 * *
1544 * Extra stuff for namespace support *
1545 * Relates to http://www.w3.org/TR/WD-xml-names *
1546 * *
1547 ************************************************************************/
1548
1549/**
1550 * xmlSplitQName:
1551 * @ctxt: an XML parser context
1552 * @name: an XML parser context
1553 * @prefix: a xmlChar **
1554 *
1555 * parse an UTF8 encoded XML qualified name string
1556 *
1557 * [NS 5] QName ::= (Prefix ':')? LocalPart
1558 *
1559 * [NS 6] Prefix ::= NCName
1560 *
1561 * [NS 7] LocalPart ::= NCName
1562 *
1563 * Returns the local part, and prefix is updated
1564 * to get the Prefix if any.
1565 */
1566
1567xmlChar *
1568xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1569 xmlChar buf[XML_MAX_NAMELEN + 5];
1570 xmlChar *buffer = NULL;
1571 int len = 0;
1572 int max = XML_MAX_NAMELEN;
1573 xmlChar *ret = NULL;
1574 const xmlChar *cur = name;
1575 int c;
1576
1577 *prefix = NULL;
1578
1579#ifndef XML_XML_NAMESPACE
1580 /* xml: prefix is not really a namespace */
1581 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1582 (cur[2] == 'l') && (cur[3] == ':'))
1583 return(xmlStrdup(name));
1584#endif
1585
1586 /* nasty but valid */
1587 if (cur[0] == ':')
1588 return(xmlStrdup(name));
1589
1590 c = *cur++;
1591 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1592 buf[len++] = c;
1593 c = *cur++;
1594 }
1595 if (len >= max) {
1596 /*
1597 * Okay someone managed to make a huge name, so he's ready to pay
1598 * for the processing speed.
1599 */
1600 max = len * 2;
1601
1602 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1603 if (buffer == NULL) {
1604 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1605 ctxt->sax->error(ctxt->userData,
1606 "xmlSplitQName: out of memory\n");
1607 return(NULL);
1608 }
1609 memcpy(buffer, buf, len);
1610 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1611 if (len + 10 > max) {
1612 max *= 2;
1613 buffer = (xmlChar *) xmlRealloc(buffer,
1614 max * sizeof(xmlChar));
1615 if (buffer == NULL) {
1616 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1617 ctxt->sax->error(ctxt->userData,
1618 "xmlSplitQName: out of memory\n");
1619 return(NULL);
1620 }
1621 }
1622 buffer[len++] = c;
1623 c = *cur++;
1624 }
1625 buffer[len] = 0;
1626 }
1627
1628 if (buffer == NULL)
1629 ret = xmlStrndup(buf, len);
1630 else {
1631 ret = buffer;
1632 buffer = NULL;
1633 max = XML_MAX_NAMELEN;
1634 }
1635
1636
1637 if (c == ':') {
1638 c = *cur++;
1639 if (c == 0) return(ret);
1640 *prefix = ret;
1641 len = 0;
1642
1643 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1644 buf[len++] = c;
1645 c = *cur++;
1646 }
1647 if (len >= max) {
1648 /*
1649 * Okay someone managed to make a huge name, so he's ready to pay
1650 * for the processing speed.
1651 */
1652 max = len * 2;
1653
1654 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1655 if (buffer == NULL) {
1656 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1657 ctxt->sax->error(ctxt->userData,
1658 "xmlSplitQName: out of memory\n");
1659 return(NULL);
1660 }
1661 memcpy(buffer, buf, len);
1662 while (c != 0) { /* tested bigname2.xml */
1663 if (len + 10 > max) {
1664 max *= 2;
1665 buffer = (xmlChar *) xmlRealloc(buffer,
1666 max * sizeof(xmlChar));
1667 if (buffer == NULL) {
1668 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1669 ctxt->sax->error(ctxt->userData,
1670 "xmlSplitQName: out of memory\n");
1671 return(NULL);
1672 }
1673 }
1674 buffer[len++] = c;
1675 c = *cur++;
1676 }
1677 buffer[len] = 0;
1678 }
1679
1680 if (buffer == NULL)
1681 ret = xmlStrndup(buf, len);
1682 else {
1683 ret = buffer;
1684 }
1685 }
1686
1687 return(ret);
1688}
1689
1690/************************************************************************
1691 * *
1692 * The parser itself *
1693 * Relates to http://www.w3.org/TR/REC-xml *
1694 * *
1695 ************************************************************************/
1696
Daniel Veillard76d66f42001-05-16 21:05:17 +00001697static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001698/**
1699 * xmlParseName:
1700 * @ctxt: an XML parser context
1701 *
1702 * parse an XML name.
1703 *
1704 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1705 * CombiningChar | Extender
1706 *
1707 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1708 *
1709 * [6] Names ::= Name (S Name)*
1710 *
1711 * Returns the Name parsed or NULL
1712 */
1713
1714xmlChar *
1715xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001716 const xmlChar *in;
1717 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001718 int count = 0;
1719
1720 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001721
1722 /*
1723 * Accelerator for simple ASCII names
1724 */
1725 in = ctxt->input->cur;
1726 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1727 ((*in >= 0x41) && (*in <= 0x5A)) ||
1728 (*in == '_') || (*in == ':')) {
1729 in++;
1730 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1731 ((*in >= 0x41) && (*in <= 0x5A)) ||
1732 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001733 (*in == '_') || (*in == '-') ||
1734 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001735 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001736 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001737 count = in - ctxt->input->cur;
1738 ret = xmlStrndup(ctxt->input->cur, count);
1739 ctxt->input->cur = in;
1740 return(ret);
1741 }
1742 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001743 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001744}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001745
Daniel Veillard76d66f42001-05-16 21:05:17 +00001746static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001747xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1748 xmlChar buf[XML_MAX_NAMELEN + 5];
1749 int len = 0, l;
1750 int c;
1751 int count = 0;
1752
1753 /*
1754 * Handler for more complex cases
1755 */
1756 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001757 c = CUR_CHAR(l);
1758 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1759 (!IS_LETTER(c) && (c != '_') &&
1760 (c != ':'))) {
1761 return(NULL);
1762 }
1763
1764 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1765 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1766 (c == '.') || (c == '-') ||
1767 (c == '_') || (c == ':') ||
1768 (IS_COMBINING(c)) ||
1769 (IS_EXTENDER(c)))) {
1770 if (count++ > 100) {
1771 count = 0;
1772 GROW;
1773 }
1774 COPY_BUF(l,buf,len,c);
1775 NEXTL(l);
1776 c = CUR_CHAR(l);
1777 if (len >= XML_MAX_NAMELEN) {
1778 /*
1779 * Okay someone managed to make a huge name, so he's ready to pay
1780 * for the processing speed.
1781 */
1782 xmlChar *buffer;
1783 int max = len * 2;
1784
1785 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1786 if (buffer == NULL) {
1787 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1788 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001789 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001790 return(NULL);
1791 }
1792 memcpy(buffer, buf, len);
1793 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1794 (c == '.') || (c == '-') ||
1795 (c == '_') || (c == ':') ||
1796 (IS_COMBINING(c)) ||
1797 (IS_EXTENDER(c))) {
1798 if (count++ > 100) {
1799 count = 0;
1800 GROW;
1801 }
1802 if (len + 10 > max) {
1803 max *= 2;
1804 buffer = (xmlChar *) xmlRealloc(buffer,
1805 max * sizeof(xmlChar));
1806 if (buffer == NULL) {
1807 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1808 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001809 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001810 return(NULL);
1811 }
1812 }
1813 COPY_BUF(l,buffer,len,c);
1814 NEXTL(l);
1815 c = CUR_CHAR(l);
1816 }
1817 buffer[len] = 0;
1818 return(buffer);
1819 }
1820 }
1821 return(xmlStrndup(buf, len));
1822}
1823
1824/**
1825 * xmlParseStringName:
1826 * @ctxt: an XML parser context
1827 * @str: a pointer to the string pointer (IN/OUT)
1828 *
1829 * parse an XML name.
1830 *
1831 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1832 * CombiningChar | Extender
1833 *
1834 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1835 *
1836 * [6] Names ::= Name (S Name)*
1837 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001838 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00001839 * is updated to the current location in the string.
1840 */
1841
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001842static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001843xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1844 xmlChar buf[XML_MAX_NAMELEN + 5];
1845 const xmlChar *cur = *str;
1846 int len = 0, l;
1847 int c;
1848
1849 c = CUR_SCHAR(cur, l);
1850 if (!IS_LETTER(c) && (c != '_') &&
1851 (c != ':')) {
1852 return(NULL);
1853 }
1854
1855 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1856 (c == '.') || (c == '-') ||
1857 (c == '_') || (c == ':') ||
1858 (IS_COMBINING(c)) ||
1859 (IS_EXTENDER(c))) {
1860 COPY_BUF(l,buf,len,c);
1861 cur += l;
1862 c = CUR_SCHAR(cur, l);
1863 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1864 /*
1865 * Okay someone managed to make a huge name, so he's ready to pay
1866 * for the processing speed.
1867 */
1868 xmlChar *buffer;
1869 int max = len * 2;
1870
1871 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1872 if (buffer == NULL) {
1873 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1874 ctxt->sax->error(ctxt->userData,
1875 "xmlParseStringName: out of memory\n");
1876 return(NULL);
1877 }
1878 memcpy(buffer, buf, len);
1879 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1880 (c == '.') || (c == '-') ||
1881 (c == '_') || (c == ':') ||
1882 (IS_COMBINING(c)) ||
1883 (IS_EXTENDER(c))) {
1884 if (len + 10 > max) {
1885 max *= 2;
1886 buffer = (xmlChar *) xmlRealloc(buffer,
1887 max * sizeof(xmlChar));
1888 if (buffer == NULL) {
1889 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1890 ctxt->sax->error(ctxt->userData,
1891 "xmlParseStringName: out of memory\n");
1892 return(NULL);
1893 }
1894 }
1895 COPY_BUF(l,buffer,len,c);
1896 cur += l;
1897 c = CUR_SCHAR(cur, l);
1898 }
1899 buffer[len] = 0;
1900 *str = cur;
1901 return(buffer);
1902 }
1903 }
1904 *str = cur;
1905 return(xmlStrndup(buf, len));
1906}
1907
1908/**
1909 * xmlParseNmtoken:
1910 * @ctxt: an XML parser context
1911 *
1912 * parse an XML Nmtoken.
1913 *
1914 * [7] Nmtoken ::= (NameChar)+
1915 *
1916 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1917 *
1918 * Returns the Nmtoken parsed or NULL
1919 */
1920
1921xmlChar *
1922xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1923 xmlChar buf[XML_MAX_NAMELEN + 5];
1924 int len = 0, l;
1925 int c;
1926 int count = 0;
1927
1928 GROW;
1929 c = CUR_CHAR(l);
1930
1931 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1932 (c == '.') || (c == '-') ||
1933 (c == '_') || (c == ':') ||
1934 (IS_COMBINING(c)) ||
1935 (IS_EXTENDER(c))) {
1936 if (count++ > 100) {
1937 count = 0;
1938 GROW;
1939 }
1940 COPY_BUF(l,buf,len,c);
1941 NEXTL(l);
1942 c = CUR_CHAR(l);
1943 if (len >= XML_MAX_NAMELEN) {
1944 /*
1945 * Okay someone managed to make a huge token, so he's ready to pay
1946 * for the processing speed.
1947 */
1948 xmlChar *buffer;
1949 int max = len * 2;
1950
1951 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1952 if (buffer == NULL) {
1953 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1954 ctxt->sax->error(ctxt->userData,
1955 "xmlParseNmtoken: out of memory\n");
1956 return(NULL);
1957 }
1958 memcpy(buffer, buf, len);
1959 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1960 (c == '.') || (c == '-') ||
1961 (c == '_') || (c == ':') ||
1962 (IS_COMBINING(c)) ||
1963 (IS_EXTENDER(c))) {
1964 if (count++ > 100) {
1965 count = 0;
1966 GROW;
1967 }
1968 if (len + 10 > max) {
1969 max *= 2;
1970 buffer = (xmlChar *) xmlRealloc(buffer,
1971 max * sizeof(xmlChar));
1972 if (buffer == NULL) {
1973 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1974 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001975 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001976 return(NULL);
1977 }
1978 }
1979 COPY_BUF(l,buffer,len,c);
1980 NEXTL(l);
1981 c = CUR_CHAR(l);
1982 }
1983 buffer[len] = 0;
1984 return(buffer);
1985 }
1986 }
1987 if (len == 0)
1988 return(NULL);
1989 return(xmlStrndup(buf, len));
1990}
1991
1992/**
1993 * xmlParseEntityValue:
1994 * @ctxt: an XML parser context
1995 * @orig: if non-NULL store a copy of the original entity value
1996 *
1997 * parse a value for ENTITY declarations
1998 *
1999 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2000 * "'" ([^%&'] | PEReference | Reference)* "'"
2001 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002002 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002003 */
2004
2005xmlChar *
2006xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2007 xmlChar *buf = NULL;
2008 int len = 0;
2009 int size = XML_PARSER_BUFFER_SIZE;
2010 int c, l;
2011 xmlChar stop;
2012 xmlChar *ret = NULL;
2013 const xmlChar *cur = NULL;
2014 xmlParserInputPtr input;
2015
2016 if (RAW == '"') stop = '"';
2017 else if (RAW == '\'') stop = '\'';
2018 else {
2019 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2020 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2021 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2022 ctxt->wellFormed = 0;
2023 ctxt->disableSAX = 1;
2024 return(NULL);
2025 }
2026 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2027 if (buf == NULL) {
2028 xmlGenericError(xmlGenericErrorContext,
2029 "malloc of %d byte failed\n", size);
2030 return(NULL);
2031 }
2032
2033 /*
2034 * The content of the entity definition is copied in a buffer.
2035 */
2036
2037 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2038 input = ctxt->input;
2039 GROW;
2040 NEXT;
2041 c = CUR_CHAR(l);
2042 /*
2043 * NOTE: 4.4.5 Included in Literal
2044 * When a parameter entity reference appears in a literal entity
2045 * value, ... a single or double quote character in the replacement
2046 * text is always treated as a normal data character and will not
2047 * terminate the literal.
2048 * In practice it means we stop the loop only when back at parsing
2049 * the initial entity and the quote is found
2050 */
2051 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2052 (ctxt->input != input))) {
2053 if (len + 5 >= size) {
2054 size *= 2;
2055 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2056 if (buf == NULL) {
2057 xmlGenericError(xmlGenericErrorContext,
2058 "realloc of %d byte failed\n", size);
2059 return(NULL);
2060 }
2061 }
2062 COPY_BUF(l,buf,len,c);
2063 NEXTL(l);
2064 /*
2065 * Pop-up of finished entities.
2066 */
2067 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2068 xmlPopInput(ctxt);
2069
2070 GROW;
2071 c = CUR_CHAR(l);
2072 if (c == 0) {
2073 GROW;
2074 c = CUR_CHAR(l);
2075 }
2076 }
2077 buf[len] = 0;
2078
2079 /*
2080 * Raise problem w.r.t. '&' and '%' being used in non-entities
2081 * reference constructs. Note Charref will be handled in
2082 * xmlStringDecodeEntities()
2083 */
2084 cur = buf;
2085 while (*cur != 0) { /* non input consuming */
2086 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2087 xmlChar *name;
2088 xmlChar tmp = *cur;
2089
2090 cur++;
2091 name = xmlParseStringName(ctxt, &cur);
2092 if ((name == NULL) || (*cur != ';')) {
2093 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2094 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2095 ctxt->sax->error(ctxt->userData,
2096 "EntityValue: '%c' forbidden except for entities references\n",
2097 tmp);
2098 ctxt->wellFormed = 0;
2099 ctxt->disableSAX = 1;
2100 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002101 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2102 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002103 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2104 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2105 ctxt->sax->error(ctxt->userData,
2106 "EntityValue: PEReferences forbidden in internal subset\n",
2107 tmp);
2108 ctxt->wellFormed = 0;
2109 ctxt->disableSAX = 1;
2110 }
2111 if (name != NULL)
2112 xmlFree(name);
2113 }
2114 cur++;
2115 }
2116
2117 /*
2118 * Then PEReference entities are substituted.
2119 */
2120 if (c != stop) {
2121 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2122 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2123 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2124 ctxt->wellFormed = 0;
2125 ctxt->disableSAX = 1;
2126 xmlFree(buf);
2127 } else {
2128 NEXT;
2129 /*
2130 * NOTE: 4.4.7 Bypassed
2131 * When a general entity reference appears in the EntityValue in
2132 * an entity declaration, it is bypassed and left as is.
2133 * so XML_SUBSTITUTE_REF is not set here.
2134 */
2135 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2136 0, 0, 0);
2137 if (orig != NULL)
2138 *orig = buf;
2139 else
2140 xmlFree(buf);
2141 }
2142
2143 return(ret);
2144}
2145
2146/**
2147 * xmlParseAttValue:
2148 * @ctxt: an XML parser context
2149 *
2150 * parse a value for an attribute
2151 * Note: the parser won't do substitution of entities here, this
2152 * will be handled later in xmlStringGetNodeList
2153 *
2154 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2155 * "'" ([^<&'] | Reference)* "'"
2156 *
2157 * 3.3.3 Attribute-Value Normalization:
2158 * Before the value of an attribute is passed to the application or
2159 * checked for validity, the XML processor must normalize it as follows:
2160 * - a character reference is processed by appending the referenced
2161 * character to the attribute value
2162 * - an entity reference is processed by recursively processing the
2163 * replacement text of the entity
2164 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2165 * appending #x20 to the normalized value, except that only a single
2166 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2167 * parsed entity or the literal entity value of an internal parsed entity
2168 * - other characters are processed by appending them to the normalized value
2169 * If the declared value is not CDATA, then the XML processor must further
2170 * process the normalized attribute value by discarding any leading and
2171 * trailing space (#x20) characters, and by replacing sequences of space
2172 * (#x20) characters by a single space (#x20) character.
2173 * All attributes for which no declaration has been read should be treated
2174 * by a non-validating parser as if declared CDATA.
2175 *
2176 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2177 */
2178
2179xmlChar *
2180xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2181 xmlChar limit = 0;
2182 xmlChar *buf = NULL;
2183 int len = 0;
2184 int buf_size = 0;
2185 int c, l;
2186 xmlChar *current = NULL;
2187 xmlEntityPtr ent;
2188
2189
2190 SHRINK;
2191 if (NXT(0) == '"') {
2192 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2193 limit = '"';
2194 NEXT;
2195 } else if (NXT(0) == '\'') {
2196 limit = '\'';
2197 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2198 NEXT;
2199 } else {
2200 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2201 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2202 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2203 ctxt->wellFormed = 0;
2204 ctxt->disableSAX = 1;
2205 return(NULL);
2206 }
2207
2208 /*
2209 * allocate a translation buffer.
2210 */
2211 buf_size = XML_PARSER_BUFFER_SIZE;
2212 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2213 if (buf == NULL) {
2214 perror("xmlParseAttValue: malloc failed");
2215 return(NULL);
2216 }
2217
2218 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002219 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002220 */
2221 c = CUR_CHAR(l);
2222 while (((NXT(0) != limit) && /* checked */
2223 (c != '<')) || (ctxt->token != 0)) {
2224 if (c == 0) break;
2225 if (ctxt->token == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002226 if (ctxt->replaceEntities) {
2227 if (len > buf_size - 10) {
2228 growBuffer(buf);
2229 }
2230 buf[len++] = '&';
2231 } else {
2232 /*
2233 * The reparsing will be done in xmlStringGetNodeList()
2234 * called by the attribute() function in SAX.c
2235 */
2236 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002237
Daniel Veillard319a7422001-09-11 09:27:09 +00002238 if (len > buf_size - 10) {
2239 growBuffer(buf);
2240 }
2241 current = &buffer[0];
2242 while (*current != 0) { /* non input consuming */
2243 buf[len++] = *current++;
2244 }
2245 ctxt->token = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002246 }
Owen Taylor3473f882001-02-23 17:55:21 +00002247 } else if (c == '&') {
2248 if (NXT(1) == '#') {
2249 int val = xmlParseCharRef(ctxt);
2250 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002251 if (ctxt->replaceEntities) {
2252 if (len > buf_size - 10) {
2253 growBuffer(buf);
2254 }
2255 buf[len++] = '&';
2256 } else {
2257 /*
2258 * The reparsing will be done in xmlStringGetNodeList()
2259 * called by the attribute() function in SAX.c
2260 */
2261 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002262
Daniel Veillard319a7422001-09-11 09:27:09 +00002263 if (len > buf_size - 10) {
2264 growBuffer(buf);
2265 }
2266 current = &buffer[0];
2267 while (*current != 0) { /* non input consuming */
2268 buf[len++] = *current++;
2269 }
Owen Taylor3473f882001-02-23 17:55:21 +00002270 }
2271 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002272 if (len > buf_size - 10) {
2273 growBuffer(buf);
2274 }
Owen Taylor3473f882001-02-23 17:55:21 +00002275 len += xmlCopyChar(0, &buf[len], val);
2276 }
2277 } else {
2278 ent = xmlParseEntityRef(ctxt);
2279 if ((ent != NULL) &&
2280 (ctxt->replaceEntities != 0)) {
2281 xmlChar *rep;
2282
2283 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2284 rep = xmlStringDecodeEntities(ctxt, ent->content,
2285 XML_SUBSTITUTE_REF, 0, 0, 0);
2286 if (rep != NULL) {
2287 current = rep;
2288 while (*current != 0) { /* non input consuming */
2289 buf[len++] = *current++;
2290 if (len > buf_size - 10) {
2291 growBuffer(buf);
2292 }
2293 }
2294 xmlFree(rep);
2295 }
2296 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002297 if (len > buf_size - 10) {
2298 growBuffer(buf);
2299 }
Owen Taylor3473f882001-02-23 17:55:21 +00002300 if (ent->content != NULL)
2301 buf[len++] = ent->content[0];
2302 }
2303 } else if (ent != NULL) {
2304 int i = xmlStrlen(ent->name);
2305 const xmlChar *cur = ent->name;
2306
2307 /*
2308 * This may look absurd but is needed to detect
2309 * entities problems
2310 */
2311 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2312 (ent->content != NULL)) {
2313 xmlChar *rep;
2314 rep = xmlStringDecodeEntities(ctxt, ent->content,
2315 XML_SUBSTITUTE_REF, 0, 0, 0);
2316 if (rep != NULL)
2317 xmlFree(rep);
2318 }
2319
2320 /*
2321 * Just output the reference
2322 */
2323 buf[len++] = '&';
2324 if (len > buf_size - i - 10) {
2325 growBuffer(buf);
2326 }
2327 for (;i > 0;i--)
2328 buf[len++] = *cur++;
2329 buf[len++] = ';';
2330 }
2331 }
2332 } else {
2333 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2334 COPY_BUF(l,buf,len,0x20);
2335 if (len > buf_size - 10) {
2336 growBuffer(buf);
2337 }
2338 } else {
2339 COPY_BUF(l,buf,len,c);
2340 if (len > buf_size - 10) {
2341 growBuffer(buf);
2342 }
2343 }
2344 NEXTL(l);
2345 }
2346 GROW;
2347 c = CUR_CHAR(l);
2348 }
2349 buf[len++] = 0;
2350 if (RAW == '<') {
2351 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2352 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2353 ctxt->sax->error(ctxt->userData,
2354 "Unescaped '<' not allowed in attributes values\n");
2355 ctxt->wellFormed = 0;
2356 ctxt->disableSAX = 1;
2357 } else if (RAW != limit) {
2358 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2359 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2360 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2361 ctxt->wellFormed = 0;
2362 ctxt->disableSAX = 1;
2363 } else
2364 NEXT;
2365 return(buf);
2366}
2367
2368/**
2369 * xmlParseSystemLiteral:
2370 * @ctxt: an XML parser context
2371 *
2372 * parse an XML Literal
2373 *
2374 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2375 *
2376 * Returns the SystemLiteral parsed or NULL
2377 */
2378
2379xmlChar *
2380xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2381 xmlChar *buf = NULL;
2382 int len = 0;
2383 int size = XML_PARSER_BUFFER_SIZE;
2384 int cur, l;
2385 xmlChar stop;
2386 int state = ctxt->instate;
2387 int count = 0;
2388
2389 SHRINK;
2390 if (RAW == '"') {
2391 NEXT;
2392 stop = '"';
2393 } else if (RAW == '\'') {
2394 NEXT;
2395 stop = '\'';
2396 } else {
2397 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2398 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2399 ctxt->sax->error(ctxt->userData,
2400 "SystemLiteral \" or ' expected\n");
2401 ctxt->wellFormed = 0;
2402 ctxt->disableSAX = 1;
2403 return(NULL);
2404 }
2405
2406 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2407 if (buf == NULL) {
2408 xmlGenericError(xmlGenericErrorContext,
2409 "malloc of %d byte failed\n", size);
2410 return(NULL);
2411 }
2412 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2413 cur = CUR_CHAR(l);
2414 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2415 if (len + 5 >= size) {
2416 size *= 2;
2417 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2418 if (buf == NULL) {
2419 xmlGenericError(xmlGenericErrorContext,
2420 "realloc of %d byte failed\n", size);
2421 ctxt->instate = (xmlParserInputState) state;
2422 return(NULL);
2423 }
2424 }
2425 count++;
2426 if (count > 50) {
2427 GROW;
2428 count = 0;
2429 }
2430 COPY_BUF(l,buf,len,cur);
2431 NEXTL(l);
2432 cur = CUR_CHAR(l);
2433 if (cur == 0) {
2434 GROW;
2435 SHRINK;
2436 cur = CUR_CHAR(l);
2437 }
2438 }
2439 buf[len] = 0;
2440 ctxt->instate = (xmlParserInputState) state;
2441 if (!IS_CHAR(cur)) {
2442 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2443 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2444 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2445 ctxt->wellFormed = 0;
2446 ctxt->disableSAX = 1;
2447 } else {
2448 NEXT;
2449 }
2450 return(buf);
2451}
2452
2453/**
2454 * xmlParsePubidLiteral:
2455 * @ctxt: an XML parser context
2456 *
2457 * parse an XML public literal
2458 *
2459 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2460 *
2461 * Returns the PubidLiteral parsed or NULL.
2462 */
2463
2464xmlChar *
2465xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2466 xmlChar *buf = NULL;
2467 int len = 0;
2468 int size = XML_PARSER_BUFFER_SIZE;
2469 xmlChar cur;
2470 xmlChar stop;
2471 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002472 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00002473
2474 SHRINK;
2475 if (RAW == '"') {
2476 NEXT;
2477 stop = '"';
2478 } else if (RAW == '\'') {
2479 NEXT;
2480 stop = '\'';
2481 } else {
2482 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2483 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2484 ctxt->sax->error(ctxt->userData,
2485 "SystemLiteral \" or ' expected\n");
2486 ctxt->wellFormed = 0;
2487 ctxt->disableSAX = 1;
2488 return(NULL);
2489 }
2490 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2491 if (buf == NULL) {
2492 xmlGenericError(xmlGenericErrorContext,
2493 "malloc of %d byte failed\n", size);
2494 return(NULL);
2495 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002496 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00002497 cur = CUR;
2498 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2499 if (len + 1 >= size) {
2500 size *= 2;
2501 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2502 if (buf == NULL) {
2503 xmlGenericError(xmlGenericErrorContext,
2504 "realloc of %d byte failed\n", size);
2505 return(NULL);
2506 }
2507 }
2508 buf[len++] = cur;
2509 count++;
2510 if (count > 50) {
2511 GROW;
2512 count = 0;
2513 }
2514 NEXT;
2515 cur = CUR;
2516 if (cur == 0) {
2517 GROW;
2518 SHRINK;
2519 cur = CUR;
2520 }
2521 }
2522 buf[len] = 0;
2523 if (cur != stop) {
2524 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2525 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2526 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2527 ctxt->wellFormed = 0;
2528 ctxt->disableSAX = 1;
2529 } else {
2530 NEXT;
2531 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002532 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00002533 return(buf);
2534}
2535
Daniel Veillard48b2f892001-02-25 16:11:03 +00002536void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002537/**
2538 * xmlParseCharData:
2539 * @ctxt: an XML parser context
2540 * @cdata: int indicating whether we are within a CDATA section
2541 *
2542 * parse a CharData section.
2543 * if we are within a CDATA section ']]>' marks an end of section.
2544 *
2545 * The right angle bracket (>) may be represented using the string "&gt;",
2546 * and must, for compatibility, be escaped using "&gt;" or a character
2547 * reference when it appears in the string "]]>" in content, when that
2548 * string is not marking the end of a CDATA section.
2549 *
2550 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2551 */
2552
2553void
2554xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002555 const xmlChar *in;
2556 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002557 int line = ctxt->input->line;
2558 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002559
2560 SHRINK;
2561 GROW;
2562 /*
2563 * Accelerated common case where input don't need to be
2564 * modified before passing it to the handler.
2565 */
2566 if ((ctxt->token == 0) && (!cdata)) {
2567 in = ctxt->input->cur;
2568 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002569get_more:
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002570 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
Daniel Veillard48b2f892001-02-25 16:11:03 +00002571 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2572 in++;
2573 if (*in == 0xA) {
2574 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002575 in++;
2576 while (*in == 0xA) {
2577 ctxt->input->line++;
2578 in++;
2579 }
2580 goto get_more;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002581 }
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002582 if (*in == ']') {
2583 if ((in[1] == ']') && (in[2] == '>')) {
2584 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2585 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2586 ctxt->sax->error(ctxt->userData,
2587 "Sequence ']]>' not allowed in content\n");
2588 ctxt->input->cur = in;
2589 ctxt->wellFormed = 0;
2590 ctxt->disableSAX = 1;
2591 return;
2592 }
2593 in++;
2594 goto get_more;
2595 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002596 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002597 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002598 if (IS_BLANK(*ctxt->input->cur)) {
2599 const xmlChar *tmp = ctxt->input->cur;
2600 ctxt->input->cur = in;
2601 if (areBlanks(ctxt, tmp, nbchar)) {
2602 if (ctxt->sax->ignorableWhitespace != NULL)
2603 ctxt->sax->ignorableWhitespace(ctxt->userData,
2604 tmp, nbchar);
2605 } else {
2606 if (ctxt->sax->characters != NULL)
2607 ctxt->sax->characters(ctxt->userData,
2608 tmp, nbchar);
2609 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002610 line = ctxt->input->line;
2611 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002612 } else {
2613 if (ctxt->sax->characters != NULL)
2614 ctxt->sax->characters(ctxt->userData,
2615 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002616 line = ctxt->input->line;
2617 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002618 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002619 }
2620 ctxt->input->cur = in;
2621 if (*in == 0xD) {
2622 in++;
2623 if (*in == 0xA) {
2624 ctxt->input->cur = in;
2625 in++;
2626 ctxt->input->line++;
2627 continue; /* while */
2628 }
2629 in--;
2630 }
Daniel Veillard80f32572001-03-07 19:45:40 +00002631 if (*in == '<') {
2632 return;
2633 }
2634 if (*in == '&') {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002635 return;
2636 }
2637 SHRINK;
2638 GROW;
2639 in = ctxt->input->cur;
2640 } while ((*in >= 0x20) && (*in <= 0x7F));
2641 nbchar = 0;
2642 }
Daniel Veillard50582112001-03-26 22:52:16 +00002643 ctxt->input->line = line;
2644 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002645 xmlParseCharDataComplex(ctxt, cdata);
2646}
2647
2648void
2649xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002650 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2651 int nbchar = 0;
2652 int cur, l;
2653 int count = 0;
2654
2655 SHRINK;
2656 GROW;
2657 cur = CUR_CHAR(l);
2658 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2659 ((cur != '&') || (ctxt->token == '&')) &&
2660 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2661 if ((cur == ']') && (NXT(1) == ']') &&
2662 (NXT(2) == '>')) {
2663 if (cdata) break;
2664 else {
2665 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2666 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2667 ctxt->sax->error(ctxt->userData,
2668 "Sequence ']]>' not allowed in content\n");
2669 /* Should this be relaxed ??? I see a "must here */
2670 ctxt->wellFormed = 0;
2671 ctxt->disableSAX = 1;
2672 }
2673 }
2674 COPY_BUF(l,buf,nbchar,cur);
2675 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2676 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002677 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002678 */
2679 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2680 if (areBlanks(ctxt, buf, nbchar)) {
2681 if (ctxt->sax->ignorableWhitespace != NULL)
2682 ctxt->sax->ignorableWhitespace(ctxt->userData,
2683 buf, nbchar);
2684 } else {
2685 if (ctxt->sax->characters != NULL)
2686 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2687 }
2688 }
2689 nbchar = 0;
2690 }
2691 count++;
2692 if (count > 50) {
2693 GROW;
2694 count = 0;
2695 }
2696 NEXTL(l);
2697 cur = CUR_CHAR(l);
2698 }
2699 if (nbchar != 0) {
2700 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002701 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002702 */
2703 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2704 if (areBlanks(ctxt, buf, nbchar)) {
2705 if (ctxt->sax->ignorableWhitespace != NULL)
2706 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2707 } else {
2708 if (ctxt->sax->characters != NULL)
2709 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2710 }
2711 }
2712 }
2713}
2714
2715/**
2716 * xmlParseExternalID:
2717 * @ctxt: an XML parser context
2718 * @publicID: a xmlChar** receiving PubidLiteral
2719 * @strict: indicate whether we should restrict parsing to only
2720 * production [75], see NOTE below
2721 *
2722 * Parse an External ID or a Public ID
2723 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002724 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002725 * 'PUBLIC' S PubidLiteral S SystemLiteral
2726 *
2727 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2728 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2729 *
2730 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2731 *
2732 * Returns the function returns SystemLiteral and in the second
2733 * case publicID receives PubidLiteral, is strict is off
2734 * it is possible to return NULL and have publicID set.
2735 */
2736
2737xmlChar *
2738xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2739 xmlChar *URI = NULL;
2740
2741 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002742
2743 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002744 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2745 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2746 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2747 SKIP(6);
2748 if (!IS_BLANK(CUR)) {
2749 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2750 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2751 ctxt->sax->error(ctxt->userData,
2752 "Space required after 'SYSTEM'\n");
2753 ctxt->wellFormed = 0;
2754 ctxt->disableSAX = 1;
2755 }
2756 SKIP_BLANKS;
2757 URI = xmlParseSystemLiteral(ctxt);
2758 if (URI == NULL) {
2759 ctxt->errNo = XML_ERR_URI_REQUIRED;
2760 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2761 ctxt->sax->error(ctxt->userData,
2762 "xmlParseExternalID: SYSTEM, no URI\n");
2763 ctxt->wellFormed = 0;
2764 ctxt->disableSAX = 1;
2765 }
2766 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2767 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2768 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2769 SKIP(6);
2770 if (!IS_BLANK(CUR)) {
2771 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2772 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2773 ctxt->sax->error(ctxt->userData,
2774 "Space required after 'PUBLIC'\n");
2775 ctxt->wellFormed = 0;
2776 ctxt->disableSAX = 1;
2777 }
2778 SKIP_BLANKS;
2779 *publicID = xmlParsePubidLiteral(ctxt);
2780 if (*publicID == NULL) {
2781 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2782 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2783 ctxt->sax->error(ctxt->userData,
2784 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2785 ctxt->wellFormed = 0;
2786 ctxt->disableSAX = 1;
2787 }
2788 if (strict) {
2789 /*
2790 * We don't handle [83] so "S SystemLiteral" is required.
2791 */
2792 if (!IS_BLANK(CUR)) {
2793 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2794 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2795 ctxt->sax->error(ctxt->userData,
2796 "Space required after the Public Identifier\n");
2797 ctxt->wellFormed = 0;
2798 ctxt->disableSAX = 1;
2799 }
2800 } else {
2801 /*
2802 * We handle [83] so we return immediately, if
2803 * "S SystemLiteral" is not detected. From a purely parsing
2804 * point of view that's a nice mess.
2805 */
2806 const xmlChar *ptr;
2807 GROW;
2808
2809 ptr = CUR_PTR;
2810 if (!IS_BLANK(*ptr)) return(NULL);
2811
2812 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2813 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2814 }
2815 SKIP_BLANKS;
2816 URI = xmlParseSystemLiteral(ctxt);
2817 if (URI == NULL) {
2818 ctxt->errNo = XML_ERR_URI_REQUIRED;
2819 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2820 ctxt->sax->error(ctxt->userData,
2821 "xmlParseExternalID: PUBLIC, no URI\n");
2822 ctxt->wellFormed = 0;
2823 ctxt->disableSAX = 1;
2824 }
2825 }
2826 return(URI);
2827}
2828
2829/**
2830 * xmlParseComment:
2831 * @ctxt: an XML parser context
2832 *
2833 * Skip an XML (SGML) comment <!-- .... -->
2834 * The spec says that "For compatibility, the string "--" (double-hyphen)
2835 * must not occur within comments. "
2836 *
2837 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2838 */
2839void
2840xmlParseComment(xmlParserCtxtPtr ctxt) {
2841 xmlChar *buf = NULL;
2842 int len;
2843 int size = XML_PARSER_BUFFER_SIZE;
2844 int q, ql;
2845 int r, rl;
2846 int cur, l;
2847 xmlParserInputState state;
2848 xmlParserInputPtr input = ctxt->input;
2849 int count = 0;
2850
2851 /*
2852 * Check that there is a comment right here.
2853 */
2854 if ((RAW != '<') || (NXT(1) != '!') ||
2855 (NXT(2) != '-') || (NXT(3) != '-')) return;
2856
2857 state = ctxt->instate;
2858 ctxt->instate = XML_PARSER_COMMENT;
2859 SHRINK;
2860 SKIP(4);
2861 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2862 if (buf == NULL) {
2863 xmlGenericError(xmlGenericErrorContext,
2864 "malloc of %d byte failed\n", size);
2865 ctxt->instate = state;
2866 return;
2867 }
2868 q = CUR_CHAR(ql);
2869 NEXTL(ql);
2870 r = CUR_CHAR(rl);
2871 NEXTL(rl);
2872 cur = CUR_CHAR(l);
2873 len = 0;
2874 while (IS_CHAR(cur) && /* checked */
2875 ((cur != '>') ||
2876 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002877 if ((r == '-') && (q == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002878 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2879 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2880 ctxt->sax->error(ctxt->userData,
2881 "Comment must not contain '--' (double-hyphen)`\n");
2882 ctxt->wellFormed = 0;
2883 ctxt->disableSAX = 1;
2884 }
2885 if (len + 5 >= size) {
2886 size *= 2;
2887 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2888 if (buf == NULL) {
2889 xmlGenericError(xmlGenericErrorContext,
2890 "realloc of %d byte failed\n", size);
2891 ctxt->instate = state;
2892 return;
2893 }
2894 }
2895 COPY_BUF(ql,buf,len,q);
2896 q = r;
2897 ql = rl;
2898 r = cur;
2899 rl = l;
2900
2901 count++;
2902 if (count > 50) {
2903 GROW;
2904 count = 0;
2905 }
2906 NEXTL(l);
2907 cur = CUR_CHAR(l);
2908 if (cur == 0) {
2909 SHRINK;
2910 GROW;
2911 cur = CUR_CHAR(l);
2912 }
2913 }
2914 buf[len] = 0;
2915 if (!IS_CHAR(cur)) {
2916 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2917 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2918 ctxt->sax->error(ctxt->userData,
2919 "Comment not terminated \n<!--%.50s\n", buf);
2920 ctxt->wellFormed = 0;
2921 ctxt->disableSAX = 1;
2922 xmlFree(buf);
2923 } else {
2924 if (input != ctxt->input) {
2925 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2926 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2927 ctxt->sax->error(ctxt->userData,
2928"Comment doesn't start and stop in the same entity\n");
2929 ctxt->wellFormed = 0;
2930 ctxt->disableSAX = 1;
2931 }
2932 NEXT;
2933 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2934 (!ctxt->disableSAX))
2935 ctxt->sax->comment(ctxt->userData, buf);
2936 xmlFree(buf);
2937 }
2938 ctxt->instate = state;
2939}
2940
2941/**
2942 * xmlParsePITarget:
2943 * @ctxt: an XML parser context
2944 *
2945 * parse the name of a PI
2946 *
2947 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2948 *
2949 * Returns the PITarget name or NULL
2950 */
2951
2952xmlChar *
2953xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2954 xmlChar *name;
2955
2956 name = xmlParseName(ctxt);
2957 if ((name != NULL) &&
2958 ((name[0] == 'x') || (name[0] == 'X')) &&
2959 ((name[1] == 'm') || (name[1] == 'M')) &&
2960 ((name[2] == 'l') || (name[2] == 'L'))) {
2961 int i;
2962 if ((name[0] == 'x') && (name[1] == 'm') &&
2963 (name[2] == 'l') && (name[3] == 0)) {
2964 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2965 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2966 ctxt->sax->error(ctxt->userData,
2967 "XML declaration allowed only at the start of the document\n");
2968 ctxt->wellFormed = 0;
2969 ctxt->disableSAX = 1;
2970 return(name);
2971 } else if (name[3] == 0) {
2972 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2973 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2974 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2975 ctxt->wellFormed = 0;
2976 ctxt->disableSAX = 1;
2977 return(name);
2978 }
2979 for (i = 0;;i++) {
2980 if (xmlW3CPIs[i] == NULL) break;
2981 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2982 return(name);
2983 }
2984 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2985 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2986 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002987 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002988 }
2989 }
2990 return(name);
2991}
2992
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype informations
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must be of the form  catalog = "URL"  (or with single
 * quotes) with optional blanks around each part and nothing after the
 * closing quote. A syntax error raises a warning, except when the '='
 * is missing, in which case the PI is silently ignored.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *url = NULL;
    xmlChar quote;
    int ok = 0;

    /* single pass block: a break means a syntax error was found */
    do {
        for (; IS_BLANK(*p); p++)
            ;
        if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
            break;
        p += 7;

        for (; IS_BLANK(*p); p++)
            ;
        if (*p != '=')
            return;
        p++;

        for (; IS_BLANK(*p); p++)
            ;
        quote = *p;
        if ((quote != '\'') && (quote != '"'))
            break;
        p++;

        /* the URL extends up to the matching quote */
        start = p;
        for (; (*p != 0) && (*p != quote); p++)
            ;
        if (*p == 0)
            break;
        url = xmlStrndup(start, p - start);
        p++;

        /* only blanks are allowed after the closing quote */
        for (; IS_BLANK(*p); p++)
            ;
        if (*p != 0)
            break;

        ok = 1;
    } while (0);

    if (ok) {
        if (url != NULL) {
            ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, url);
            xmlFree(url);
        }
        return;
    }

    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (url != NULL)
        xmlFree(url);
}
#endif
3055
Owen Taylor3473f882001-02-23 17:55:21 +00003056/**
3057 * xmlParsePI:
3058 * @ctxt: an XML parser context
3059 *
3060 * parse an XML Processing Instruction.
3061 *
3062 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3063 *
3064 * The processing is transfered to SAX once parsed.
3065 */
3066
3067void
3068xmlParsePI(xmlParserCtxtPtr ctxt) {
3069 xmlChar *buf = NULL;
3070 int len = 0;
3071 int size = XML_PARSER_BUFFER_SIZE;
3072 int cur, l;
3073 xmlChar *target;
3074 xmlParserInputState state;
3075 int count = 0;
3076
3077 if ((RAW == '<') && (NXT(1) == '?')) {
3078 xmlParserInputPtr input = ctxt->input;
3079 state = ctxt->instate;
3080 ctxt->instate = XML_PARSER_PI;
3081 /*
3082 * this is a Processing Instruction.
3083 */
3084 SKIP(2);
3085 SHRINK;
3086
3087 /*
3088 * Parse the target name and check for special support like
3089 * namespace.
3090 */
3091 target = xmlParsePITarget(ctxt);
3092 if (target != NULL) {
3093 if ((RAW == '?') && (NXT(1) == '>')) {
3094 if (input != ctxt->input) {
3095 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3096 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3097 ctxt->sax->error(ctxt->userData,
3098 "PI declaration doesn't start and stop in the same entity\n");
3099 ctxt->wellFormed = 0;
3100 ctxt->disableSAX = 1;
3101 }
3102 SKIP(2);
3103
3104 /*
3105 * SAX: PI detected.
3106 */
3107 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3108 (ctxt->sax->processingInstruction != NULL))
3109 ctxt->sax->processingInstruction(ctxt->userData,
3110 target, NULL);
3111 ctxt->instate = state;
3112 xmlFree(target);
3113 return;
3114 }
3115 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3116 if (buf == NULL) {
3117 xmlGenericError(xmlGenericErrorContext,
3118 "malloc of %d byte failed\n", size);
3119 ctxt->instate = state;
3120 return;
3121 }
3122 cur = CUR;
3123 if (!IS_BLANK(cur)) {
3124 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3125 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3126 ctxt->sax->error(ctxt->userData,
3127 "xmlParsePI: PI %s space expected\n", target);
3128 ctxt->wellFormed = 0;
3129 ctxt->disableSAX = 1;
3130 }
3131 SKIP_BLANKS;
3132 cur = CUR_CHAR(l);
3133 while (IS_CHAR(cur) && /* checked */
3134 ((cur != '?') || (NXT(1) != '>'))) {
3135 if (len + 5 >= size) {
3136 size *= 2;
3137 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3138 if (buf == NULL) {
3139 xmlGenericError(xmlGenericErrorContext,
3140 "realloc of %d byte failed\n", size);
3141 ctxt->instate = state;
3142 return;
3143 }
3144 }
3145 count++;
3146 if (count > 50) {
3147 GROW;
3148 count = 0;
3149 }
3150 COPY_BUF(l,buf,len,cur);
3151 NEXTL(l);
3152 cur = CUR_CHAR(l);
3153 if (cur == 0) {
3154 SHRINK;
3155 GROW;
3156 cur = CUR_CHAR(l);
3157 }
3158 }
3159 buf[len] = 0;
3160 if (cur != '?') {
3161 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3162 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3163 ctxt->sax->error(ctxt->userData,
3164 "xmlParsePI: PI %s never end ...\n", target);
3165 ctxt->wellFormed = 0;
3166 ctxt->disableSAX = 1;
3167 } else {
3168 if (input != ctxt->input) {
3169 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3170 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3171 ctxt->sax->error(ctxt->userData,
3172 "PI declaration doesn't start and stop in the same entity\n");
3173 ctxt->wellFormed = 0;
3174 ctxt->disableSAX = 1;
3175 }
3176 SKIP(2);
3177
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003178#ifdef LIBXML_CATALOG_ENABLED
3179 if (((state == XML_PARSER_MISC) ||
3180 (state == XML_PARSER_START)) &&
3181 (xmlStrEqual(target, XML_CATALOG_PI))) {
3182 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3183 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3184 (allow == XML_CATA_ALLOW_ALL))
3185 xmlParseCatalogPI(ctxt, buf);
3186 }
3187#endif
3188
3189
Owen Taylor3473f882001-02-23 17:55:21 +00003190 /*
3191 * SAX: PI detected.
3192 */
3193 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3194 (ctxt->sax->processingInstruction != NULL))
3195 ctxt->sax->processingInstruction(ctxt->userData,
3196 target, buf);
3197 }
3198 xmlFree(buf);
3199 xmlFree(target);
3200 } else {
3201 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3202 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3203 ctxt->sax->error(ctxt->userData,
3204 "xmlParsePI : no target name\n");
3205 ctxt->wellFormed = 0;
3206 ctxt->disableSAX = 1;
3207 }
3208 ctxt->instate = state;
3209 }
3210}
3211
3212/**
3213 * xmlParseNotationDecl:
3214 * @ctxt: an XML parser context
3215 *
3216 * parse a notation declaration
3217 *
3218 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3219 *
3220 * Hence there is actually 3 choices:
3221 * 'PUBLIC' S PubidLiteral
3222 * 'PUBLIC' S PubidLiteral S SystemLiteral
3223 * and 'SYSTEM' S SystemLiteral
3224 *
3225 * See the NOTE on xmlParseExternalID().
3226 */
3227
3228void
3229xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3230 xmlChar *name;
3231 xmlChar *Pubid;
3232 xmlChar *Systemid;
3233
3234 if ((RAW == '<') && (NXT(1) == '!') &&
3235 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3236 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3237 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3238 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3239 xmlParserInputPtr input = ctxt->input;
3240 SHRINK;
3241 SKIP(10);
3242 if (!IS_BLANK(CUR)) {
3243 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3244 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3245 ctxt->sax->error(ctxt->userData,
3246 "Space required after '<!NOTATION'\n");
3247 ctxt->wellFormed = 0;
3248 ctxt->disableSAX = 1;
3249 return;
3250 }
3251 SKIP_BLANKS;
3252
Daniel Veillard76d66f42001-05-16 21:05:17 +00003253 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003254 if (name == NULL) {
3255 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3256 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3257 ctxt->sax->error(ctxt->userData,
3258 "NOTATION: Name expected here\n");
3259 ctxt->wellFormed = 0;
3260 ctxt->disableSAX = 1;
3261 return;
3262 }
3263 if (!IS_BLANK(CUR)) {
3264 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3265 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3266 ctxt->sax->error(ctxt->userData,
3267 "Space required after the NOTATION name'\n");
3268 ctxt->wellFormed = 0;
3269 ctxt->disableSAX = 1;
3270 return;
3271 }
3272 SKIP_BLANKS;
3273
3274 /*
3275 * Parse the IDs.
3276 */
3277 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3278 SKIP_BLANKS;
3279
3280 if (RAW == '>') {
3281 if (input != ctxt->input) {
3282 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3283 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3284 ctxt->sax->error(ctxt->userData,
3285"Notation declaration doesn't start and stop in the same entity\n");
3286 ctxt->wellFormed = 0;
3287 ctxt->disableSAX = 1;
3288 }
3289 NEXT;
3290 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3291 (ctxt->sax->notationDecl != NULL))
3292 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3293 } else {
3294 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3295 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3296 ctxt->sax->error(ctxt->userData,
3297 "'>' required to close NOTATION declaration\n");
3298 ctxt->wellFormed = 0;
3299 ctxt->disableSAX = 1;
3300 }
3301 xmlFree(name);
3302 if (Systemid != NULL) xmlFree(Systemid);
3303 if (Pubid != NULL) xmlFree(Pubid);
3304 }
3305}
3306
3307/**
3308 * xmlParseEntityDecl:
3309 * @ctxt: an XML parser context
3310 *
3311 * parse <!ENTITY declarations
3312 *
3313 * [70] EntityDecl ::= GEDecl | PEDecl
3314 *
3315 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3316 *
3317 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3318 *
3319 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3320 *
3321 * [74] PEDef ::= EntityValue | ExternalID
3322 *
3323 * [76] NDataDecl ::= S 'NDATA' S Name
3324 *
3325 * [ VC: Notation Declared ]
3326 * The Name must match the declared name of a notation.
3327 */
3328
3329void
3330xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3331 xmlChar *name = NULL;
3332 xmlChar *value = NULL;
3333 xmlChar *URI = NULL, *literal = NULL;
3334 xmlChar *ndata = NULL;
3335 int isParameter = 0;
3336 xmlChar *orig = NULL;
3337
3338 GROW;
3339 if ((RAW == '<') && (NXT(1) == '!') &&
3340 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3341 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3342 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3343 xmlParserInputPtr input = ctxt->input;
3344 ctxt->instate = XML_PARSER_ENTITY_DECL;
3345 SHRINK;
3346 SKIP(8);
3347 if (!IS_BLANK(CUR)) {
3348 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3350 ctxt->sax->error(ctxt->userData,
3351 "Space required after '<!ENTITY'\n");
3352 ctxt->wellFormed = 0;
3353 ctxt->disableSAX = 1;
3354 }
3355 SKIP_BLANKS;
3356
3357 if (RAW == '%') {
3358 NEXT;
3359 if (!IS_BLANK(CUR)) {
3360 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3361 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3362 ctxt->sax->error(ctxt->userData,
3363 "Space required after '%'\n");
3364 ctxt->wellFormed = 0;
3365 ctxt->disableSAX = 1;
3366 }
3367 SKIP_BLANKS;
3368 isParameter = 1;
3369 }
3370
Daniel Veillard76d66f42001-05-16 21:05:17 +00003371 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003372 if (name == NULL) {
3373 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3374 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3375 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3376 ctxt->wellFormed = 0;
3377 ctxt->disableSAX = 1;
3378 return;
3379 }
3380 if (!IS_BLANK(CUR)) {
3381 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3382 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3383 ctxt->sax->error(ctxt->userData,
3384 "Space required after the entity name\n");
3385 ctxt->wellFormed = 0;
3386 ctxt->disableSAX = 1;
3387 }
3388 SKIP_BLANKS;
3389
3390 /*
3391 * handle the various case of definitions...
3392 */
3393 if (isParameter) {
3394 if ((RAW == '"') || (RAW == '\'')) {
3395 value = xmlParseEntityValue(ctxt, &orig);
3396 if (value) {
3397 if ((ctxt->sax != NULL) &&
3398 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3399 ctxt->sax->entityDecl(ctxt->userData, name,
3400 XML_INTERNAL_PARAMETER_ENTITY,
3401 NULL, NULL, value);
3402 }
3403 } else {
3404 URI = xmlParseExternalID(ctxt, &literal, 1);
3405 if ((URI == NULL) && (literal == NULL)) {
3406 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3407 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3408 ctxt->sax->error(ctxt->userData,
3409 "Entity value required\n");
3410 ctxt->wellFormed = 0;
3411 ctxt->disableSAX = 1;
3412 }
3413 if (URI) {
3414 xmlURIPtr uri;
3415
3416 uri = xmlParseURI((const char *) URI);
3417 if (uri == NULL) {
3418 ctxt->errNo = XML_ERR_INVALID_URI;
3419 if ((ctxt->sax != NULL) &&
3420 (!ctxt->disableSAX) &&
3421 (ctxt->sax->error != NULL))
3422 ctxt->sax->error(ctxt->userData,
3423 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003424 /*
3425 * This really ought to be a well formedness error
3426 * but the XML Core WG decided otherwise c.f. issue
3427 * E26 of the XML erratas.
3428 */
Owen Taylor3473f882001-02-23 17:55:21 +00003429 } else {
3430 if (uri->fragment != NULL) {
3431 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3432 if ((ctxt->sax != NULL) &&
3433 (!ctxt->disableSAX) &&
3434 (ctxt->sax->error != NULL))
3435 ctxt->sax->error(ctxt->userData,
3436 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003437 /*
3438 * Okay this is foolish to block those but not
3439 * invalid URIs.
3440 */
Owen Taylor3473f882001-02-23 17:55:21 +00003441 ctxt->wellFormed = 0;
3442 } else {
3443 if ((ctxt->sax != NULL) &&
3444 (!ctxt->disableSAX) &&
3445 (ctxt->sax->entityDecl != NULL))
3446 ctxt->sax->entityDecl(ctxt->userData, name,
3447 XML_EXTERNAL_PARAMETER_ENTITY,
3448 literal, URI, NULL);
3449 }
3450 xmlFreeURI(uri);
3451 }
3452 }
3453 }
3454 } else {
3455 if ((RAW == '"') || (RAW == '\'')) {
3456 value = xmlParseEntityValue(ctxt, &orig);
3457 if ((ctxt->sax != NULL) &&
3458 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3459 ctxt->sax->entityDecl(ctxt->userData, name,
3460 XML_INTERNAL_GENERAL_ENTITY,
3461 NULL, NULL, value);
3462 } else {
3463 URI = xmlParseExternalID(ctxt, &literal, 1);
3464 if ((URI == NULL) && (literal == NULL)) {
3465 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3467 ctxt->sax->error(ctxt->userData,
3468 "Entity value required\n");
3469 ctxt->wellFormed = 0;
3470 ctxt->disableSAX = 1;
3471 }
3472 if (URI) {
3473 xmlURIPtr uri;
3474
3475 uri = xmlParseURI((const char *)URI);
3476 if (uri == NULL) {
3477 ctxt->errNo = XML_ERR_INVALID_URI;
3478 if ((ctxt->sax != NULL) &&
3479 (!ctxt->disableSAX) &&
3480 (ctxt->sax->error != NULL))
3481 ctxt->sax->error(ctxt->userData,
3482 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003483 /*
3484 * This really ought to be a well formedness error
3485 * but the XML Core WG decided otherwise c.f. issue
3486 * E26 of the XML erratas.
3487 */
Owen Taylor3473f882001-02-23 17:55:21 +00003488 } else {
3489 if (uri->fragment != NULL) {
3490 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3491 if ((ctxt->sax != NULL) &&
3492 (!ctxt->disableSAX) &&
3493 (ctxt->sax->error != NULL))
3494 ctxt->sax->error(ctxt->userData,
3495 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003496 /*
3497 * Okay this is foolish to block those but not
3498 * invalid URIs.
3499 */
Owen Taylor3473f882001-02-23 17:55:21 +00003500 ctxt->wellFormed = 0;
3501 }
3502 xmlFreeURI(uri);
3503 }
3504 }
3505 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3506 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3507 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3508 ctxt->sax->error(ctxt->userData,
3509 "Space required before 'NDATA'\n");
3510 ctxt->wellFormed = 0;
3511 ctxt->disableSAX = 1;
3512 }
3513 SKIP_BLANKS;
3514 if ((RAW == 'N') && (NXT(1) == 'D') &&
3515 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3516 (NXT(4) == 'A')) {
3517 SKIP(5);
3518 if (!IS_BLANK(CUR)) {
3519 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3520 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3521 ctxt->sax->error(ctxt->userData,
3522 "Space required after 'NDATA'\n");
3523 ctxt->wellFormed = 0;
3524 ctxt->disableSAX = 1;
3525 }
3526 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003527 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003528 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3529 (ctxt->sax->unparsedEntityDecl != NULL))
3530 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3531 literal, URI, ndata);
3532 } else {
3533 if ((ctxt->sax != NULL) &&
3534 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3535 ctxt->sax->entityDecl(ctxt->userData, name,
3536 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3537 literal, URI, NULL);
3538 }
3539 }
3540 }
3541 SKIP_BLANKS;
3542 if (RAW != '>') {
3543 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3544 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3545 ctxt->sax->error(ctxt->userData,
3546 "xmlParseEntityDecl: entity %s not terminated\n", name);
3547 ctxt->wellFormed = 0;
3548 ctxt->disableSAX = 1;
3549 } else {
3550 if (input != ctxt->input) {
3551 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3552 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3553 ctxt->sax->error(ctxt->userData,
3554"Entity declaration doesn't start and stop in the same entity\n");
3555 ctxt->wellFormed = 0;
3556 ctxt->disableSAX = 1;
3557 }
3558 NEXT;
3559 }
3560 if (orig != NULL) {
3561 /*
3562 * Ugly mechanism to save the raw entity value.
3563 */
3564 xmlEntityPtr cur = NULL;
3565
3566 if (isParameter) {
3567 if ((ctxt->sax != NULL) &&
3568 (ctxt->sax->getParameterEntity != NULL))
3569 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3570 } else {
3571 if ((ctxt->sax != NULL) &&
3572 (ctxt->sax->getEntity != NULL))
3573 cur = ctxt->sax->getEntity(ctxt->userData, name);
3574 }
3575 if (cur != NULL) {
3576 if (cur->orig != NULL)
3577 xmlFree(orig);
3578 else
3579 cur->orig = orig;
3580 } else
3581 xmlFree(orig);
3582 }
3583 if (name != NULL) xmlFree(name);
3584 if (value != NULL) xmlFree(value);
3585 if (URI != NULL) xmlFree(URI);
3586 if (literal != NULL) xmlFree(literal);
3587 if (ndata != NULL) xmlFree(ndata);
3588 }
3589}
3590
3591/**
3592 * xmlParseDefaultDecl:
3593 * @ctxt: an XML parser context
3594 * @value: Receive a possible fixed default value for the attribute
3595 *
3596 * Parse an attribute default declaration
3597 *
3598 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3599 *
3600 * [ VC: Required Attribute ]
3601 * if the default declaration is the keyword #REQUIRED, then the
3602 * attribute must be specified for all elements of the type in the
3603 * attribute-list declaration.
3604 *
3605 * [ VC: Attribute Default Legal ]
3606 * The declared default value must meet the lexical constraints of
3607 * the declared attribute type c.f. xmlValidateAttributeDecl()
3608 *
3609 * [ VC: Fixed Attribute Default ]
3610 * if an attribute has a default value declared with the #FIXED
3611 * keyword, instances of that attribute must match the default value.
3612 *
3613 * [ WFC: No < in Attribute Values ]
3614 * handled in xmlParseAttValue()
3615 *
3616 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3617 * or XML_ATTRIBUTE_FIXED.
3618 */
3619
3620int
3621xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3622 int val;
3623 xmlChar *ret;
3624
3625 *value = NULL;
3626 if ((RAW == '#') && (NXT(1) == 'R') &&
3627 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3628 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3629 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3630 (NXT(8) == 'D')) {
3631 SKIP(9);
3632 return(XML_ATTRIBUTE_REQUIRED);
3633 }
3634 if ((RAW == '#') && (NXT(1) == 'I') &&
3635 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3636 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3637 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3638 SKIP(8);
3639 return(XML_ATTRIBUTE_IMPLIED);
3640 }
3641 val = XML_ATTRIBUTE_NONE;
3642 if ((RAW == '#') && (NXT(1) == 'F') &&
3643 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3644 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3645 SKIP(6);
3646 val = XML_ATTRIBUTE_FIXED;
3647 if (!IS_BLANK(CUR)) {
3648 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3649 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3650 ctxt->sax->error(ctxt->userData,
3651 "Space required after '#FIXED'\n");
3652 ctxt->wellFormed = 0;
3653 ctxt->disableSAX = 1;
3654 }
3655 SKIP_BLANKS;
3656 }
3657 ret = xmlParseAttValue(ctxt);
3658 ctxt->instate = XML_PARSER_DTD;
3659 if (ret == NULL) {
3660 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3661 ctxt->sax->error(ctxt->userData,
3662 "Attribute default value declaration error\n");
3663 ctxt->wellFormed = 0;
3664 ctxt->disableSAX = 1;
3665 } else
3666 *value = ret;
3667 return(val);
3668}
3669
3670/**
3671 * xmlParseNotationType:
3672 * @ctxt: an XML parser context
3673 *
3674 * parse an Notation attribute type.
3675 *
3676 * Note: the leading 'NOTATION' S part has already being parsed...
3677 *
3678 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3679 *
3680 * [ VC: Notation Attributes ]
3681 * Values of this type must match one of the notation names included
3682 * in the declaration; all notation names in the declaration must be declared.
3683 *
3684 * Returns: the notation attribute tree built while parsing
3685 */
3686
3687xmlEnumerationPtr
3688xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3689 xmlChar *name;
3690 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3691
3692 if (RAW != '(') {
3693 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3694 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3695 ctxt->sax->error(ctxt->userData,
3696 "'(' required to start 'NOTATION'\n");
3697 ctxt->wellFormed = 0;
3698 ctxt->disableSAX = 1;
3699 return(NULL);
3700 }
3701 SHRINK;
3702 do {
3703 NEXT;
3704 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003705 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003706 if (name == NULL) {
3707 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3708 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3709 ctxt->sax->error(ctxt->userData,
3710 "Name expected in NOTATION declaration\n");
3711 ctxt->wellFormed = 0;
3712 ctxt->disableSAX = 1;
3713 return(ret);
3714 }
3715 cur = xmlCreateEnumeration(name);
3716 xmlFree(name);
3717 if (cur == NULL) return(ret);
3718 if (last == NULL) ret = last = cur;
3719 else {
3720 last->next = cur;
3721 last = cur;
3722 }
3723 SKIP_BLANKS;
3724 } while (RAW == '|');
3725 if (RAW != ')') {
3726 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3727 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3728 ctxt->sax->error(ctxt->userData,
3729 "')' required to finish NOTATION declaration\n");
3730 ctxt->wellFormed = 0;
3731 ctxt->disableSAX = 1;
3732 if ((last != NULL) && (last != ret))
3733 xmlFreeEnumeration(last);
3734 return(ret);
3735 }
3736 NEXT;
3737 return(ret);
3738}
3739
3740/**
3741 * xmlParseEnumerationType:
3742 * @ctxt: an XML parser context
3743 *
3744 * parse an Enumeration attribute type.
3745 *
3746 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3747 *
3748 * [ VC: Enumeration ]
3749 * Values of this type must match one of the Nmtoken tokens in
3750 * the declaration
3751 *
3752 * Returns: the enumeration attribute tree built while parsing
3753 */
3754
3755xmlEnumerationPtr
3756xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3757 xmlChar *name;
3758 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3759
3760 if (RAW != '(') {
3761 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3762 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3763 ctxt->sax->error(ctxt->userData,
3764 "'(' required to start ATTLIST enumeration\n");
3765 ctxt->wellFormed = 0;
3766 ctxt->disableSAX = 1;
3767 return(NULL);
3768 }
3769 SHRINK;
3770 do {
3771 NEXT;
3772 SKIP_BLANKS;
3773 name = xmlParseNmtoken(ctxt);
3774 if (name == NULL) {
3775 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3776 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3777 ctxt->sax->error(ctxt->userData,
3778 "NmToken expected in ATTLIST enumeration\n");
3779 ctxt->wellFormed = 0;
3780 ctxt->disableSAX = 1;
3781 return(ret);
3782 }
3783 cur = xmlCreateEnumeration(name);
3784 xmlFree(name);
3785 if (cur == NULL) return(ret);
3786 if (last == NULL) ret = last = cur;
3787 else {
3788 last->next = cur;
3789 last = cur;
3790 }
3791 SKIP_BLANKS;
3792 } while (RAW == '|');
3793 if (RAW != ')') {
3794 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3795 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3796 ctxt->sax->error(ctxt->userData,
3797 "')' required to finish ATTLIST enumeration\n");
3798 ctxt->wellFormed = 0;
3799 ctxt->disableSAX = 1;
3800 return(ret);
3801 }
3802 NEXT;
3803 return(ret);
3804}
3805
3806/**
3807 * xmlParseEnumeratedType:
3808 * @ctxt: an XML parser context
3809 * @tree: the enumeration tree built while parsing
3810 *
3811 * parse an Enumerated attribute type.
3812 *
3813 * [57] EnumeratedType ::= NotationType | Enumeration
3814 *
3815 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3816 *
3817 *
3818 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3819 */
3820
3821int
3822xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3823 if ((RAW == 'N') && (NXT(1) == 'O') &&
3824 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3825 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3826 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3827 SKIP(8);
3828 if (!IS_BLANK(CUR)) {
3829 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3830 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3831 ctxt->sax->error(ctxt->userData,
3832 "Space required after 'NOTATION'\n");
3833 ctxt->wellFormed = 0;
3834 ctxt->disableSAX = 1;
3835 return(0);
3836 }
3837 SKIP_BLANKS;
3838 *tree = xmlParseNotationType(ctxt);
3839 if (*tree == NULL) return(0);
3840 return(XML_ATTRIBUTE_NOTATION);
3841 }
3842 *tree = xmlParseEnumerationType(ctxt);
3843 if (*tree == NULL) return(0);
3844 return(XML_ATTRIBUTE_ENUMERATION);
3845}
3846
3847/**
3848 * xmlParseAttributeType:
3849 * @ctxt: an XML parser context
3850 * @tree: the enumeration tree built while parsing
3851 *
3852 * parse the Attribute list def for an element
3853 *
3854 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3855 *
3856 * [55] StringType ::= 'CDATA'
3857 *
3858 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3859 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3860 *
3861 * Validity constraints for attribute values syntax are checked in
3862 * xmlValidateAttributeValue()
3863 *
3864 * [ VC: ID ]
3865 * Values of type ID must match the Name production. A name must not
3866 * appear more than once in an XML document as a value of this type;
3867 * i.e., ID values must uniquely identify the elements which bear them.
3868 *
3869 * [ VC: One ID per Element Type ]
3870 * No element type may have more than one ID attribute specified.
3871 *
3872 * [ VC: ID Attribute Default ]
3873 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3874 *
3875 * [ VC: IDREF ]
3876 * Values of type IDREF must match the Name production, and values
3877 * of type IDREFS must match Names; each IDREF Name must match the value
3878 * of an ID attribute on some element in the XML document; i.e. IDREF
3879 * values must match the value of some ID attribute.
3880 *
3881 * [ VC: Entity Name ]
3882 * Values of type ENTITY must match the Name production, values
3883 * of type ENTITIES must match Names; each Entity Name must match the
3884 * name of an unparsed entity declared in the DTD.
3885 *
3886 * [ VC: Name Token ]
3887 * Values of type NMTOKEN must match the Nmtoken production; values
3888 * of type NMTOKENS must match Nmtokens.
3889 *
3890 * Returns the attribute type
3891 */
3892int
3893xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3894 SHRINK;
3895 if ((RAW == 'C') && (NXT(1) == 'D') &&
3896 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3897 (NXT(4) == 'A')) {
3898 SKIP(5);
3899 return(XML_ATTRIBUTE_CDATA);
3900 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3901 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3902 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3903 SKIP(6);
3904 return(XML_ATTRIBUTE_IDREFS);
3905 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3906 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3907 (NXT(4) == 'F')) {
3908 SKIP(5);
3909 return(XML_ATTRIBUTE_IDREF);
3910 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3911 SKIP(2);
3912 return(XML_ATTRIBUTE_ID);
3913 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3914 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3915 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3916 SKIP(6);
3917 return(XML_ATTRIBUTE_ENTITY);
3918 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3919 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3920 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3921 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3922 SKIP(8);
3923 return(XML_ATTRIBUTE_ENTITIES);
3924 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3925 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3926 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3927 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3928 SKIP(8);
3929 return(XML_ATTRIBUTE_NMTOKENS);
3930 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3931 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3932 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3933 (NXT(6) == 'N')) {
3934 SKIP(7);
3935 return(XML_ATTRIBUTE_NMTOKEN);
3936 }
3937 return(xmlParseEnumeratedType(ctxt, tree));
3938}
3939
3940/**
3941 * xmlParseAttributeListDecl:
3942 * @ctxt: an XML parser context
3943 *
3944 * : parse the Attribute list def for an element
3945 *
3946 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3947 *
3948 * [53] AttDef ::= S Name S AttType S DefaultDecl
3949 *
3950 */
3951void
3952xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3953 xmlChar *elemName;
3954 xmlChar *attrName;
3955 xmlEnumerationPtr tree;
3956
3957 if ((RAW == '<') && (NXT(1) == '!') &&
3958 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3959 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3960 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3961 (NXT(8) == 'T')) {
3962 xmlParserInputPtr input = ctxt->input;
3963
3964 SKIP(9);
3965 if (!IS_BLANK(CUR)) {
3966 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3967 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3968 ctxt->sax->error(ctxt->userData,
3969 "Space required after '<!ATTLIST'\n");
3970 ctxt->wellFormed = 0;
3971 ctxt->disableSAX = 1;
3972 }
3973 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003974 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003975 if (elemName == NULL) {
3976 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3977 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3978 ctxt->sax->error(ctxt->userData,
3979 "ATTLIST: no name for Element\n");
3980 ctxt->wellFormed = 0;
3981 ctxt->disableSAX = 1;
3982 return;
3983 }
3984 SKIP_BLANKS;
3985 GROW;
3986 while (RAW != '>') {
3987 const xmlChar *check = CUR_PTR;
3988 int type;
3989 int def;
3990 xmlChar *defaultValue = NULL;
3991
3992 GROW;
3993 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003994 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003995 if (attrName == NULL) {
3996 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3997 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3998 ctxt->sax->error(ctxt->userData,
3999 "ATTLIST: no name for Attribute\n");
4000 ctxt->wellFormed = 0;
4001 ctxt->disableSAX = 1;
4002 break;
4003 }
4004 GROW;
4005 if (!IS_BLANK(CUR)) {
4006 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4007 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4008 ctxt->sax->error(ctxt->userData,
4009 "Space required after the attribute name\n");
4010 ctxt->wellFormed = 0;
4011 ctxt->disableSAX = 1;
4012 if (attrName != NULL)
4013 xmlFree(attrName);
4014 if (defaultValue != NULL)
4015 xmlFree(defaultValue);
4016 break;
4017 }
4018 SKIP_BLANKS;
4019
4020 type = xmlParseAttributeType(ctxt, &tree);
4021 if (type <= 0) {
4022 if (attrName != NULL)
4023 xmlFree(attrName);
4024 if (defaultValue != NULL)
4025 xmlFree(defaultValue);
4026 break;
4027 }
4028
4029 GROW;
4030 if (!IS_BLANK(CUR)) {
4031 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4032 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4033 ctxt->sax->error(ctxt->userData,
4034 "Space required after the attribute type\n");
4035 ctxt->wellFormed = 0;
4036 ctxt->disableSAX = 1;
4037 if (attrName != NULL)
4038 xmlFree(attrName);
4039 if (defaultValue != NULL)
4040 xmlFree(defaultValue);
4041 if (tree != NULL)
4042 xmlFreeEnumeration(tree);
4043 break;
4044 }
4045 SKIP_BLANKS;
4046
4047 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4048 if (def <= 0) {
4049 if (attrName != NULL)
4050 xmlFree(attrName);
4051 if (defaultValue != NULL)
4052 xmlFree(defaultValue);
4053 if (tree != NULL)
4054 xmlFreeEnumeration(tree);
4055 break;
4056 }
4057
4058 GROW;
4059 if (RAW != '>') {
4060 if (!IS_BLANK(CUR)) {
4061 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4062 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4063 ctxt->sax->error(ctxt->userData,
4064 "Space required after the attribute default value\n");
4065 ctxt->wellFormed = 0;
4066 ctxt->disableSAX = 1;
4067 if (attrName != NULL)
4068 xmlFree(attrName);
4069 if (defaultValue != NULL)
4070 xmlFree(defaultValue);
4071 if (tree != NULL)
4072 xmlFreeEnumeration(tree);
4073 break;
4074 }
4075 SKIP_BLANKS;
4076 }
4077 if (check == CUR_PTR) {
4078 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4079 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4080 ctxt->sax->error(ctxt->userData,
4081 "xmlParseAttributeListDecl: detected internal error\n");
4082 if (attrName != NULL)
4083 xmlFree(attrName);
4084 if (defaultValue != NULL)
4085 xmlFree(defaultValue);
4086 if (tree != NULL)
4087 xmlFreeEnumeration(tree);
4088 break;
4089 }
4090 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4091 (ctxt->sax->attributeDecl != NULL))
4092 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4093 type, def, defaultValue, tree);
4094 if (attrName != NULL)
4095 xmlFree(attrName);
4096 if (defaultValue != NULL)
4097 xmlFree(defaultValue);
4098 GROW;
4099 }
4100 if (RAW == '>') {
4101 if (input != ctxt->input) {
4102 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4103 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4104 ctxt->sax->error(ctxt->userData,
4105"Attribute list declaration doesn't start and stop in the same entity\n");
4106 ctxt->wellFormed = 0;
4107 ctxt->disableSAX = 1;
4108 }
4109 NEXT;
4110 }
4111
4112 xmlFree(elemName);
4113 }
4114}
4115
4116/**
4117 * xmlParseElementMixedContentDecl:
4118 * @ctxt: an XML parser context
4119 *
4120 * parse the declaration for a Mixed Element content
4121 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4122 *
4123 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4124 * '(' S? '#PCDATA' S? ')'
4125 *
4126 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4127 *
4128 * [ VC: No Duplicate Types ]
4129 * The same name must not appear more than once in a single
4130 * mixed-content declaration.
4131 *
4132 * returns: the list of the xmlElementContentPtr describing the element choices
4133 */
4134xmlElementContentPtr
4135xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
4136 xmlElementContentPtr ret = NULL, cur = NULL, n;
4137 xmlChar *elem = NULL;
4138
4139 GROW;
4140 if ((RAW == '#') && (NXT(1) == 'P') &&
4141 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4142 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4143 (NXT(6) == 'A')) {
4144 SKIP(7);
4145 SKIP_BLANKS;
4146 SHRINK;
4147 if (RAW == ')') {
4148 ctxt->entity = ctxt->input;
4149 NEXT;
4150 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4151 if (RAW == '*') {
4152 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4153 NEXT;
4154 }
4155 return(ret);
4156 }
4157 if ((RAW == '(') || (RAW == '|')) {
4158 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4159 if (ret == NULL) return(NULL);
4160 }
4161 while (RAW == '|') {
4162 NEXT;
4163 if (elem == NULL) {
4164 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4165 if (ret == NULL) return(NULL);
4166 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004167 if (cur != NULL)
4168 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004169 cur = ret;
4170 } else {
4171 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4172 if (n == NULL) return(NULL);
4173 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004174 if (n->c1 != NULL)
4175 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004176 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004177 if (n != NULL)
4178 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004179 cur = n;
4180 xmlFree(elem);
4181 }
4182 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004183 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004184 if (elem == NULL) {
4185 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4186 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4187 ctxt->sax->error(ctxt->userData,
4188 "xmlParseElementMixedContentDecl : Name expected\n");
4189 ctxt->wellFormed = 0;
4190 ctxt->disableSAX = 1;
4191 xmlFreeElementContent(cur);
4192 return(NULL);
4193 }
4194 SKIP_BLANKS;
4195 GROW;
4196 }
4197 if ((RAW == ')') && (NXT(1) == '*')) {
4198 if (elem != NULL) {
4199 cur->c2 = xmlNewElementContent(elem,
4200 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004201 if (cur->c2 != NULL)
4202 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004203 xmlFree(elem);
4204 }
4205 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4206 ctxt->entity = ctxt->input;
4207 SKIP(2);
4208 } else {
4209 if (elem != NULL) xmlFree(elem);
4210 xmlFreeElementContent(ret);
4211 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4212 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4213 ctxt->sax->error(ctxt->userData,
4214 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4215 ctxt->wellFormed = 0;
4216 ctxt->disableSAX = 1;
4217 return(NULL);
4218 }
4219
4220 } else {
4221 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4222 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4223 ctxt->sax->error(ctxt->userData,
4224 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4225 ctxt->wellFormed = 0;
4226 ctxt->disableSAX = 1;
4227 }
4228 return(ret);
4229}
4230
4231/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004232 * xmlParseElementChildrenContentD:
4233 * @ctxt: an XML parser context
4234 *
4235 * VMS version of xmlParseElementChildrenContentDecl()
4236 *
4237 * Returns the tree of xmlElementContentPtr describing the element
4238 * hierarchy.
4239 */
4240/**
Owen Taylor3473f882001-02-23 17:55:21 +00004241 * xmlParseElementChildrenContentDecl:
4242 * @ctxt: an XML parser context
4243 *
4244 * parse the declaration for a Mixed Element content
4245 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4246 *
4247 *
4248 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4249 *
4250 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4251 *
4252 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4253 *
4254 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4255 *
4256 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4257 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004258 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004259 * opening or closing parentheses in a choice, seq, or Mixed
4260 * construct is contained in the replacement text for a parameter
4261 * entity, both must be contained in the same replacement text. For
4262 * interoperability, if a parameter-entity reference appears in a
4263 * choice, seq, or Mixed construct, its replacement text should not
4264 * be empty, and neither the first nor last non-blank character of
4265 * the replacement text should be a connector (| or ,).
4266 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004267 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004268 * hierarchy.
4269 */
4270xmlElementContentPtr
4271#ifdef VMS
4272xmlParseElementChildrenContentD
4273#else
4274xmlParseElementChildrenContentDecl
4275#endif
4276(xmlParserCtxtPtr ctxt) {
4277 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4278 xmlChar *elem;
4279 xmlChar type = 0;
4280
4281 SKIP_BLANKS;
4282 GROW;
4283 if (RAW == '(') {
4284 /* Recurse on first child */
4285 NEXT;
4286 SKIP_BLANKS;
4287 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4288 SKIP_BLANKS;
4289 GROW;
4290 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004291 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004292 if (elem == NULL) {
4293 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4294 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4295 ctxt->sax->error(ctxt->userData,
4296 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4297 ctxt->wellFormed = 0;
4298 ctxt->disableSAX = 1;
4299 return(NULL);
4300 }
4301 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4302 GROW;
4303 if (RAW == '?') {
4304 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4305 NEXT;
4306 } else if (RAW == '*') {
4307 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4308 NEXT;
4309 } else if (RAW == '+') {
4310 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4311 NEXT;
4312 } else {
4313 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4314 }
4315 xmlFree(elem);
4316 GROW;
4317 }
4318 SKIP_BLANKS;
4319 SHRINK;
4320 while (RAW != ')') {
4321 /*
4322 * Each loop we parse one separator and one element.
4323 */
4324 if (RAW == ',') {
4325 if (type == 0) type = CUR;
4326
4327 /*
4328 * Detect "Name | Name , Name" error
4329 */
4330 else if (type != CUR) {
4331 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4332 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4333 ctxt->sax->error(ctxt->userData,
4334 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4335 type);
4336 ctxt->wellFormed = 0;
4337 ctxt->disableSAX = 1;
4338 if ((op != NULL) && (op != ret))
4339 xmlFreeElementContent(op);
4340 if ((last != NULL) && (last != ret) &&
4341 (last != ret->c1) && (last != ret->c2))
4342 xmlFreeElementContent(last);
4343 if (ret != NULL)
4344 xmlFreeElementContent(ret);
4345 return(NULL);
4346 }
4347 NEXT;
4348
4349 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4350 if (op == NULL) {
4351 xmlFreeElementContent(ret);
4352 return(NULL);
4353 }
4354 if (last == NULL) {
4355 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004356 if (ret != NULL)
4357 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004358 ret = cur = op;
4359 } else {
4360 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004361 if (op != NULL)
4362 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004363 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004364 if (last != NULL)
4365 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004366 cur =op;
4367 last = NULL;
4368 }
4369 } else if (RAW == '|') {
4370 if (type == 0) type = CUR;
4371
4372 /*
4373 * Detect "Name , Name | Name" error
4374 */
4375 else if (type != CUR) {
4376 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4377 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4378 ctxt->sax->error(ctxt->userData,
4379 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4380 type);
4381 ctxt->wellFormed = 0;
4382 ctxt->disableSAX = 1;
4383 if ((op != NULL) && (op != ret) && (op != last))
4384 xmlFreeElementContent(op);
4385 if ((last != NULL) && (last != ret) &&
4386 (last != ret->c1) && (last != ret->c2))
4387 xmlFreeElementContent(last);
4388 if (ret != NULL)
4389 xmlFreeElementContent(ret);
4390 return(NULL);
4391 }
4392 NEXT;
4393
4394 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4395 if (op == NULL) {
4396 if ((op != NULL) && (op != ret))
4397 xmlFreeElementContent(op);
4398 if ((last != NULL) && (last != ret) &&
4399 (last != ret->c1) && (last != ret->c2))
4400 xmlFreeElementContent(last);
4401 if (ret != NULL)
4402 xmlFreeElementContent(ret);
4403 return(NULL);
4404 }
4405 if (last == NULL) {
4406 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004407 if (ret != NULL)
4408 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004409 ret = cur = op;
4410 } else {
4411 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004412 if (op != NULL)
4413 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004414 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004415 if (last != NULL)
4416 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004417 cur =op;
4418 last = NULL;
4419 }
4420 } else {
4421 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4422 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4423 ctxt->sax->error(ctxt->userData,
4424 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4425 ctxt->wellFormed = 0;
4426 ctxt->disableSAX = 1;
4427 if ((op != NULL) && (op != ret))
4428 xmlFreeElementContent(op);
4429 if ((last != NULL) && (last != ret) &&
4430 (last != ret->c1) && (last != ret->c2))
4431 xmlFreeElementContent(last);
4432 if (ret != NULL)
4433 xmlFreeElementContent(ret);
4434 return(NULL);
4435 }
4436 GROW;
4437 SKIP_BLANKS;
4438 GROW;
4439 if (RAW == '(') {
4440 /* Recurse on second child */
4441 NEXT;
4442 SKIP_BLANKS;
4443 last = xmlParseElementChildrenContentDecl(ctxt);
4444 SKIP_BLANKS;
4445 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004446 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004447 if (elem == NULL) {
4448 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4449 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4450 ctxt->sax->error(ctxt->userData,
4451 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4452 ctxt->wellFormed = 0;
4453 ctxt->disableSAX = 1;
4454 if ((op != NULL) && (op != ret))
4455 xmlFreeElementContent(op);
4456 if ((last != NULL) && (last != ret) &&
4457 (last != ret->c1) && (last != ret->c2))
4458 xmlFreeElementContent(last);
4459 if (ret != NULL)
4460 xmlFreeElementContent(ret);
4461 return(NULL);
4462 }
4463 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4464 xmlFree(elem);
4465 if (RAW == '?') {
4466 last->ocur = XML_ELEMENT_CONTENT_OPT;
4467 NEXT;
4468 } else if (RAW == '*') {
4469 last->ocur = XML_ELEMENT_CONTENT_MULT;
4470 NEXT;
4471 } else if (RAW == '+') {
4472 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4473 NEXT;
4474 } else {
4475 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4476 }
4477 }
4478 SKIP_BLANKS;
4479 GROW;
4480 }
4481 if ((cur != NULL) && (last != NULL)) {
4482 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004483 if (last != NULL)
4484 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004485 }
4486 ctxt->entity = ctxt->input;
4487 NEXT;
4488 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004489 if (ret != NULL)
4490 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004491 NEXT;
4492 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004493 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004494 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004495 cur = ret;
4496 /*
4497 * Some normalization:
4498 * (a | b* | c?)* == (a | b | c)*
4499 */
4500 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4501 if ((cur->c1 != NULL) &&
4502 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4503 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4504 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4505 if ((cur->c2 != NULL) &&
4506 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4507 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4508 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4509 cur = cur->c2;
4510 }
4511 }
Owen Taylor3473f882001-02-23 17:55:21 +00004512 NEXT;
4513 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004514 if (ret != NULL) {
4515 int found = 0;
4516
Daniel Veillarde470df72001-04-18 21:41:07 +00004517 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004518 /*
4519 * Some normalization:
4520 * (a | b*)+ == (a | b)*
4521 * (a | b?)+ == (a | b)*
4522 */
4523 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4524 if ((cur->c1 != NULL) &&
4525 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4526 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4527 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4528 found = 1;
4529 }
4530 if ((cur->c2 != NULL) &&
4531 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4532 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4533 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4534 found = 1;
4535 }
4536 cur = cur->c2;
4537 }
4538 if (found)
4539 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4540 }
Owen Taylor3473f882001-02-23 17:55:21 +00004541 NEXT;
4542 }
4543 return(ret);
4544}
4545
4546/**
4547 * xmlParseElementContentDecl:
4548 * @ctxt: an XML parser context
4549 * @name: the name of the element being defined.
4550 * @result: the Element Content pointer will be stored here if any
4551 *
4552 * parse the declaration for an Element content either Mixed or Children,
4553 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4554 *
4555 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4556 *
4557 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4558 */
4559
4560int
4561xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4562 xmlElementContentPtr *result) {
4563
4564 xmlElementContentPtr tree = NULL;
4565 xmlParserInputPtr input = ctxt->input;
4566 int res;
4567
4568 *result = NULL;
4569
4570 if (RAW != '(') {
4571 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4572 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4573 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004574 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004575 ctxt->wellFormed = 0;
4576 ctxt->disableSAX = 1;
4577 return(-1);
4578 }
4579 NEXT;
4580 GROW;
4581 SKIP_BLANKS;
4582 if ((RAW == '#') && (NXT(1) == 'P') &&
4583 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4584 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4585 (NXT(6) == 'A')) {
4586 tree = xmlParseElementMixedContentDecl(ctxt);
4587 res = XML_ELEMENT_TYPE_MIXED;
4588 } else {
4589 tree = xmlParseElementChildrenContentDecl(ctxt);
4590 res = XML_ELEMENT_TYPE_ELEMENT;
4591 }
4592 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
4593 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4594 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4595 ctxt->sax->error(ctxt->userData,
4596"Element content declaration doesn't start and stop in the same entity\n");
4597 ctxt->wellFormed = 0;
4598 ctxt->disableSAX = 1;
4599 }
4600 SKIP_BLANKS;
4601 *result = tree;
4602 return(res);
4603}
4604
4605/**
4606 * xmlParseElementDecl:
4607 * @ctxt: an XML parser context
4608 *
4609 * parse an Element declaration.
4610 *
4611 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4612 *
4613 * [ VC: Unique Element Type Declaration ]
4614 * No element type may be declared more than once
4615 *
4616 * Returns the type of the element, or -1 in case of error
4617 */
4618int
4619xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4620 xmlChar *name;
4621 int ret = -1;
4622 xmlElementContentPtr content = NULL;
4623
4624 GROW;
4625 if ((RAW == '<') && (NXT(1) == '!') &&
4626 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4627 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4628 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4629 (NXT(8) == 'T')) {
4630 xmlParserInputPtr input = ctxt->input;
4631
4632 SKIP(9);
4633 if (!IS_BLANK(CUR)) {
4634 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4635 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4636 ctxt->sax->error(ctxt->userData,
4637 "Space required after 'ELEMENT'\n");
4638 ctxt->wellFormed = 0;
4639 ctxt->disableSAX = 1;
4640 }
4641 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004642 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004643 if (name == NULL) {
4644 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4645 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4646 ctxt->sax->error(ctxt->userData,
4647 "xmlParseElementDecl: no name for Element\n");
4648 ctxt->wellFormed = 0;
4649 ctxt->disableSAX = 1;
4650 return(-1);
4651 }
4652 while ((RAW == 0) && (ctxt->inputNr > 1))
4653 xmlPopInput(ctxt);
4654 if (!IS_BLANK(CUR)) {
4655 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4656 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4657 ctxt->sax->error(ctxt->userData,
4658 "Space required after the element name\n");
4659 ctxt->wellFormed = 0;
4660 ctxt->disableSAX = 1;
4661 }
4662 SKIP_BLANKS;
4663 if ((RAW == 'E') && (NXT(1) == 'M') &&
4664 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4665 (NXT(4) == 'Y')) {
4666 SKIP(5);
4667 /*
4668 * Element must always be empty.
4669 */
4670 ret = XML_ELEMENT_TYPE_EMPTY;
4671 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4672 (NXT(2) == 'Y')) {
4673 SKIP(3);
4674 /*
4675 * Element is a generic container.
4676 */
4677 ret = XML_ELEMENT_TYPE_ANY;
4678 } else if (RAW == '(') {
4679 ret = xmlParseElementContentDecl(ctxt, name, &content);
4680 } else {
4681 /*
4682 * [ WFC: PEs in Internal Subset ] error handling.
4683 */
4684 if ((RAW == '%') && (ctxt->external == 0) &&
4685 (ctxt->inputNr == 1)) {
4686 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4687 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4688 ctxt->sax->error(ctxt->userData,
4689 "PEReference: forbidden within markup decl in internal subset\n");
4690 } else {
4691 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4692 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4693 ctxt->sax->error(ctxt->userData,
4694 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4695 }
4696 ctxt->wellFormed = 0;
4697 ctxt->disableSAX = 1;
4698 if (name != NULL) xmlFree(name);
4699 return(-1);
4700 }
4701
4702 SKIP_BLANKS;
4703 /*
4704 * Pop-up of finished entities.
4705 */
4706 while ((RAW == 0) && (ctxt->inputNr > 1))
4707 xmlPopInput(ctxt);
4708 SKIP_BLANKS;
4709
4710 if (RAW != '>') {
4711 ctxt->errNo = XML_ERR_GT_REQUIRED;
4712 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4713 ctxt->sax->error(ctxt->userData,
4714 "xmlParseElementDecl: expected '>' at the end\n");
4715 ctxt->wellFormed = 0;
4716 ctxt->disableSAX = 1;
4717 } else {
4718 if (input != ctxt->input) {
4719 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4720 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4721 ctxt->sax->error(ctxt->userData,
4722"Element declaration doesn't start and stop in the same entity\n");
4723 ctxt->wellFormed = 0;
4724 ctxt->disableSAX = 1;
4725 }
4726
4727 NEXT;
4728 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4729 (ctxt->sax->elementDecl != NULL))
4730 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4731 content);
4732 }
4733 if (content != NULL) {
4734 xmlFreeElementContent(content);
4735 }
4736 if (name != NULL) {
4737 xmlFree(name);
4738 }
4739 }
4740 return(ret);
4741}
4742
4743/**
Owen Taylor3473f882001-02-23 17:55:21 +00004744 * xmlParseConditionalSections
4745 * @ctxt: an XML parser context
4746 *
4747 * [61] conditionalSect ::= includeSect | ignoreSect
4748 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4749 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4750 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4751 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4752 */
4753
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004754static void
Owen Taylor3473f882001-02-23 17:55:21 +00004755xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4756 SKIP(3);
4757 SKIP_BLANKS;
4758 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4759 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4760 (NXT(6) == 'E')) {
4761 SKIP(7);
4762 SKIP_BLANKS;
4763 if (RAW != '[') {
4764 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4765 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4766 ctxt->sax->error(ctxt->userData,
4767 "XML conditional section '[' expected\n");
4768 ctxt->wellFormed = 0;
4769 ctxt->disableSAX = 1;
4770 } else {
4771 NEXT;
4772 }
4773 if (xmlParserDebugEntities) {
4774 if ((ctxt->input != NULL) && (ctxt->input->filename))
4775 xmlGenericError(xmlGenericErrorContext,
4776 "%s(%d): ", ctxt->input->filename,
4777 ctxt->input->line);
4778 xmlGenericError(xmlGenericErrorContext,
4779 "Entering INCLUDE Conditional Section\n");
4780 }
4781
4782 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4783 (NXT(2) != '>'))) {
4784 const xmlChar *check = CUR_PTR;
4785 int cons = ctxt->input->consumed;
4786 int tok = ctxt->token;
4787
4788 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4789 xmlParseConditionalSections(ctxt);
4790 } else if (IS_BLANK(CUR)) {
4791 NEXT;
4792 } else if (RAW == '%') {
4793 xmlParsePEReference(ctxt);
4794 } else
4795 xmlParseMarkupDecl(ctxt);
4796
4797 /*
4798 * Pop-up of finished entities.
4799 */
4800 while ((RAW == 0) && (ctxt->inputNr > 1))
4801 xmlPopInput(ctxt);
4802
4803 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4804 (tok == ctxt->token)) {
4805 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4806 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4807 ctxt->sax->error(ctxt->userData,
4808 "Content error in the external subset\n");
4809 ctxt->wellFormed = 0;
4810 ctxt->disableSAX = 1;
4811 break;
4812 }
4813 }
4814 if (xmlParserDebugEntities) {
4815 if ((ctxt->input != NULL) && (ctxt->input->filename))
4816 xmlGenericError(xmlGenericErrorContext,
4817 "%s(%d): ", ctxt->input->filename,
4818 ctxt->input->line);
4819 xmlGenericError(xmlGenericErrorContext,
4820 "Leaving INCLUDE Conditional Section\n");
4821 }
4822
4823 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4824 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4825 int state;
4826 int instate;
4827 int depth = 0;
4828
4829 SKIP(6);
4830 SKIP_BLANKS;
4831 if (RAW != '[') {
4832 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4833 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4834 ctxt->sax->error(ctxt->userData,
4835 "XML conditional section '[' expected\n");
4836 ctxt->wellFormed = 0;
4837 ctxt->disableSAX = 1;
4838 } else {
4839 NEXT;
4840 }
4841 if (xmlParserDebugEntities) {
4842 if ((ctxt->input != NULL) && (ctxt->input->filename))
4843 xmlGenericError(xmlGenericErrorContext,
4844 "%s(%d): ", ctxt->input->filename,
4845 ctxt->input->line);
4846 xmlGenericError(xmlGenericErrorContext,
4847 "Entering IGNORE Conditional Section\n");
4848 }
4849
4850 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004851 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00004852 * But disable SAX event generating DTD building in the meantime
4853 */
4854 state = ctxt->disableSAX;
4855 instate = ctxt->instate;
4856 ctxt->disableSAX = 1;
4857 ctxt->instate = XML_PARSER_IGNORE;
4858
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004859 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004860 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4861 depth++;
4862 SKIP(3);
4863 continue;
4864 }
4865 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4866 if (--depth >= 0) SKIP(3);
4867 continue;
4868 }
4869 NEXT;
4870 continue;
4871 }
4872
4873 ctxt->disableSAX = state;
4874 ctxt->instate = instate;
4875
4876 if (xmlParserDebugEntities) {
4877 if ((ctxt->input != NULL) && (ctxt->input->filename))
4878 xmlGenericError(xmlGenericErrorContext,
4879 "%s(%d): ", ctxt->input->filename,
4880 ctxt->input->line);
4881 xmlGenericError(xmlGenericErrorContext,
4882 "Leaving IGNORE Conditional Section\n");
4883 }
4884
4885 } else {
4886 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4887 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4888 ctxt->sax->error(ctxt->userData,
4889 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4890 ctxt->wellFormed = 0;
4891 ctxt->disableSAX = 1;
4892 }
4893
4894 if (RAW == 0)
4895 SHRINK;
4896
4897 if (RAW == 0) {
4898 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4899 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4900 ctxt->sax->error(ctxt->userData,
4901 "XML conditional section not closed\n");
4902 ctxt->wellFormed = 0;
4903 ctxt->disableSAX = 1;
4904 } else {
4905 SKIP(3);
4906 }
4907}
4908
4909/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00004910 * xmlParseMarkupDecl:
4911 * @ctxt: an XML parser context
4912 *
4913 * parse Markup declarations
4914 *
4915 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4916 * NotationDecl | PI | Comment
4917 *
4918 * [ VC: Proper Declaration/PE Nesting ]
4919 * Parameter-entity replacement text must be properly nested with
4920 * markup declarations. That is to say, if either the first character
4921 * or the last character of a markup declaration (markupdecl above) is
4922 * contained in the replacement text for a parameter-entity reference,
4923 * both must be contained in the same replacement text.
4924 *
4925 * [ WFC: PEs in Internal Subset ]
4926 * In the internal DTD subset, parameter-entity references can occur
4927 * only where markup declarations can occur, not within markup declarations.
4928 * (This does not apply to references that occur in external parameter
4929 * entities or to the external subset.)
4930 */
4931void
4932xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4933 GROW;
4934 xmlParseElementDecl(ctxt);
4935 xmlParseAttributeListDecl(ctxt);
4936 xmlParseEntityDecl(ctxt);
4937 xmlParseNotationDecl(ctxt);
4938 xmlParsePI(ctxt);
4939 xmlParseComment(ctxt);
4940 /*
4941 * This is only for internal subset. On external entities,
4942 * the replacement is done before parsing stage
4943 */
4944 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4945 xmlParsePEReference(ctxt);
4946
4947 /*
4948 * Conditional sections are allowed from entities included
4949 * by PE References in the internal subset.
4950 */
4951 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
4952 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4953 xmlParseConditionalSections(ctxt);
4954 }
4955 }
4956
4957 ctxt->instate = XML_PARSER_DTD;
4958}
4959
4960/**
4961 * xmlParseTextDecl:
4962 * @ctxt: an XML parser context
4963 *
4964 * parse an XML declaration header for external entities
4965 *
4966 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4967 *
4968 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4969 */
4970
4971void
4972xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4973 xmlChar *version;
4974
4975 /*
4976 * We know that '<?xml' is here.
4977 */
4978 if ((RAW == '<') && (NXT(1) == '?') &&
4979 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4980 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4981 SKIP(5);
4982 } else {
4983 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4984 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4985 ctxt->sax->error(ctxt->userData,
4986 "Text declaration '<?xml' required\n");
4987 ctxt->wellFormed = 0;
4988 ctxt->disableSAX = 1;
4989
4990 return;
4991 }
4992
4993 if (!IS_BLANK(CUR)) {
4994 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4995 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4996 ctxt->sax->error(ctxt->userData,
4997 "Space needed after '<?xml'\n");
4998 ctxt->wellFormed = 0;
4999 ctxt->disableSAX = 1;
5000 }
5001 SKIP_BLANKS;
5002
5003 /*
5004 * We may have the VersionInfo here.
5005 */
5006 version = xmlParseVersionInfo(ctxt);
5007 if (version == NULL)
5008 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005009 else {
5010 if (!IS_BLANK(CUR)) {
5011 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5012 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5013 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5014 ctxt->wellFormed = 0;
5015 ctxt->disableSAX = 1;
5016 }
5017 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005018 ctxt->input->version = version;
5019
5020 /*
5021 * We must have the encoding declaration
5022 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005023 xmlParseEncodingDecl(ctxt);
5024 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5025 /*
5026 * The XML REC instructs us to stop parsing right here
5027 */
5028 return;
5029 }
5030
5031 SKIP_BLANKS;
5032 if ((RAW == '?') && (NXT(1) == '>')) {
5033 SKIP(2);
5034 } else if (RAW == '>') {
5035 /* Deprecated old WD ... */
5036 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5037 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5038 ctxt->sax->error(ctxt->userData,
5039 "XML declaration must end-up with '?>'\n");
5040 ctxt->wellFormed = 0;
5041 ctxt->disableSAX = 1;
5042 NEXT;
5043 } else {
5044 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5045 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5046 ctxt->sax->error(ctxt->userData,
5047 "parsing XML declaration: '?>' expected\n");
5048 ctxt->wellFormed = 0;
5049 ctxt->disableSAX = 1;
5050 MOVETO_ENDTAG(CUR_PTR);
5051 NEXT;
5052 }
5053}
5054
5055/**
Owen Taylor3473f882001-02-23 17:55:21 +00005056 * xmlParseExternalSubset:
5057 * @ctxt: an XML parser context
5058 * @ExternalID: the external identifier
5059 * @SystemID: the system identifier (or URL)
5060 *
5061 * parse Markup declarations from an external subset
5062 *
5063 * [30] extSubset ::= textDecl? extSubsetDecl
5064 *
5065 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5066 */
5067void
5068xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5069 const xmlChar *SystemID) {
5070 GROW;
5071 if ((RAW == '<') && (NXT(1) == '?') &&
5072 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5073 (NXT(4) == 'l')) {
5074 xmlParseTextDecl(ctxt);
5075 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5076 /*
5077 * The XML REC instructs us to stop parsing right here
5078 */
5079 ctxt->instate = XML_PARSER_EOF;
5080 return;
5081 }
5082 }
5083 if (ctxt->myDoc == NULL) {
5084 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5085 }
5086 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5087 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5088
5089 ctxt->instate = XML_PARSER_DTD;
5090 ctxt->external = 1;
5091 while (((RAW == '<') && (NXT(1) == '?')) ||
5092 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005093 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005094 const xmlChar *check = CUR_PTR;
5095 int cons = ctxt->input->consumed;
5096 int tok = ctxt->token;
5097
5098 GROW;
5099 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5100 xmlParseConditionalSections(ctxt);
5101 } else if (IS_BLANK(CUR)) {
5102 NEXT;
5103 } else if (RAW == '%') {
5104 xmlParsePEReference(ctxt);
5105 } else
5106 xmlParseMarkupDecl(ctxt);
5107
5108 /*
5109 * Pop-up of finished entities.
5110 */
5111 while ((RAW == 0) && (ctxt->inputNr > 1))
5112 xmlPopInput(ctxt);
5113
5114 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
5115 (tok == ctxt->token)) {
5116 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5117 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5118 ctxt->sax->error(ctxt->userData,
5119 "Content error in the external subset\n");
5120 ctxt->wellFormed = 0;
5121 ctxt->disableSAX = 1;
5122 break;
5123 }
5124 }
5125
5126 if (RAW != 0) {
5127 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5128 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5129 ctxt->sax->error(ctxt->userData,
5130 "Extra content at the end of the document\n");
5131 ctxt->wellFormed = 0;
5132 ctxt->disableSAX = 1;
5133 }
5134
5135}
5136
5137/**
5138 * xmlParseReference:
5139 * @ctxt: an XML parser context
5140 *
5141 * parse and handle entity references in content, depending on the SAX
5142 * interface, this may end-up in a call to character() if this is a
5143 * CharRef, a predefined entity, if there is no reference() callback.
5144 * or if the parser was asked to switch to that mode.
5145 *
5146 * [67] Reference ::= EntityRef | CharRef
5147 */
5148void
5149xmlParseReference(xmlParserCtxtPtr ctxt) {
5150 xmlEntityPtr ent;
5151 xmlChar *val;
5152 if (RAW != '&') return;
5153
5154 if (NXT(1) == '#') {
5155 int i = 0;
5156 xmlChar out[10];
5157 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005158 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005159
5160 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5161 /*
5162 * So we are using non-UTF-8 buffers
5163 * Check that the char fit on 8bits, if not
5164 * generate a CharRef.
5165 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005166 if (value <= 0xFF) {
5167 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005168 out[1] = 0;
5169 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5170 (!ctxt->disableSAX))
5171 ctxt->sax->characters(ctxt->userData, out, 1);
5172 } else {
5173 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005174 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005175 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005176 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005177 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5178 (!ctxt->disableSAX))
5179 ctxt->sax->reference(ctxt->userData, out);
5180 }
5181 } else {
5182 /*
5183 * Just encode the value in UTF-8
5184 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005185 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005186 out[i] = 0;
5187 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5188 (!ctxt->disableSAX))
5189 ctxt->sax->characters(ctxt->userData, out, i);
5190 }
5191 } else {
5192 ent = xmlParseEntityRef(ctxt);
5193 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005194 if (!ctxt->wellFormed)
5195 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005196 if ((ent->name != NULL) &&
5197 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5198 xmlNodePtr list = NULL;
5199 int ret;
5200
5201
5202 /*
5203 * The first reference to the entity trigger a parsing phase
5204 * where the ent->children is filled with the result from
5205 * the parsing.
5206 */
5207 if (ent->children == NULL) {
5208 xmlChar *value;
5209 value = ent->content;
5210
5211 /*
5212 * Check that this entity is well formed
5213 */
5214 if ((value != NULL) &&
5215 (value[1] == 0) && (value[0] == '<') &&
5216 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5217 /*
5218 * DONE: get definite answer on this !!!
5219 * Lots of entity decls are used to declare a single
5220 * char
5221 * <!ENTITY lt "<">
5222 * Which seems to be valid since
5223 * 2.4: The ampersand character (&) and the left angle
5224 * bracket (<) may appear in their literal form only
5225 * when used ... They are also legal within the literal
5226 * entity value of an internal entity declaration;i
5227 * see "4.3.2 Well-Formed Parsed Entities".
5228 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5229 * Looking at the OASIS test suite and James Clark
5230 * tests, this is broken. However the XML REC uses
5231 * it. Is the XML REC not well-formed ????
5232 * This is a hack to avoid this problem
5233 *
5234 * ANSWER: since lt gt amp .. are already defined,
5235 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005236 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005237 * is lousy but acceptable.
5238 */
5239 list = xmlNewDocText(ctxt->myDoc, value);
5240 if (list != NULL) {
5241 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5242 (ent->children == NULL)) {
5243 ent->children = list;
5244 ent->last = list;
5245 list->parent = (xmlNodePtr) ent;
5246 } else {
5247 xmlFreeNodeList(list);
5248 }
5249 } else if (list != NULL) {
5250 xmlFreeNodeList(list);
5251 }
5252 } else {
5253 /*
5254 * 4.3.2: An internal general parsed entity is well-formed
5255 * if its replacement text matches the production labeled
5256 * content.
5257 */
5258 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5259 ctxt->depth++;
5260 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
5261 ctxt->sax, NULL, ctxt->depth,
5262 value, &list);
5263 ctxt->depth--;
5264 } else if (ent->etype ==
5265 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5266 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005267 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Owen Taylor3473f882001-02-23 17:55:21 +00005268 ctxt->sax, NULL, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005269 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005270 ctxt->depth--;
5271 } else {
5272 ret = -1;
5273 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5274 ctxt->sax->error(ctxt->userData,
5275 "Internal: invalid entity type\n");
5276 }
5277 if (ret == XML_ERR_ENTITY_LOOP) {
5278 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5279 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5280 ctxt->sax->error(ctxt->userData,
5281 "Detected entity reference loop\n");
5282 ctxt->wellFormed = 0;
5283 ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005284 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005285 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005286 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5287 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005288 (ent->children == NULL)) {
5289 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005290 if (ctxt->replaceEntities) {
5291 /*
5292 * Prune it directly in the generated document
5293 * except for single text nodes.
5294 */
5295 if ((list->type == XML_TEXT_NODE) &&
5296 (list->next == NULL)) {
5297 list->parent = (xmlNodePtr) ent;
5298 list = NULL;
5299 } else {
5300 while (list != NULL) {
5301 list->parent = (xmlNodePtr) ctxt->node;
5302 if (list->next == NULL)
5303 ent->last = list;
5304 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005305 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005306 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005307 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5308 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005309 }
5310 } else {
5311 while (list != NULL) {
5312 list->parent = (xmlNodePtr) ent;
5313 if (list->next == NULL)
5314 ent->last = list;
5315 list = list->next;
5316 }
Owen Taylor3473f882001-02-23 17:55:21 +00005317 }
5318 } else {
5319 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005320 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005321 }
5322 } else if (ret > 0) {
5323 ctxt->errNo = ret;
5324 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5325 ctxt->sax->error(ctxt->userData,
5326 "Entity value required\n");
5327 ctxt->wellFormed = 0;
5328 ctxt->disableSAX = 1;
5329 } else if (list != NULL) {
5330 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005331 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005332 }
5333 }
5334 }
5335 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5336 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5337 /*
5338 * Create a node.
5339 */
5340 ctxt->sax->reference(ctxt->userData, ent->name);
5341 return;
5342 } else if (ctxt->replaceEntities) {
5343 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5344 /*
5345 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005346 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005347 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005348 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005349 if (list == NULL) {
Daniel Veillard8107a222002-01-13 14:10:10 +00005350 xmlNodePtr new = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005351 cur = ent->children;
5352 while (cur != NULL) {
5353 new = xmlCopyNode(cur, 1);
Daniel Veillard8107a222002-01-13 14:10:10 +00005354 if (firstChild == NULL){
5355 firstChild = new;
5356 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005357 xmlAddChild(ctxt->node, new);
5358 if (cur == ent->last)
5359 break;
5360 cur = cur->next;
5361 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005362 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5363 xmlAddEntityReference(ent, firstChild, new);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005364 } else {
5365 /*
5366 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005367 * node with a possible previous text one which
5368 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005369 */
5370 if (ent->children->type == XML_TEXT_NODE)
5371 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5372 if ((ent->last != ent->children) &&
5373 (ent->last->type == XML_TEXT_NODE))
5374 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5375 xmlAddChildList(ctxt->node, ent->children);
5376 }
5377
Owen Taylor3473f882001-02-23 17:55:21 +00005378 /*
5379 * This is to avoid a nasty side effect, see
5380 * characters() in SAX.c
5381 */
5382 ctxt->nodemem = 0;
5383 ctxt->nodelen = 0;
5384 return;
5385 } else {
5386 /*
5387 * Probably running in SAX mode
5388 */
5389 xmlParserInputPtr input;
5390
5391 input = xmlNewEntityInputStream(ctxt, ent);
5392 xmlPushInput(ctxt, input);
5393 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5394 (RAW == '<') && (NXT(1) == '?') &&
5395 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5396 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5397 xmlParseTextDecl(ctxt);
5398 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5399 /*
5400 * The XML REC instructs us to stop parsing right here
5401 */
5402 ctxt->instate = XML_PARSER_EOF;
5403 return;
5404 }
5405 if (input->standalone == 1) {
5406 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5407 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5408 ctxt->sax->error(ctxt->userData,
5409 "external parsed entities cannot be standalone\n");
5410 ctxt->wellFormed = 0;
5411 ctxt->disableSAX = 1;
5412 }
5413 }
5414 return;
5415 }
5416 }
5417 } else {
5418 val = ent->content;
5419 if (val == NULL) return;
5420 /*
5421 * inline the entity.
5422 */
5423 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5424 (!ctxt->disableSAX))
5425 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5426 }
5427 }
5428}
5429
5430/**
5431 * xmlParseEntityRef:
5432 * @ctxt: an XML parser context
5433 *
5434 * parse ENTITY references declarations
5435 *
5436 * [68] EntityRef ::= '&' Name ';'
5437 *
5438 * [ WFC: Entity Declared ]
5439 * In a document without any DTD, a document with only an internal DTD
5440 * subset which contains no parameter entity references, or a document
5441 * with "standalone='yes'", the Name given in the entity reference
5442 * must match that in an entity declaration, except that well-formed
5443 * documents need not declare any of the following entities: amp, lt,
5444 * gt, apos, quot. The declaration of a parameter entity must precede
5445 * any reference to it. Similarly, the declaration of a general entity
5446 * must precede any reference to it which appears in a default value in an
5447 * attribute-list declaration. Note that if entities are declared in the
5448 * external subset or in external parameter entities, a non-validating
5449 * processor is not obligated to read and process their declarations;
5450 * for such documents, the rule that an entity must be declared is a
5451 * well-formedness constraint only if standalone='yes'.
5452 *
5453 * [ WFC: Parsed Entity ]
5454 * An entity reference must not contain the name of an unparsed entity
5455 *
5456 * Returns the xmlEntityPtr if found, or NULL otherwise.
5457 */
5458xmlEntityPtr
5459xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5460 xmlChar *name;
5461 xmlEntityPtr ent = NULL;
5462
5463 GROW;
5464
5465 if (RAW == '&') {
5466 NEXT;
5467 name = xmlParseName(ctxt);
5468 if (name == NULL) {
5469 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5470 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5471 ctxt->sax->error(ctxt->userData,
5472 "xmlParseEntityRef: no name\n");
5473 ctxt->wellFormed = 0;
5474 ctxt->disableSAX = 1;
5475 } else {
5476 if (RAW == ';') {
5477 NEXT;
5478 /*
5479 * Ask first SAX for entity resolution, otherwise try the
5480 * predefined set.
5481 */
5482 if (ctxt->sax != NULL) {
5483 if (ctxt->sax->getEntity != NULL)
5484 ent = ctxt->sax->getEntity(ctxt->userData, name);
5485 if (ent == NULL)
5486 ent = xmlGetPredefinedEntity(name);
5487 }
5488 /*
5489 * [ WFC: Entity Declared ]
5490 * In a document without any DTD, a document with only an
5491 * internal DTD subset which contains no parameter entity
5492 * references, or a document with "standalone='yes'", the
5493 * Name given in the entity reference must match that in an
5494 * entity declaration, except that well-formed documents
5495 * need not declare any of the following entities: amp, lt,
5496 * gt, apos, quot.
5497 * The declaration of a parameter entity must precede any
5498 * reference to it.
5499 * Similarly, the declaration of a general entity must
5500 * precede any reference to it which appears in a default
5501 * value in an attribute-list declaration. Note that if
5502 * entities are declared in the external subset or in
5503 * external parameter entities, a non-validating processor
5504 * is not obligated to read and process their declarations;
5505 * for such documents, the rule that an entity must be
5506 * declared is a well-formedness constraint only if
5507 * standalone='yes'.
5508 */
5509 if (ent == NULL) {
5510 if ((ctxt->standalone == 1) ||
5511 ((ctxt->hasExternalSubset == 0) &&
5512 (ctxt->hasPErefs == 0))) {
5513 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5514 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5515 ctxt->sax->error(ctxt->userData,
5516 "Entity '%s' not defined\n", name);
5517 ctxt->wellFormed = 0;
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005518 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005519 ctxt->disableSAX = 1;
5520 } else {
5521 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005522 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005523 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005524 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005525 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005526 }
5527 }
5528
5529 /*
5530 * [ WFC: Parsed Entity ]
5531 * An entity reference must not contain the name of an
5532 * unparsed entity
5533 */
5534 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5535 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5536 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5537 ctxt->sax->error(ctxt->userData,
5538 "Entity reference to unparsed entity %s\n", name);
5539 ctxt->wellFormed = 0;
5540 ctxt->disableSAX = 1;
5541 }
5542
5543 /*
5544 * [ WFC: No External Entity References ]
5545 * Attribute values cannot contain direct or indirect
5546 * entity references to external entities.
5547 */
5548 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5549 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5550 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5551 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5552 ctxt->sax->error(ctxt->userData,
5553 "Attribute references external entity '%s'\n", name);
5554 ctxt->wellFormed = 0;
5555 ctxt->disableSAX = 1;
5556 }
5557 /*
5558 * [ WFC: No < in Attribute Values ]
5559 * The replacement text of any entity referred to directly or
5560 * indirectly in an attribute value (other than "&lt;") must
5561 * not contain a <.
5562 */
5563 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5564 (ent != NULL) &&
5565 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5566 (ent->content != NULL) &&
5567 (xmlStrchr(ent->content, '<'))) {
5568 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5569 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5570 ctxt->sax->error(ctxt->userData,
5571 "'<' in entity '%s' is not allowed in attributes values\n", name);
5572 ctxt->wellFormed = 0;
5573 ctxt->disableSAX = 1;
5574 }
5575
5576 /*
5577 * Internal check, no parameter entities here ...
5578 */
5579 else {
5580 switch (ent->etype) {
5581 case XML_INTERNAL_PARAMETER_ENTITY:
5582 case XML_EXTERNAL_PARAMETER_ENTITY:
5583 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5584 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5585 ctxt->sax->error(ctxt->userData,
5586 "Attempt to reference the parameter entity '%s'\n", name);
5587 ctxt->wellFormed = 0;
5588 ctxt->disableSAX = 1;
5589 break;
5590 default:
5591 break;
5592 }
5593 }
5594
5595 /*
5596 * [ WFC: No Recursion ]
5597 * A parsed entity must not contain a recursive reference
5598 * to itself, either directly or indirectly.
5599 * Done somewhere else
5600 */
5601
5602 } else {
5603 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5604 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5605 ctxt->sax->error(ctxt->userData,
5606 "xmlParseEntityRef: expecting ';'\n");
5607 ctxt->wellFormed = 0;
5608 ctxt->disableSAX = 1;
5609 }
5610 xmlFree(name);
5611 }
5612 }
5613 return(ent);
5614}
5615
5616/**
5617 * xmlParseStringEntityRef:
5618 * @ctxt: an XML parser context
5619 * @str: a pointer to an index in the string
5620 *
5621 * parse ENTITY references declarations, but this version parses it from
5622 * a string value.
5623 *
5624 * [68] EntityRef ::= '&' Name ';'
5625 *
5626 * [ WFC: Entity Declared ]
5627 * In a document without any DTD, a document with only an internal DTD
5628 * subset which contains no parameter entity references, or a document
5629 * with "standalone='yes'", the Name given in the entity reference
5630 * must match that in an entity declaration, except that well-formed
5631 * documents need not declare any of the following entities: amp, lt,
5632 * gt, apos, quot. The declaration of a parameter entity must precede
5633 * any reference to it. Similarly, the declaration of a general entity
5634 * must precede any reference to it which appears in a default value in an
5635 * attribute-list declaration. Note that if entities are declared in the
5636 * external subset or in external parameter entities, a non-validating
5637 * processor is not obligated to read and process their declarations;
5638 * for such documents, the rule that an entity must be declared is a
5639 * well-formedness constraint only if standalone='yes'.
5640 *
5641 * [ WFC: Parsed Entity ]
5642 * An entity reference must not contain the name of an unparsed entity
5643 *
5644 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5645 * is updated to the current location in the string.
5646 */
5647xmlEntityPtr
5648xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5649 xmlChar *name;
5650 const xmlChar *ptr;
5651 xmlChar cur;
5652 xmlEntityPtr ent = NULL;
5653
5654 if ((str == NULL) || (*str == NULL))
5655 return(NULL);
5656 ptr = *str;
5657 cur = *ptr;
5658 if (cur == '&') {
5659 ptr++;
5660 cur = *ptr;
5661 name = xmlParseStringName(ctxt, &ptr);
5662 if (name == NULL) {
5663 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5664 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5665 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005666 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005667 ctxt->wellFormed = 0;
5668 ctxt->disableSAX = 1;
5669 } else {
5670 if (*ptr == ';') {
5671 ptr++;
5672 /*
5673 * Ask first SAX for entity resolution, otherwise try the
5674 * predefined set.
5675 */
5676 if (ctxt->sax != NULL) {
5677 if (ctxt->sax->getEntity != NULL)
5678 ent = ctxt->sax->getEntity(ctxt->userData, name);
5679 if (ent == NULL)
5680 ent = xmlGetPredefinedEntity(name);
5681 }
5682 /*
5683 * [ WFC: Entity Declared ]
5684 * In a document without any DTD, a document with only an
5685 * internal DTD subset which contains no parameter entity
5686 * references, or a document with "standalone='yes'", the
5687 * Name given in the entity reference must match that in an
5688 * entity declaration, except that well-formed documents
5689 * need not declare any of the following entities: amp, lt,
5690 * gt, apos, quot.
5691 * The declaration of a parameter entity must precede any
5692 * reference to it.
5693 * Similarly, the declaration of a general entity must
5694 * precede any reference to it which appears in a default
5695 * value in an attribute-list declaration. Note that if
5696 * entities are declared in the external subset or in
5697 * external parameter entities, a non-validating processor
5698 * is not obligated to read and process their declarations;
5699 * for such documents, the rule that an entity must be
5700 * declared is a well-formedness constraint only if
5701 * standalone='yes'.
5702 */
5703 if (ent == NULL) {
5704 if ((ctxt->standalone == 1) ||
5705 ((ctxt->hasExternalSubset == 0) &&
5706 (ctxt->hasPErefs == 0))) {
5707 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5708 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5709 ctxt->sax->error(ctxt->userData,
5710 "Entity '%s' not defined\n", name);
5711 ctxt->wellFormed = 0;
5712 ctxt->disableSAX = 1;
5713 } else {
5714 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5715 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5716 ctxt->sax->warning(ctxt->userData,
5717 "Entity '%s' not defined\n", name);
5718 }
5719 }
5720
5721 /*
5722 * [ WFC: Parsed Entity ]
5723 * An entity reference must not contain the name of an
5724 * unparsed entity
5725 */
5726 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5727 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5728 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5729 ctxt->sax->error(ctxt->userData,
5730 "Entity reference to unparsed entity %s\n", name);
5731 ctxt->wellFormed = 0;
5732 ctxt->disableSAX = 1;
5733 }
5734
5735 /*
5736 * [ WFC: No External Entity References ]
5737 * Attribute values cannot contain direct or indirect
5738 * entity references to external entities.
5739 */
5740 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5741 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5742 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5743 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5744 ctxt->sax->error(ctxt->userData,
5745 "Attribute references external entity '%s'\n", name);
5746 ctxt->wellFormed = 0;
5747 ctxt->disableSAX = 1;
5748 }
5749 /*
5750 * [ WFC: No < in Attribute Values ]
5751 * The replacement text of any entity referred to directly or
5752 * indirectly in an attribute value (other than "&lt;") must
5753 * not contain a <.
5754 */
5755 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5756 (ent != NULL) &&
5757 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5758 (ent->content != NULL) &&
5759 (xmlStrchr(ent->content, '<'))) {
5760 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5761 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5762 ctxt->sax->error(ctxt->userData,
5763 "'<' in entity '%s' is not allowed in attributes values\n", name);
5764 ctxt->wellFormed = 0;
5765 ctxt->disableSAX = 1;
5766 }
5767
5768 /*
5769 * Internal check, no parameter entities here ...
5770 */
5771 else {
5772 switch (ent->etype) {
5773 case XML_INTERNAL_PARAMETER_ENTITY:
5774 case XML_EXTERNAL_PARAMETER_ENTITY:
5775 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5776 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5777 ctxt->sax->error(ctxt->userData,
5778 "Attempt to reference the parameter entity '%s'\n", name);
5779 ctxt->wellFormed = 0;
5780 ctxt->disableSAX = 1;
5781 break;
5782 default:
5783 break;
5784 }
5785 }
5786
5787 /*
5788 * [ WFC: No Recursion ]
5789 * A parsed entity must not contain a recursive reference
5790 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005791 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00005792 */
5793
5794 } else {
5795 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5796 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5797 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005798 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005799 ctxt->wellFormed = 0;
5800 ctxt->disableSAX = 1;
5801 }
5802 xmlFree(name);
5803 }
5804 }
5805 *str = ptr;
5806 return(ent);
5807}
5808
5809/**
5810 * xmlParsePEReference:
5811 * @ctxt: an XML parser context
5812 *
5813 * parse PEReference declarations
5814 * The entity content is handled directly by pushing it's content as
5815 * a new input stream.
5816 *
5817 * [69] PEReference ::= '%' Name ';'
5818 *
5819 * [ WFC: No Recursion ]
5820 * A parsed entity must not contain a recursive
5821 * reference to itself, either directly or indirectly.
5822 *
5823 * [ WFC: Entity Declared ]
5824 * In a document without any DTD, a document with only an internal DTD
5825 * subset which contains no parameter entity references, or a document
5826 * with "standalone='yes'", ... ... The declaration of a parameter
5827 * entity must precede any reference to it...
5828 *
5829 * [ VC: Entity Declared ]
5830 * In a document with an external subset or external parameter entities
5831 * with "standalone='no'", ... ... The declaration of a parameter entity
5832 * must precede any reference to it...
5833 *
5834 * [ WFC: In DTD ]
5835 * Parameter-entity references may only appear in the DTD.
5836 * NOTE: misleading but this is handled.
5837 */
5838void
5839xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5840 xmlChar *name;
5841 xmlEntityPtr entity = NULL;
5842 xmlParserInputPtr input;
5843
5844 if (RAW == '%') {
5845 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005846 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005847 if (name == NULL) {
5848 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5849 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5850 ctxt->sax->error(ctxt->userData,
5851 "xmlParsePEReference: no name\n");
5852 ctxt->wellFormed = 0;
5853 ctxt->disableSAX = 1;
5854 } else {
5855 if (RAW == ';') {
5856 NEXT;
5857 if ((ctxt->sax != NULL) &&
5858 (ctxt->sax->getParameterEntity != NULL))
5859 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5860 name);
5861 if (entity == NULL) {
5862 /*
5863 * [ WFC: Entity Declared ]
5864 * In a document without any DTD, a document with only an
5865 * internal DTD subset which contains no parameter entity
5866 * references, or a document with "standalone='yes'", ...
5867 * ... The declaration of a parameter entity must precede
5868 * any reference to it...
5869 */
5870 if ((ctxt->standalone == 1) ||
5871 ((ctxt->hasExternalSubset == 0) &&
5872 (ctxt->hasPErefs == 0))) {
5873 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5874 if ((!ctxt->disableSAX) &&
5875 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5876 ctxt->sax->error(ctxt->userData,
5877 "PEReference: %%%s; not found\n", name);
5878 ctxt->wellFormed = 0;
5879 ctxt->disableSAX = 1;
5880 } else {
5881 /*
5882 * [ VC: Entity Declared ]
5883 * In a document with an external subset or external
5884 * parameter entities with "standalone='no'", ...
5885 * ... The declaration of a parameter entity must precede
5886 * any reference to it...
5887 */
5888 if ((!ctxt->disableSAX) &&
5889 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5890 ctxt->sax->warning(ctxt->userData,
5891 "PEReference: %%%s; not found\n", name);
5892 ctxt->valid = 0;
5893 }
5894 } else {
5895 /*
5896 * Internal checking in case the entity quest barfed
5897 */
5898 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5899 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5900 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5901 ctxt->sax->warning(ctxt->userData,
5902 "Internal: %%%s; is not a parameter entity\n", name);
5903 } else {
5904 /*
5905 * TODO !!!
5906 * handle the extra spaces added before and after
5907 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5908 */
5909 input = xmlNewEntityInputStream(ctxt, entity);
5910 xmlPushInput(ctxt, input);
5911 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5912 (RAW == '<') && (NXT(1) == '?') &&
5913 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5914 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5915 xmlParseTextDecl(ctxt);
5916 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5917 /*
5918 * The XML REC instructs us to stop parsing
5919 * right here
5920 */
5921 ctxt->instate = XML_PARSER_EOF;
5922 xmlFree(name);
5923 return;
5924 }
5925 }
5926 if (ctxt->token == 0)
5927 ctxt->token = ' ';
5928 }
5929 }
5930 ctxt->hasPErefs = 1;
5931 } else {
5932 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5933 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5934 ctxt->sax->error(ctxt->userData,
5935 "xmlParsePEReference: expecting ';'\n");
5936 ctxt->wellFormed = 0;
5937 ctxt->disableSAX = 1;
5938 }
5939 xmlFree(name);
5940 }
5941 }
5942}
5943
5944/**
5945 * xmlParseStringPEReference:
5946 * @ctxt: an XML parser context
5947 * @str: a pointer to an index in the string
5948 *
5949 * parse PEReference declarations
5950 *
5951 * [69] PEReference ::= '%' Name ';'
5952 *
5953 * [ WFC: No Recursion ]
5954 * A parsed entity must not contain a recursive
5955 * reference to itself, either directly or indirectly.
5956 *
5957 * [ WFC: Entity Declared ]
5958 * In a document without any DTD, a document with only an internal DTD
5959 * subset which contains no parameter entity references, or a document
5960 * with "standalone='yes'", ... ... The declaration of a parameter
5961 * entity must precede any reference to it...
5962 *
5963 * [ VC: Entity Declared ]
5964 * In a document with an external subset or external parameter entities
5965 * with "standalone='no'", ... ... The declaration of a parameter entity
5966 * must precede any reference to it...
5967 *
5968 * [ WFC: In DTD ]
5969 * Parameter-entity references may only appear in the DTD.
5970 * NOTE: misleading but this is handled.
5971 *
5972 * Returns the string of the entity content.
5973 * str is updated to the current value of the index
5974 */
5975xmlEntityPtr
5976xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5977 const xmlChar *ptr;
5978 xmlChar cur;
5979 xmlChar *name;
5980 xmlEntityPtr entity = NULL;
5981
5982 if ((str == NULL) || (*str == NULL)) return(NULL);
5983 ptr = *str;
5984 cur = *ptr;
5985 if (cur == '%') {
5986 ptr++;
5987 cur = *ptr;
5988 name = xmlParseStringName(ctxt, &ptr);
5989 if (name == NULL) {
5990 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5991 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5992 ctxt->sax->error(ctxt->userData,
5993 "xmlParseStringPEReference: no name\n");
5994 ctxt->wellFormed = 0;
5995 ctxt->disableSAX = 1;
5996 } else {
5997 cur = *ptr;
5998 if (cur == ';') {
5999 ptr++;
6000 cur = *ptr;
6001 if ((ctxt->sax != NULL) &&
6002 (ctxt->sax->getParameterEntity != NULL))
6003 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6004 name);
6005 if (entity == NULL) {
6006 /*
6007 * [ WFC: Entity Declared ]
6008 * In a document without any DTD, a document with only an
6009 * internal DTD subset which contains no parameter entity
6010 * references, or a document with "standalone='yes'", ...
6011 * ... The declaration of a parameter entity must precede
6012 * any reference to it...
6013 */
6014 if ((ctxt->standalone == 1) ||
6015 ((ctxt->hasExternalSubset == 0) &&
6016 (ctxt->hasPErefs == 0))) {
6017 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6018 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6019 ctxt->sax->error(ctxt->userData,
6020 "PEReference: %%%s; not found\n", name);
6021 ctxt->wellFormed = 0;
6022 ctxt->disableSAX = 1;
6023 } else {
6024 /*
6025 * [ VC: Entity Declared ]
6026 * In a document with an external subset or external
6027 * parameter entities with "standalone='no'", ...
6028 * ... The declaration of a parameter entity must
6029 * precede any reference to it...
6030 */
6031 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6032 ctxt->sax->warning(ctxt->userData,
6033 "PEReference: %%%s; not found\n", name);
6034 ctxt->valid = 0;
6035 }
6036 } else {
6037 /*
6038 * Internal checking in case the entity quest barfed
6039 */
6040 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6041 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6042 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6043 ctxt->sax->warning(ctxt->userData,
6044 "Internal: %%%s; is not a parameter entity\n", name);
6045 }
6046 }
6047 ctxt->hasPErefs = 1;
6048 } else {
6049 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6050 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6051 ctxt->sax->error(ctxt->userData,
6052 "xmlParseStringPEReference: expecting ';'\n");
6053 ctxt->wellFormed = 0;
6054 ctxt->disableSAX = 1;
6055 }
6056 xmlFree(name);
6057 }
6058 }
6059 *str = ptr;
6060 return(entity);
6061}
6062
6063/**
6064 * xmlParseDocTypeDecl:
6065 * @ctxt: an XML parser context
6066 *
6067 * parse a DOCTYPE declaration
6068 *
6069 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6070 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6071 *
6072 * [ VC: Root Element Type ]
6073 * The Name in the document type declaration must match the element
6074 * type of the root element.
6075 */
6076
6077void
6078xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6079 xmlChar *name = NULL;
6080 xmlChar *ExternalID = NULL;
6081 xmlChar *URI = NULL;
6082
6083 /*
6084 * We know that '<!DOCTYPE' has been detected.
6085 */
6086 SKIP(9);
6087
6088 SKIP_BLANKS;
6089
6090 /*
6091 * Parse the DOCTYPE name.
6092 */
6093 name = xmlParseName(ctxt);
6094 if (name == NULL) {
6095 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6096 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6097 ctxt->sax->error(ctxt->userData,
6098 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6099 ctxt->wellFormed = 0;
6100 ctxt->disableSAX = 1;
6101 }
6102 ctxt->intSubName = name;
6103
6104 SKIP_BLANKS;
6105
6106 /*
6107 * Check for SystemID and ExternalID
6108 */
6109 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6110
6111 if ((URI != NULL) || (ExternalID != NULL)) {
6112 ctxt->hasExternalSubset = 1;
6113 }
6114 ctxt->extSubURI = URI;
6115 ctxt->extSubSystem = ExternalID;
6116
6117 SKIP_BLANKS;
6118
6119 /*
6120 * Create and update the internal subset.
6121 */
6122 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6123 (!ctxt->disableSAX))
6124 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6125
6126 /*
6127 * Is there any internal subset declarations ?
6128 * they are handled separately in xmlParseInternalSubset()
6129 */
6130 if (RAW == '[')
6131 return;
6132
6133 /*
6134 * We should be at the end of the DOCTYPE declaration.
6135 */
6136 if (RAW != '>') {
6137 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6138 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006139 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006140 ctxt->wellFormed = 0;
6141 ctxt->disableSAX = 1;
6142 }
6143 NEXT;
6144}
6145
6146/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006147 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006148 * @ctxt: an XML parser context
6149 *
6150 * parse the internal subset declaration
6151 *
6152 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6153 */
6154
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006155static void
Owen Taylor3473f882001-02-23 17:55:21 +00006156xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6157 /*
6158 * Is there any DTD definition ?
6159 */
6160 if (RAW == '[') {
6161 ctxt->instate = XML_PARSER_DTD;
6162 NEXT;
6163 /*
6164 * Parse the succession of Markup declarations and
6165 * PEReferences.
6166 * Subsequence (markupdecl | PEReference | S)*
6167 */
6168 while (RAW != ']') {
6169 const xmlChar *check = CUR_PTR;
6170 int cons = ctxt->input->consumed;
6171
6172 SKIP_BLANKS;
6173 xmlParseMarkupDecl(ctxt);
6174 xmlParsePEReference(ctxt);
6175
6176 /*
6177 * Pop-up of finished entities.
6178 */
6179 while ((RAW == 0) && (ctxt->inputNr > 1))
6180 xmlPopInput(ctxt);
6181
6182 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6183 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6184 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6185 ctxt->sax->error(ctxt->userData,
6186 "xmlParseInternalSubset: error detected in Markup declaration\n");
6187 ctxt->wellFormed = 0;
6188 ctxt->disableSAX = 1;
6189 break;
6190 }
6191 }
6192 if (RAW == ']') {
6193 NEXT;
6194 SKIP_BLANKS;
6195 }
6196 }
6197
6198 /*
6199 * We should be at the end of the DOCTYPE declaration.
6200 */
6201 if (RAW != '>') {
6202 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6203 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006204 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006205 ctxt->wellFormed = 0;
6206 ctxt->disableSAX = 1;
6207 }
6208 NEXT;
6209}
6210
6211/**
6212 * xmlParseAttribute:
6213 * @ctxt: an XML parser context
6214 * @value: a xmlChar ** used to store the value of the attribute
6215 *
6216 * parse an attribute
6217 *
6218 * [41] Attribute ::= Name Eq AttValue
6219 *
6220 * [ WFC: No External Entity References ]
6221 * Attribute values cannot contain direct or indirect entity references
6222 * to external entities.
6223 *
6224 * [ WFC: No < in Attribute Values ]
6225 * The replacement text of any entity referred to directly or indirectly in
6226 * an attribute value (other than "&lt;") must not contain a <.
6227 *
6228 * [ VC: Attribute Value Type ]
6229 * The attribute must have been declared; the value must be of the type
6230 * declared for it.
6231 *
6232 * [25] Eq ::= S? '=' S?
6233 *
6234 * With namespace:
6235 *
6236 * [NS 11] Attribute ::= QName Eq AttValue
6237 *
6238 * Also the case QName == xmlns:??? is handled independently as a namespace
6239 * definition.
6240 *
6241 * Returns the attribute name, and the value in *value.
6242 */
6243
6244xmlChar *
6245xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6246 xmlChar *name, *val;
6247
6248 *value = NULL;
6249 name = xmlParseName(ctxt);
6250 if (name == NULL) {
6251 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6252 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6253 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6254 ctxt->wellFormed = 0;
6255 ctxt->disableSAX = 1;
6256 return(NULL);
6257 }
6258
6259 /*
6260 * read the value
6261 */
6262 SKIP_BLANKS;
6263 if (RAW == '=') {
6264 NEXT;
6265 SKIP_BLANKS;
6266 val = xmlParseAttValue(ctxt);
6267 ctxt->instate = XML_PARSER_CONTENT;
6268 } else {
6269 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6270 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6271 ctxt->sax->error(ctxt->userData,
6272 "Specification mandate value for attribute %s\n", name);
6273 ctxt->wellFormed = 0;
6274 ctxt->disableSAX = 1;
6275 xmlFree(name);
6276 return(NULL);
6277 }
6278
6279 /*
6280 * Check that xml:lang conforms to the specification
6281 * No more registered as an error, just generate a warning now
6282 * since this was deprecated in XML second edition
6283 */
6284 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6285 if (!xmlCheckLanguageID(val)) {
6286 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6287 ctxt->sax->warning(ctxt->userData,
6288 "Malformed value for xml:lang : %s\n", val);
6289 }
6290 }
6291
6292 /*
6293 * Check that xml:space conforms to the specification
6294 */
6295 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6296 if (xmlStrEqual(val, BAD_CAST "default"))
6297 *(ctxt->space) = 0;
6298 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6299 *(ctxt->space) = 1;
6300 else {
6301 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6302 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6303 ctxt->sax->error(ctxt->userData,
6304"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6305 val);
6306 ctxt->wellFormed = 0;
6307 ctxt->disableSAX = 1;
6308 }
6309 }
6310
6311 *value = val;
6312 return(name);
6313}
6314
6315/**
6316 * xmlParseStartTag:
6317 * @ctxt: an XML parser context
6318 *
6319 * parse a start of tag either for rule element or
6320 * EmptyElement. In both case we don't parse the tag closing chars.
6321 *
6322 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6323 *
6324 * [ WFC: Unique Att Spec ]
6325 * No attribute name may appear more than once in the same start-tag or
6326 * empty-element tag.
6327 *
6328 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6329 *
6330 * [ WFC: Unique Att Spec ]
6331 * No attribute name may appear more than once in the same start-tag or
6332 * empty-element tag.
6333 *
6334 * With namespace:
6335 *
6336 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6337 *
6338 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6339 *
6340 * Returns the element name parsed
6341 */
6342
6343xmlChar *
6344xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6345 xmlChar *name;
6346 xmlChar *attname;
6347 xmlChar *attvalue;
6348 const xmlChar **atts = NULL;
6349 int nbatts = 0;
6350 int maxatts = 0;
6351 int i;
6352
6353 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006354 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006355
6356 name = xmlParseName(ctxt);
6357 if (name == NULL) {
6358 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6359 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6360 ctxt->sax->error(ctxt->userData,
6361 "xmlParseStartTag: invalid element name\n");
6362 ctxt->wellFormed = 0;
6363 ctxt->disableSAX = 1;
6364 return(NULL);
6365 }
6366
6367 /*
6368 * Now parse the attributes, it ends up with the ending
6369 *
6370 * (S Attribute)* S?
6371 */
6372 SKIP_BLANKS;
6373 GROW;
6374
Daniel Veillard21a0f912001-02-25 19:54:14 +00006375 while ((RAW != '>') &&
6376 ((RAW != '/') || (NXT(1) != '>')) &&
6377 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006378 const xmlChar *q = CUR_PTR;
6379 int cons = ctxt->input->consumed;
6380
6381 attname = xmlParseAttribute(ctxt, &attvalue);
6382 if ((attname != NULL) && (attvalue != NULL)) {
6383 /*
6384 * [ WFC: Unique Att Spec ]
6385 * No attribute name may appear more than once in the same
6386 * start-tag or empty-element tag.
6387 */
6388 for (i = 0; i < nbatts;i += 2) {
6389 if (xmlStrEqual(atts[i], attname)) {
6390 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6391 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6392 ctxt->sax->error(ctxt->userData,
6393 "Attribute %s redefined\n",
6394 attname);
6395 ctxt->wellFormed = 0;
6396 ctxt->disableSAX = 1;
6397 xmlFree(attname);
6398 xmlFree(attvalue);
6399 goto failed;
6400 }
6401 }
6402
6403 /*
6404 * Add the pair to atts
6405 */
6406 if (atts == NULL) {
6407 maxatts = 10;
6408 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6409 if (atts == NULL) {
6410 xmlGenericError(xmlGenericErrorContext,
6411 "malloc of %ld byte failed\n",
6412 maxatts * (long)sizeof(xmlChar *));
6413 return(NULL);
6414 }
6415 } else if (nbatts + 4 > maxatts) {
6416 maxatts *= 2;
6417 atts = (const xmlChar **) xmlRealloc((void *) atts,
6418 maxatts * sizeof(xmlChar *));
6419 if (atts == NULL) {
6420 xmlGenericError(xmlGenericErrorContext,
6421 "realloc of %ld byte failed\n",
6422 maxatts * (long)sizeof(xmlChar *));
6423 return(NULL);
6424 }
6425 }
6426 atts[nbatts++] = attname;
6427 atts[nbatts++] = attvalue;
6428 atts[nbatts] = NULL;
6429 atts[nbatts + 1] = NULL;
6430 } else {
6431 if (attname != NULL)
6432 xmlFree(attname);
6433 if (attvalue != NULL)
6434 xmlFree(attvalue);
6435 }
6436
6437failed:
6438
6439 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6440 break;
6441 if (!IS_BLANK(RAW)) {
6442 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6443 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6444 ctxt->sax->error(ctxt->userData,
6445 "attributes construct error\n");
6446 ctxt->wellFormed = 0;
6447 ctxt->disableSAX = 1;
6448 }
6449 SKIP_BLANKS;
6450 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6451 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6452 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6453 ctxt->sax->error(ctxt->userData,
6454 "xmlParseStartTag: problem parsing attributes\n");
6455 ctxt->wellFormed = 0;
6456 ctxt->disableSAX = 1;
6457 break;
6458 }
6459 GROW;
6460 }
6461
6462 /*
6463 * SAX: Start of Element !
6464 */
6465 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6466 (!ctxt->disableSAX))
6467 ctxt->sax->startElement(ctxt->userData, name, atts);
6468
6469 if (atts != NULL) {
6470 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6471 xmlFree((void *) atts);
6472 }
6473 return(name);
6474}
6475
6476/**
6477 * xmlParseEndTag:
6478 * @ctxt: an XML parser context
6479 *
6480 * parse an end of tag
6481 *
6482 * [42] ETag ::= '</' Name S? '>'
6483 *
6484 * With namespace
6485 *
6486 * [NS 9] ETag ::= '</' QName S? '>'
6487 */
6488
6489void
6490xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6491 xmlChar *name;
6492 xmlChar *oldname;
6493
6494 GROW;
6495 if ((RAW != '<') || (NXT(1) != '/')) {
6496 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6497 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6498 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6499 ctxt->wellFormed = 0;
6500 ctxt->disableSAX = 1;
6501 return;
6502 }
6503 SKIP(2);
6504
6505 name = xmlParseName(ctxt);
6506
6507 /*
6508 * We should definitely be at the ending "S? '>'" part
6509 */
6510 GROW;
6511 SKIP_BLANKS;
6512 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6513 ctxt->errNo = XML_ERR_GT_REQUIRED;
6514 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6515 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6516 ctxt->wellFormed = 0;
6517 ctxt->disableSAX = 1;
6518 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006519 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006520
6521 /*
6522 * [ WFC: Element Type Match ]
6523 * The Name in an element's end-tag must match the element type in the
6524 * start-tag.
6525 *
6526 */
6527 if ((name == NULL) || (ctxt->name == NULL) ||
6528 (!xmlStrEqual(name, ctxt->name))) {
6529 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6530 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6531 if ((name != NULL) && (ctxt->name != NULL)) {
6532 ctxt->sax->error(ctxt->userData,
6533 "Opening and ending tag mismatch: %s and %s\n",
6534 ctxt->name, name);
6535 } else if (ctxt->name != NULL) {
6536 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006537 "Ending tag error for: %s\n", ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006538 } else {
6539 ctxt->sax->error(ctxt->userData,
6540 "Ending tag error: internal error ???\n");
6541 }
6542
6543 }
6544 ctxt->wellFormed = 0;
6545 ctxt->disableSAX = 1;
6546 }
6547
6548 /*
6549 * SAX: End of Tag
6550 */
6551 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6552 (!ctxt->disableSAX))
6553 ctxt->sax->endElement(ctxt->userData, name);
6554
6555 if (name != NULL)
6556 xmlFree(name);
6557 oldname = namePop(ctxt);
6558 spacePop(ctxt);
6559 if (oldname != NULL) {
6560#ifdef DEBUG_STACK
6561 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6562#endif
6563 xmlFree(oldname);
6564 }
6565 return;
6566}
6567
6568/**
6569 * xmlParseCDSect:
6570 * @ctxt: an XML parser context
6571 *
6572 * Parse escaped pure raw content.
6573 *
6574 * [18] CDSect ::= CDStart CData CDEnd
6575 *
6576 * [19] CDStart ::= '<![CDATA['
6577 *
6578 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6579 *
6580 * [21] CDEnd ::= ']]>'
6581 */
6582void
6583xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6584 xmlChar *buf = NULL;
6585 int len = 0;
6586 int size = XML_PARSER_BUFFER_SIZE;
6587 int r, rl;
6588 int s, sl;
6589 int cur, l;
6590 int count = 0;
6591
6592 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6593 (NXT(2) == '[') && (NXT(3) == 'C') &&
6594 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6595 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6596 (NXT(8) == '[')) {
6597 SKIP(9);
6598 } else
6599 return;
6600
6601 ctxt->instate = XML_PARSER_CDATA_SECTION;
6602 r = CUR_CHAR(rl);
6603 if (!IS_CHAR(r)) {
6604 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6605 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6606 ctxt->sax->error(ctxt->userData,
6607 "CData section not finished\n");
6608 ctxt->wellFormed = 0;
6609 ctxt->disableSAX = 1;
6610 ctxt->instate = XML_PARSER_CONTENT;
6611 return;
6612 }
6613 NEXTL(rl);
6614 s = CUR_CHAR(sl);
6615 if (!IS_CHAR(s)) {
6616 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6617 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6618 ctxt->sax->error(ctxt->userData,
6619 "CData section not finished\n");
6620 ctxt->wellFormed = 0;
6621 ctxt->disableSAX = 1;
6622 ctxt->instate = XML_PARSER_CONTENT;
6623 return;
6624 }
6625 NEXTL(sl);
6626 cur = CUR_CHAR(l);
6627 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6628 if (buf == NULL) {
6629 xmlGenericError(xmlGenericErrorContext,
6630 "malloc of %d byte failed\n", size);
6631 return;
6632 }
6633 while (IS_CHAR(cur) &&
6634 ((r != ']') || (s != ']') || (cur != '>'))) {
6635 if (len + 5 >= size) {
6636 size *= 2;
6637 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6638 if (buf == NULL) {
6639 xmlGenericError(xmlGenericErrorContext,
6640 "realloc of %d byte failed\n", size);
6641 return;
6642 }
6643 }
6644 COPY_BUF(rl,buf,len,r);
6645 r = s;
6646 rl = sl;
6647 s = cur;
6648 sl = l;
6649 count++;
6650 if (count > 50) {
6651 GROW;
6652 count = 0;
6653 }
6654 NEXTL(l);
6655 cur = CUR_CHAR(l);
6656 }
6657 buf[len] = 0;
6658 ctxt->instate = XML_PARSER_CONTENT;
6659 if (cur != '>') {
6660 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6661 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6662 ctxt->sax->error(ctxt->userData,
6663 "CData section not finished\n%.50s\n", buf);
6664 ctxt->wellFormed = 0;
6665 ctxt->disableSAX = 1;
6666 xmlFree(buf);
6667 return;
6668 }
6669 NEXTL(l);
6670
6671 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006672 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00006673 */
6674 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6675 if (ctxt->sax->cdataBlock != NULL)
6676 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006677 else if (ctxt->sax->characters != NULL)
6678 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006679 }
6680 xmlFree(buf);
6681}
6682
6683/**
6684 * xmlParseContent:
6685 * @ctxt: an XML parser context
6686 *
6687 * Parse a content:
6688 *
6689 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6690 */
6691
6692void
6693xmlParseContent(xmlParserCtxtPtr ctxt) {
6694 GROW;
6695 while (((RAW != 0) || (ctxt->token != 0)) &&
6696 ((RAW != '<') || (NXT(1) != '/'))) {
6697 const xmlChar *test = CUR_PTR;
6698 int cons = ctxt->input->consumed;
Daniel Veillard04be4f52001-03-26 21:23:53 +00006699 int tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006700 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006701
6702 /*
6703 * Handle possible processed charrefs.
6704 */
6705 if (ctxt->token != 0) {
6706 xmlParseCharData(ctxt, 0);
6707 }
6708 /*
6709 * First case : a Processing Instruction.
6710 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006711 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006712 xmlParsePI(ctxt);
6713 }
6714
6715 /*
6716 * Second case : a CDSection
6717 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006718 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006719 (NXT(2) == '[') && (NXT(3) == 'C') &&
6720 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6721 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6722 (NXT(8) == '[')) {
6723 xmlParseCDSect(ctxt);
6724 }
6725
6726 /*
6727 * Third case : a comment
6728 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006729 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006730 (NXT(2) == '-') && (NXT(3) == '-')) {
6731 xmlParseComment(ctxt);
6732 ctxt->instate = XML_PARSER_CONTENT;
6733 }
6734
6735 /*
6736 * Fourth case : a sub-element.
6737 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006738 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006739 xmlParseElement(ctxt);
6740 }
6741
6742 /*
6743 * Fifth case : a reference. If if has not been resolved,
6744 * parsing returns it's Name, create the node
6745 */
6746
Daniel Veillard21a0f912001-02-25 19:54:14 +00006747 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006748 xmlParseReference(ctxt);
6749 }
6750
6751 /*
6752 * Last case, text. Note that References are handled directly.
6753 */
6754 else {
6755 xmlParseCharData(ctxt, 0);
6756 }
6757
6758 GROW;
6759 /*
6760 * Pop-up of finished entities.
6761 */
6762 while ((RAW == 0) && (ctxt->inputNr > 1))
6763 xmlPopInput(ctxt);
6764 SHRINK;
6765
6766 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6767 (tok == ctxt->token)) {
6768 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6769 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6770 ctxt->sax->error(ctxt->userData,
6771 "detected an error in element content\n");
6772 ctxt->wellFormed = 0;
6773 ctxt->disableSAX = 1;
6774 ctxt->instate = XML_PARSER_EOF;
6775 break;
6776 }
6777 }
6778}
6779
6780/**
6781 * xmlParseElement:
6782 * @ctxt: an XML parser context
6783 *
6784 * parse an XML element, this is highly recursive
6785 *
6786 * [39] element ::= EmptyElemTag | STag content ETag
6787 *
6788 * [ WFC: Element Type Match ]
6789 * The Name in an element's end-tag must match the element type in the
6790 * start-tag.
6791 *
6792 * [ VC: Element Valid ]
6793 * An element is valid if there is a declaration matching elementdecl
6794 * where the Name matches the element type and one of the following holds:
6795 * - The declaration matches EMPTY and the element has no content.
6796 * - The declaration matches children and the sequence of child elements
6797 * belongs to the language generated by the regular expression in the
6798 * content model, with optional white space (characters matching the
6799 * nonterminal S) between each pair of child elements.
6800 * - The declaration matches Mixed and the content consists of character
6801 * data and child elements whose types match names in the content model.
6802 * - The declaration matches ANY, and the types of any child elements have
6803 * been declared.
6804 */
6805
6806void
6807xmlParseElement(xmlParserCtxtPtr ctxt) {
6808 const xmlChar *openTag = CUR_PTR;
6809 xmlChar *name;
6810 xmlChar *oldname;
6811 xmlParserNodeInfo node_info;
6812 xmlNodePtr ret;
6813
6814 /* Capture start position */
6815 if (ctxt->record_info) {
6816 node_info.begin_pos = ctxt->input->consumed +
6817 (CUR_PTR - ctxt->input->base);
6818 node_info.begin_line = ctxt->input->line;
6819 }
6820
6821 if (ctxt->spaceNr == 0)
6822 spacePush(ctxt, -1);
6823 else
6824 spacePush(ctxt, *ctxt->space);
6825
6826 name = xmlParseStartTag(ctxt);
6827 if (name == NULL) {
6828 spacePop(ctxt);
6829 return;
6830 }
6831 namePush(ctxt, name);
6832 ret = ctxt->node;
6833
6834 /*
6835 * [ VC: Root Element Type ]
6836 * The Name in the document type declaration must match the element
6837 * type of the root element.
6838 */
6839 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6840 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6841 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6842
6843 /*
6844 * Check for an Empty Element.
6845 */
6846 if ((RAW == '/') && (NXT(1) == '>')) {
6847 SKIP(2);
6848 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6849 (!ctxt->disableSAX))
6850 ctxt->sax->endElement(ctxt->userData, name);
6851 oldname = namePop(ctxt);
6852 spacePop(ctxt);
6853 if (oldname != NULL) {
6854#ifdef DEBUG_STACK
6855 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6856#endif
6857 xmlFree(oldname);
6858 }
6859 if ( ret != NULL && ctxt->record_info ) {
6860 node_info.end_pos = ctxt->input->consumed +
6861 (CUR_PTR - ctxt->input->base);
6862 node_info.end_line = ctxt->input->line;
6863 node_info.node = ret;
6864 xmlParserAddNodeInfo(ctxt, &node_info);
6865 }
6866 return;
6867 }
6868 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006869 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006870 } else {
6871 ctxt->errNo = XML_ERR_GT_REQUIRED;
6872 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6873 ctxt->sax->error(ctxt->userData,
6874 "Couldn't find end of Start Tag\n%.30s\n",
6875 openTag);
6876 ctxt->wellFormed = 0;
6877 ctxt->disableSAX = 1;
6878
6879 /*
6880 * end of parsing of this node.
6881 */
6882 nodePop(ctxt);
6883 oldname = namePop(ctxt);
6884 spacePop(ctxt);
6885 if (oldname != NULL) {
6886#ifdef DEBUG_STACK
6887 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6888#endif
6889 xmlFree(oldname);
6890 }
6891
6892 /*
6893 * Capture end position and add node
6894 */
6895 if ( ret != NULL && ctxt->record_info ) {
6896 node_info.end_pos = ctxt->input->consumed +
6897 (CUR_PTR - ctxt->input->base);
6898 node_info.end_line = ctxt->input->line;
6899 node_info.node = ret;
6900 xmlParserAddNodeInfo(ctxt, &node_info);
6901 }
6902 return;
6903 }
6904
6905 /*
6906 * Parse the content of the element:
6907 */
6908 xmlParseContent(ctxt);
6909 if (!IS_CHAR(RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00006910 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00006911 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6912 ctxt->sax->error(ctxt->userData,
6913 "Premature end of data in tag %.30s\n", openTag);
6914 ctxt->wellFormed = 0;
6915 ctxt->disableSAX = 1;
6916
6917 /*
6918 * end of parsing of this node.
6919 */
6920 nodePop(ctxt);
6921 oldname = namePop(ctxt);
6922 spacePop(ctxt);
6923 if (oldname != NULL) {
6924#ifdef DEBUG_STACK
6925 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6926#endif
6927 xmlFree(oldname);
6928 }
6929 return;
6930 }
6931
6932 /*
6933 * parse the end of tag: '</' should be here.
6934 */
6935 xmlParseEndTag(ctxt);
6936
6937 /*
6938 * Capture end position and add node
6939 */
6940 if ( ret != NULL && ctxt->record_info ) {
6941 node_info.end_pos = ctxt->input->consumed +
6942 (CUR_PTR - ctxt->input->base);
6943 node_info.end_line = ctxt->input->line;
6944 node_info.node = ret;
6945 xmlParserAddNodeInfo(ctxt, &node_info);
6946 }
6947}
6948
6949/**
6950 * xmlParseVersionNum:
6951 * @ctxt: an XML parser context
6952 *
6953 * parse the XML version value.
6954 *
6955 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6956 *
6957 * Returns the string giving the XML version number, or NULL
6958 */
6959xmlChar *
6960xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6961 xmlChar *buf = NULL;
6962 int len = 0;
6963 int size = 10;
6964 xmlChar cur;
6965
6966 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6967 if (buf == NULL) {
6968 xmlGenericError(xmlGenericErrorContext,
6969 "malloc of %d byte failed\n", size);
6970 return(NULL);
6971 }
6972 cur = CUR;
6973 while (((cur >= 'a') && (cur <= 'z')) ||
6974 ((cur >= 'A') && (cur <= 'Z')) ||
6975 ((cur >= '0') && (cur <= '9')) ||
6976 (cur == '_') || (cur == '.') ||
6977 (cur == ':') || (cur == '-')) {
6978 if (len + 1 >= size) {
6979 size *= 2;
6980 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6981 if (buf == NULL) {
6982 xmlGenericError(xmlGenericErrorContext,
6983 "realloc of %d byte failed\n", size);
6984 return(NULL);
6985 }
6986 }
6987 buf[len++] = cur;
6988 NEXT;
6989 cur=CUR;
6990 }
6991 buf[len] = 0;
6992 return(buf);
6993}
6994
6995/**
6996 * xmlParseVersionInfo:
6997 * @ctxt: an XML parser context
6998 *
6999 * parse the XML version.
7000 *
7001 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7002 *
7003 * [25] Eq ::= S? '=' S?
7004 *
7005 * Returns the version string, e.g. "1.0"
7006 */
7007
7008xmlChar *
7009xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7010 xmlChar *version = NULL;
7011 const xmlChar *q;
7012
7013 if ((RAW == 'v') && (NXT(1) == 'e') &&
7014 (NXT(2) == 'r') && (NXT(3) == 's') &&
7015 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7016 (NXT(6) == 'n')) {
7017 SKIP(7);
7018 SKIP_BLANKS;
7019 if (RAW != '=') {
7020 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7021 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7022 ctxt->sax->error(ctxt->userData,
7023 "xmlParseVersionInfo : expected '='\n");
7024 ctxt->wellFormed = 0;
7025 ctxt->disableSAX = 1;
7026 return(NULL);
7027 }
7028 NEXT;
7029 SKIP_BLANKS;
7030 if (RAW == '"') {
7031 NEXT;
7032 q = CUR_PTR;
7033 version = xmlParseVersionNum(ctxt);
7034 if (RAW != '"') {
7035 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7036 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7037 ctxt->sax->error(ctxt->userData,
7038 "String not closed\n%.50s\n", q);
7039 ctxt->wellFormed = 0;
7040 ctxt->disableSAX = 1;
7041 } else
7042 NEXT;
7043 } else if (RAW == '\''){
7044 NEXT;
7045 q = CUR_PTR;
7046 version = xmlParseVersionNum(ctxt);
7047 if (RAW != '\'') {
7048 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7049 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7050 ctxt->sax->error(ctxt->userData,
7051 "String not closed\n%.50s\n", q);
7052 ctxt->wellFormed = 0;
7053 ctxt->disableSAX = 1;
7054 } else
7055 NEXT;
7056 } else {
7057 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7058 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7059 ctxt->sax->error(ctxt->userData,
7060 "xmlParseVersionInfo : expected ' or \"\n");
7061 ctxt->wellFormed = 0;
7062 ctxt->disableSAX = 1;
7063 }
7064 }
7065 return(version);
7066}
7067
7068/**
7069 * xmlParseEncName:
7070 * @ctxt: an XML parser context
7071 *
7072 * parse the XML encoding name
7073 *
7074 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7075 *
7076 * Returns the encoding name value or NULL
7077 */
7078xmlChar *
7079xmlParseEncName(xmlParserCtxtPtr ctxt) {
7080 xmlChar *buf = NULL;
7081 int len = 0;
7082 int size = 10;
7083 xmlChar cur;
7084
7085 cur = CUR;
7086 if (((cur >= 'a') && (cur <= 'z')) ||
7087 ((cur >= 'A') && (cur <= 'Z'))) {
7088 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7089 if (buf == NULL) {
7090 xmlGenericError(xmlGenericErrorContext,
7091 "malloc of %d byte failed\n", size);
7092 return(NULL);
7093 }
7094
7095 buf[len++] = cur;
7096 NEXT;
7097 cur = CUR;
7098 while (((cur >= 'a') && (cur <= 'z')) ||
7099 ((cur >= 'A') && (cur <= 'Z')) ||
7100 ((cur >= '0') && (cur <= '9')) ||
7101 (cur == '.') || (cur == '_') ||
7102 (cur == '-')) {
7103 if (len + 1 >= size) {
7104 size *= 2;
7105 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7106 if (buf == NULL) {
7107 xmlGenericError(xmlGenericErrorContext,
7108 "realloc of %d byte failed\n", size);
7109 return(NULL);
7110 }
7111 }
7112 buf[len++] = cur;
7113 NEXT;
7114 cur = CUR;
7115 if (cur == 0) {
7116 SHRINK;
7117 GROW;
7118 cur = CUR;
7119 }
7120 }
7121 buf[len] = 0;
7122 } else {
7123 ctxt->errNo = XML_ERR_ENCODING_NAME;
7124 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7125 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7126 ctxt->wellFormed = 0;
7127 ctxt->disableSAX = 1;
7128 }
7129 return(buf);
7130}
7131
7132/**
7133 * xmlParseEncodingDecl:
7134 * @ctxt: an XML parser context
7135 *
7136 * parse the XML encoding declaration
7137 *
7138 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7139 *
7140 * this setups the conversion filters.
7141 *
7142 * Returns the encoding value or NULL
7143 */
7144
7145xmlChar *
7146xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7147 xmlChar *encoding = NULL;
7148 const xmlChar *q;
7149
7150 SKIP_BLANKS;
7151 if ((RAW == 'e') && (NXT(1) == 'n') &&
7152 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7153 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7154 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7155 SKIP(8);
7156 SKIP_BLANKS;
7157 if (RAW != '=') {
7158 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7159 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7160 ctxt->sax->error(ctxt->userData,
7161 "xmlParseEncodingDecl : expected '='\n");
7162 ctxt->wellFormed = 0;
7163 ctxt->disableSAX = 1;
7164 return(NULL);
7165 }
7166 NEXT;
7167 SKIP_BLANKS;
7168 if (RAW == '"') {
7169 NEXT;
7170 q = CUR_PTR;
7171 encoding = xmlParseEncName(ctxt);
7172 if (RAW != '"') {
7173 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7174 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7175 ctxt->sax->error(ctxt->userData,
7176 "String not closed\n%.50s\n", q);
7177 ctxt->wellFormed = 0;
7178 ctxt->disableSAX = 1;
7179 } else
7180 NEXT;
7181 } else if (RAW == '\''){
7182 NEXT;
7183 q = CUR_PTR;
7184 encoding = xmlParseEncName(ctxt);
7185 if (RAW != '\'') {
7186 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7187 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7188 ctxt->sax->error(ctxt->userData,
7189 "String not closed\n%.50s\n", q);
7190 ctxt->wellFormed = 0;
7191 ctxt->disableSAX = 1;
7192 } else
7193 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00007194 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007195 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7196 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7197 ctxt->sax->error(ctxt->userData,
7198 "xmlParseEncodingDecl : expected ' or \"\n");
7199 ctxt->wellFormed = 0;
7200 ctxt->disableSAX = 1;
7201 }
7202 if (encoding != NULL) {
7203 xmlCharEncoding enc;
7204 xmlCharEncodingHandlerPtr handler;
7205
7206 if (ctxt->input->encoding != NULL)
7207 xmlFree((xmlChar *) ctxt->input->encoding);
7208 ctxt->input->encoding = encoding;
7209
7210 enc = xmlParseCharEncoding((const char *) encoding);
7211 /*
7212 * registered set of known encodings
7213 */
7214 if (enc != XML_CHAR_ENCODING_ERROR) {
7215 xmlSwitchEncoding(ctxt, enc);
7216 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7217 xmlFree(encoding);
7218 return(NULL);
7219 }
7220 } else {
7221 /*
7222 * fallback for unknown encodings
7223 */
7224 handler = xmlFindCharEncodingHandler((const char *) encoding);
7225 if (handler != NULL) {
7226 xmlSwitchToEncoding(ctxt, handler);
7227 } else {
7228 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7229 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7230 ctxt->sax->error(ctxt->userData,
7231 "Unsupported encoding %s\n", encoding);
7232 return(NULL);
7233 }
7234 }
7235 }
7236 }
7237 return(encoding);
7238}
7239
7240/**
7241 * xmlParseSDDecl:
7242 * @ctxt: an XML parser context
7243 *
7244 * parse the XML standalone declaration
7245 *
7246 * [32] SDDecl ::= S 'standalone' Eq
7247 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7248 *
7249 * [ VC: Standalone Document Declaration ]
7250 * TODO The standalone document declaration must have the value "no"
7251 * if any external markup declarations contain declarations of:
7252 * - attributes with default values, if elements to which these
7253 * attributes apply appear in the document without specifications
7254 * of values for these attributes, or
7255 * - entities (other than amp, lt, gt, apos, quot), if references
7256 * to those entities appear in the document, or
7257 * - attributes with values subject to normalization, where the
7258 * attribute appears in the document with a value which will change
7259 * as a result of normalization, or
7260 * - element types with element content, if white space occurs directly
7261 * within any instance of those types.
7262 *
7263 * Returns 1 if standalone, 0 otherwise
7264 */
7265
7266int
7267xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7268 int standalone = -1;
7269
7270 SKIP_BLANKS;
7271 if ((RAW == 's') && (NXT(1) == 't') &&
7272 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7273 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7274 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7275 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7276 SKIP(10);
7277 SKIP_BLANKS;
7278 if (RAW != '=') {
7279 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7280 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7281 ctxt->sax->error(ctxt->userData,
7282 "XML standalone declaration : expected '='\n");
7283 ctxt->wellFormed = 0;
7284 ctxt->disableSAX = 1;
7285 return(standalone);
7286 }
7287 NEXT;
7288 SKIP_BLANKS;
7289 if (RAW == '\''){
7290 NEXT;
7291 if ((RAW == 'n') && (NXT(1) == 'o')) {
7292 standalone = 0;
7293 SKIP(2);
7294 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7295 (NXT(2) == 's')) {
7296 standalone = 1;
7297 SKIP(3);
7298 } else {
7299 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7300 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7301 ctxt->sax->error(ctxt->userData,
7302 "standalone accepts only 'yes' or 'no'\n");
7303 ctxt->wellFormed = 0;
7304 ctxt->disableSAX = 1;
7305 }
7306 if (RAW != '\'') {
7307 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7308 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7309 ctxt->sax->error(ctxt->userData, "String not closed\n");
7310 ctxt->wellFormed = 0;
7311 ctxt->disableSAX = 1;
7312 } else
7313 NEXT;
7314 } else if (RAW == '"'){
7315 NEXT;
7316 if ((RAW == 'n') && (NXT(1) == 'o')) {
7317 standalone = 0;
7318 SKIP(2);
7319 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7320 (NXT(2) == 's')) {
7321 standalone = 1;
7322 SKIP(3);
7323 } else {
7324 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7325 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7326 ctxt->sax->error(ctxt->userData,
7327 "standalone accepts only 'yes' or 'no'\n");
7328 ctxt->wellFormed = 0;
7329 ctxt->disableSAX = 1;
7330 }
7331 if (RAW != '"') {
7332 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7333 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7334 ctxt->sax->error(ctxt->userData, "String not closed\n");
7335 ctxt->wellFormed = 0;
7336 ctxt->disableSAX = 1;
7337 } else
7338 NEXT;
7339 } else {
7340 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7341 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7342 ctxt->sax->error(ctxt->userData,
7343 "Standalone value not found\n");
7344 ctxt->wellFormed = 0;
7345 ctxt->disableSAX = 1;
7346 }
7347 }
7348 return(standalone);
7349}
7350
7351/**
7352 * xmlParseXMLDecl:
7353 * @ctxt: an XML parser context
7354 *
7355 * parse an XML declaration header
7356 *
7357 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7358 */
7359
7360void
7361xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7362 xmlChar *version;
7363
7364 /*
7365 * We know that '<?xml' is here.
7366 */
7367 SKIP(5);
7368
7369 if (!IS_BLANK(RAW)) {
7370 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7371 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7372 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7373 ctxt->wellFormed = 0;
7374 ctxt->disableSAX = 1;
7375 }
7376 SKIP_BLANKS;
7377
7378 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007379 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007380 */
7381 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007382 if (version == NULL) {
7383 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7384 ctxt->sax->error(ctxt->userData,
7385 "Malformed declaration expecting version\n");
7386 ctxt->wellFormed = 0;
7387 ctxt->disableSAX = 1;
7388 } else {
7389 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7390 /*
7391 * TODO: Blueberry should be detected here
7392 */
7393 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7394 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7395 version);
7396 }
7397 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007398 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007399 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007400 }
Owen Taylor3473f882001-02-23 17:55:21 +00007401
7402 /*
7403 * We may have the encoding declaration
7404 */
7405 if (!IS_BLANK(RAW)) {
7406 if ((RAW == '?') && (NXT(1) == '>')) {
7407 SKIP(2);
7408 return;
7409 }
7410 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7411 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7412 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7413 ctxt->wellFormed = 0;
7414 ctxt->disableSAX = 1;
7415 }
7416 xmlParseEncodingDecl(ctxt);
7417 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7418 /*
7419 * The XML REC instructs us to stop parsing right here
7420 */
7421 return;
7422 }
7423
7424 /*
7425 * We may have the standalone status.
7426 */
7427 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7428 if ((RAW == '?') && (NXT(1) == '>')) {
7429 SKIP(2);
7430 return;
7431 }
7432 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7433 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7434 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7435 ctxt->wellFormed = 0;
7436 ctxt->disableSAX = 1;
7437 }
7438 SKIP_BLANKS;
7439 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7440
7441 SKIP_BLANKS;
7442 if ((RAW == '?') && (NXT(1) == '>')) {
7443 SKIP(2);
7444 } else if (RAW == '>') {
7445 /* Deprecated old WD ... */
7446 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7447 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7448 ctxt->sax->error(ctxt->userData,
7449 "XML declaration must end-up with '?>'\n");
7450 ctxt->wellFormed = 0;
7451 ctxt->disableSAX = 1;
7452 NEXT;
7453 } else {
7454 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7455 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7456 ctxt->sax->error(ctxt->userData,
7457 "parsing XML declaration: '?>' expected\n");
7458 ctxt->wellFormed = 0;
7459 ctxt->disableSAX = 1;
7460 MOVETO_ENDTAG(CUR_PTR);
7461 NEXT;
7462 }
7463}
7464
7465/**
7466 * xmlParseMisc:
7467 * @ctxt: an XML parser context
7468 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007469 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007470 *
7471 * [27] Misc ::= Comment | PI | S
7472 */
7473
7474void
7475xmlParseMisc(xmlParserCtxtPtr ctxt) {
7476 while (((RAW == '<') && (NXT(1) == '?')) ||
7477 ((RAW == '<') && (NXT(1) == '!') &&
7478 (NXT(2) == '-') && (NXT(3) == '-')) ||
7479 IS_BLANK(CUR)) {
7480 if ((RAW == '<') && (NXT(1) == '?')) {
7481 xmlParsePI(ctxt);
7482 } else if (IS_BLANK(CUR)) {
7483 NEXT;
7484 } else
7485 xmlParseComment(ctxt);
7486 }
7487}
7488
7489/**
7490 * xmlParseDocument:
7491 * @ctxt: an XML parser context
7492 *
7493 * parse an XML document (and build a tree if using the standard SAX
7494 * interface).
7495 *
7496 * [1] document ::= prolog element Misc*
7497 *
7498 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7499 *
7500 * Returns 0, -1 in case of error. the parser context is augmented
7501 * as a result of the parsing.
7502 */
7503
7504int
7505xmlParseDocument(xmlParserCtxtPtr ctxt) {
7506 xmlChar start[4];
7507 xmlCharEncoding enc;
7508
7509 xmlInitParser();
7510
7511 GROW;
7512
7513 /*
7514 * SAX: beginning of the document processing.
7515 */
7516 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7517 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7518
Daniel Veillard50f34372001-08-03 12:06:36 +00007519 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007520 /*
7521 * Get the 4 first bytes and decode the charset
7522 * if enc != XML_CHAR_ENCODING_NONE
7523 * plug some encoding conversion routines.
7524 */
7525 start[0] = RAW;
7526 start[1] = NXT(1);
7527 start[2] = NXT(2);
7528 start[3] = NXT(3);
7529 enc = xmlDetectCharEncoding(start, 4);
7530 if (enc != XML_CHAR_ENCODING_NONE) {
7531 xmlSwitchEncoding(ctxt, enc);
7532 }
Owen Taylor3473f882001-02-23 17:55:21 +00007533 }
7534
7535
7536 if (CUR == 0) {
7537 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7538 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7539 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7540 ctxt->wellFormed = 0;
7541 ctxt->disableSAX = 1;
7542 }
7543
7544 /*
7545 * Check for the XMLDecl in the Prolog.
7546 */
7547 GROW;
7548 if ((RAW == '<') && (NXT(1) == '?') &&
7549 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7550 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7551
7552 /*
7553 * Note that we will switch encoding on the fly.
7554 */
7555 xmlParseXMLDecl(ctxt);
7556 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7557 /*
7558 * The XML REC instructs us to stop parsing right here
7559 */
7560 return(-1);
7561 }
7562 ctxt->standalone = ctxt->input->standalone;
7563 SKIP_BLANKS;
7564 } else {
7565 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7566 }
7567 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7568 ctxt->sax->startDocument(ctxt->userData);
7569
7570 /*
7571 * The Misc part of the Prolog
7572 */
7573 GROW;
7574 xmlParseMisc(ctxt);
7575
7576 /*
7577 * Then possibly doc type declaration(s) and more Misc
7578 * (doctypedecl Misc*)?
7579 */
7580 GROW;
7581 if ((RAW == '<') && (NXT(1) == '!') &&
7582 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7583 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7584 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7585 (NXT(8) == 'E')) {
7586
7587 ctxt->inSubset = 1;
7588 xmlParseDocTypeDecl(ctxt);
7589 if (RAW == '[') {
7590 ctxt->instate = XML_PARSER_DTD;
7591 xmlParseInternalSubset(ctxt);
7592 }
7593
7594 /*
7595 * Create and update the external subset.
7596 */
7597 ctxt->inSubset = 2;
7598 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7599 (!ctxt->disableSAX))
7600 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7601 ctxt->extSubSystem, ctxt->extSubURI);
7602 ctxt->inSubset = 0;
7603
7604
7605 ctxt->instate = XML_PARSER_PROLOG;
7606 xmlParseMisc(ctxt);
7607 }
7608
7609 /*
7610 * Time to start parsing the tree itself
7611 */
7612 GROW;
7613 if (RAW != '<') {
7614 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7615 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7616 ctxt->sax->error(ctxt->userData,
7617 "Start tag expected, '<' not found\n");
7618 ctxt->wellFormed = 0;
7619 ctxt->disableSAX = 1;
7620 ctxt->instate = XML_PARSER_EOF;
7621 } else {
7622 ctxt->instate = XML_PARSER_CONTENT;
7623 xmlParseElement(ctxt);
7624 ctxt->instate = XML_PARSER_EPILOG;
7625
7626
7627 /*
7628 * The Misc part at the end
7629 */
7630 xmlParseMisc(ctxt);
7631
7632 if (RAW != 0) {
7633 ctxt->errNo = XML_ERR_DOCUMENT_END;
7634 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7635 ctxt->sax->error(ctxt->userData,
7636 "Extra content at the end of the document\n");
7637 ctxt->wellFormed = 0;
7638 ctxt->disableSAX = 1;
7639 }
7640 ctxt->instate = XML_PARSER_EOF;
7641 }
7642
7643 /*
7644 * SAX: end of the document processing.
7645 */
7646 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7647 (!ctxt->disableSAX))
7648 ctxt->sax->endDocument(ctxt->userData);
7649
Daniel Veillardc7612992002-02-17 22:47:37 +00007650 if (! ctxt->wellFormed) {
7651 ctxt->valid = 0;
7652 return(-1);
7653 }
Owen Taylor3473f882001-02-23 17:55:21 +00007654 return(0);
7655}
7656
7657/**
7658 * xmlParseExtParsedEnt:
7659 * @ctxt: an XML parser context
7660 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007661 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00007662 * An external general parsed entity is well-formed if it matches the
7663 * production labeled extParsedEnt.
7664 *
7665 * [78] extParsedEnt ::= TextDecl? content
7666 *
7667 * Returns 0, -1 in case of error. the parser context is augmented
7668 * as a result of the parsing.
7669 */
7670
7671int
7672xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7673 xmlChar start[4];
7674 xmlCharEncoding enc;
7675
7676 xmlDefaultSAXHandlerInit();
7677
7678 GROW;
7679
7680 /*
7681 * SAX: beginning of the document processing.
7682 */
7683 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7684 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7685
7686 /*
7687 * Get the 4 first bytes and decode the charset
7688 * if enc != XML_CHAR_ENCODING_NONE
7689 * plug some encoding conversion routines.
7690 */
7691 start[0] = RAW;
7692 start[1] = NXT(1);
7693 start[2] = NXT(2);
7694 start[3] = NXT(3);
7695 enc = xmlDetectCharEncoding(start, 4);
7696 if (enc != XML_CHAR_ENCODING_NONE) {
7697 xmlSwitchEncoding(ctxt, enc);
7698 }
7699
7700
7701 if (CUR == 0) {
7702 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7703 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7704 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7705 ctxt->wellFormed = 0;
7706 ctxt->disableSAX = 1;
7707 }
7708
7709 /*
7710 * Check for the XMLDecl in the Prolog.
7711 */
7712 GROW;
7713 if ((RAW == '<') && (NXT(1) == '?') &&
7714 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7715 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7716
7717 /*
7718 * Note that we will switch encoding on the fly.
7719 */
7720 xmlParseXMLDecl(ctxt);
7721 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7722 /*
7723 * The XML REC instructs us to stop parsing right here
7724 */
7725 return(-1);
7726 }
7727 SKIP_BLANKS;
7728 } else {
7729 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7730 }
7731 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7732 ctxt->sax->startDocument(ctxt->userData);
7733
7734 /*
7735 * Doing validity checking on chunk doesn't make sense
7736 */
7737 ctxt->instate = XML_PARSER_CONTENT;
7738 ctxt->validate = 0;
7739 ctxt->loadsubset = 0;
7740 ctxt->depth = 0;
7741
7742 xmlParseContent(ctxt);
7743
7744 if ((RAW == '<') && (NXT(1) == '/')) {
7745 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7746 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7747 ctxt->sax->error(ctxt->userData,
7748 "chunk is not well balanced\n");
7749 ctxt->wellFormed = 0;
7750 ctxt->disableSAX = 1;
7751 } else if (RAW != 0) {
7752 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7753 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7754 ctxt->sax->error(ctxt->userData,
7755 "extra content at the end of well balanced chunk\n");
7756 ctxt->wellFormed = 0;
7757 ctxt->disableSAX = 1;
7758 }
7759
7760 /*
7761 * SAX: end of the document processing.
7762 */
7763 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7764 (!ctxt->disableSAX))
7765 ctxt->sax->endDocument(ctxt->userData);
7766
7767 if (! ctxt->wellFormed) return(-1);
7768 return(0);
7769}
7770
7771/************************************************************************
7772 * *
7773 * Progressive parsing interfaces *
7774 * *
7775 ************************************************************************/
7776
7777/**
7778 * xmlParseLookupSequence:
7779 * @ctxt: an XML parser context
7780 * @first: the first char to lookup
7781 * @next: the next char to lookup or zero
7782 * @third: the next char to lookup or zero
7783 *
7784 * Try to find if a sequence (first, next, third) or just (first next) or
7785 * (first) is available in the input stream.
7786 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7787 * to avoid rescanning sequences of bytes, it DOES change the state of the
7788 * parser, do not use liberally.
7789 *
7790 * Returns the index to the current parsing point if the full sequence
7791 * is available, -1 otherwise.
7792 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007793static int
Owen Taylor3473f882001-02-23 17:55:21 +00007794xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7795 xmlChar next, xmlChar third) {
7796 int base, len;
7797 xmlParserInputPtr in;
7798 const xmlChar *buf;
7799
7800 in = ctxt->input;
7801 if (in == NULL) return(-1);
7802 base = in->cur - in->base;
7803 if (base < 0) return(-1);
7804 if (ctxt->checkIndex > base)
7805 base = ctxt->checkIndex;
7806 if (in->buf == NULL) {
7807 buf = in->base;
7808 len = in->length;
7809 } else {
7810 buf = in->buf->buffer->content;
7811 len = in->buf->buffer->use;
7812 }
7813 /* take into account the sequence length */
7814 if (third) len -= 2;
7815 else if (next) len --;
7816 for (;base < len;base++) {
7817 if (buf[base] == first) {
7818 if (third != 0) {
7819 if ((buf[base + 1] != next) ||
7820 (buf[base + 2] != third)) continue;
7821 } else if (next != 0) {
7822 if (buf[base + 1] != next) continue;
7823 }
7824 ctxt->checkIndex = 0;
7825#ifdef DEBUG_PUSH
7826 if (next == 0)
7827 xmlGenericError(xmlGenericErrorContext,
7828 "PP: lookup '%c' found at %d\n",
7829 first, base);
7830 else if (third == 0)
7831 xmlGenericError(xmlGenericErrorContext,
7832 "PP: lookup '%c%c' found at %d\n",
7833 first, next, base);
7834 else
7835 xmlGenericError(xmlGenericErrorContext,
7836 "PP: lookup '%c%c%c' found at %d\n",
7837 first, next, third, base);
7838#endif
7839 return(base - (in->cur - in->base));
7840 }
7841 }
7842 ctxt->checkIndex = base;
7843#ifdef DEBUG_PUSH
7844 if (next == 0)
7845 xmlGenericError(xmlGenericErrorContext,
7846 "PP: lookup '%c' failed\n", first);
7847 else if (third == 0)
7848 xmlGenericError(xmlGenericErrorContext,
7849 "PP: lookup '%c%c' failed\n", first, next);
7850 else
7851 xmlGenericError(xmlGenericErrorContext,
7852 "PP: lookup '%c%c%c' failed\n", first, next, third);
7853#endif
7854 return(-1);
7855}
7856
7857/**
7858 * xmlParseTryOrFinish:
7859 * @ctxt: an XML parser context
7860 * @terminate: last chunk indicator
7861 *
7862 * Try to progress on parsing
7863 *
7864 * Returns zero if no parsing was possible
7865 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007866static int
Owen Taylor3473f882001-02-23 17:55:21 +00007867xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7868 int ret = 0;
7869 int avail;
7870 xmlChar cur, next;
7871
7872#ifdef DEBUG_PUSH
7873 switch (ctxt->instate) {
7874 case XML_PARSER_EOF:
7875 xmlGenericError(xmlGenericErrorContext,
7876 "PP: try EOF\n"); break;
7877 case XML_PARSER_START:
7878 xmlGenericError(xmlGenericErrorContext,
7879 "PP: try START\n"); break;
7880 case XML_PARSER_MISC:
7881 xmlGenericError(xmlGenericErrorContext,
7882 "PP: try MISC\n");break;
7883 case XML_PARSER_COMMENT:
7884 xmlGenericError(xmlGenericErrorContext,
7885 "PP: try COMMENT\n");break;
7886 case XML_PARSER_PROLOG:
7887 xmlGenericError(xmlGenericErrorContext,
7888 "PP: try PROLOG\n");break;
7889 case XML_PARSER_START_TAG:
7890 xmlGenericError(xmlGenericErrorContext,
7891 "PP: try START_TAG\n");break;
7892 case XML_PARSER_CONTENT:
7893 xmlGenericError(xmlGenericErrorContext,
7894 "PP: try CONTENT\n");break;
7895 case XML_PARSER_CDATA_SECTION:
7896 xmlGenericError(xmlGenericErrorContext,
7897 "PP: try CDATA_SECTION\n");break;
7898 case XML_PARSER_END_TAG:
7899 xmlGenericError(xmlGenericErrorContext,
7900 "PP: try END_TAG\n");break;
7901 case XML_PARSER_ENTITY_DECL:
7902 xmlGenericError(xmlGenericErrorContext,
7903 "PP: try ENTITY_DECL\n");break;
7904 case XML_PARSER_ENTITY_VALUE:
7905 xmlGenericError(xmlGenericErrorContext,
7906 "PP: try ENTITY_VALUE\n");break;
7907 case XML_PARSER_ATTRIBUTE_VALUE:
7908 xmlGenericError(xmlGenericErrorContext,
7909 "PP: try ATTRIBUTE_VALUE\n");break;
7910 case XML_PARSER_DTD:
7911 xmlGenericError(xmlGenericErrorContext,
7912 "PP: try DTD\n");break;
7913 case XML_PARSER_EPILOG:
7914 xmlGenericError(xmlGenericErrorContext,
7915 "PP: try EPILOG\n");break;
7916 case XML_PARSER_PI:
7917 xmlGenericError(xmlGenericErrorContext,
7918 "PP: try PI\n");break;
7919 case XML_PARSER_IGNORE:
7920 xmlGenericError(xmlGenericErrorContext,
7921 "PP: try IGNORE\n");break;
7922 }
7923#endif
7924
7925 while (1) {
7926 /*
7927 * Pop-up of finished entities.
7928 */
7929 while ((RAW == 0) && (ctxt->inputNr > 1))
7930 xmlPopInput(ctxt);
7931
7932 if (ctxt->input ==NULL) break;
7933 if (ctxt->input->buf == NULL)
7934 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7935 else
7936 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7937 if (avail < 1)
7938 goto done;
7939 switch (ctxt->instate) {
7940 case XML_PARSER_EOF:
7941 /*
7942 * Document parsing is done !
7943 */
7944 goto done;
7945 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00007946 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
7947 xmlChar start[4];
7948 xmlCharEncoding enc;
7949
7950 /*
7951 * Very first chars read from the document flow.
7952 */
7953 if (avail < 4)
7954 goto done;
7955
7956 /*
7957 * Get the 4 first bytes and decode the charset
7958 * if enc != XML_CHAR_ENCODING_NONE
7959 * plug some encoding conversion routines.
7960 */
7961 start[0] = RAW;
7962 start[1] = NXT(1);
7963 start[2] = NXT(2);
7964 start[3] = NXT(3);
7965 enc = xmlDetectCharEncoding(start, 4);
7966 if (enc != XML_CHAR_ENCODING_NONE) {
7967 xmlSwitchEncoding(ctxt, enc);
7968 }
7969 break;
7970 }
Owen Taylor3473f882001-02-23 17:55:21 +00007971
7972 cur = ctxt->input->cur[0];
7973 next = ctxt->input->cur[1];
7974 if (cur == 0) {
7975 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7976 ctxt->sax->setDocumentLocator(ctxt->userData,
7977 &xmlDefaultSAXLocator);
7978 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7979 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7980 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7981 ctxt->wellFormed = 0;
7982 ctxt->disableSAX = 1;
7983 ctxt->instate = XML_PARSER_EOF;
7984#ifdef DEBUG_PUSH
7985 xmlGenericError(xmlGenericErrorContext,
7986 "PP: entering EOF\n");
7987#endif
7988 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7989 ctxt->sax->endDocument(ctxt->userData);
7990 goto done;
7991 }
7992 if ((cur == '<') && (next == '?')) {
7993 /* PI or XML decl */
7994 if (avail < 5) return(ret);
7995 if ((!terminate) &&
7996 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7997 return(ret);
7998 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7999 ctxt->sax->setDocumentLocator(ctxt->userData,
8000 &xmlDefaultSAXLocator);
8001 if ((ctxt->input->cur[2] == 'x') &&
8002 (ctxt->input->cur[3] == 'm') &&
8003 (ctxt->input->cur[4] == 'l') &&
8004 (IS_BLANK(ctxt->input->cur[5]))) {
8005 ret += 5;
8006#ifdef DEBUG_PUSH
8007 xmlGenericError(xmlGenericErrorContext,
8008 "PP: Parsing XML Decl\n");
8009#endif
8010 xmlParseXMLDecl(ctxt);
8011 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8012 /*
8013 * The XML REC instructs us to stop parsing right
8014 * here
8015 */
8016 ctxt->instate = XML_PARSER_EOF;
8017 return(0);
8018 }
8019 ctxt->standalone = ctxt->input->standalone;
8020 if ((ctxt->encoding == NULL) &&
8021 (ctxt->input->encoding != NULL))
8022 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8023 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8024 (!ctxt->disableSAX))
8025 ctxt->sax->startDocument(ctxt->userData);
8026 ctxt->instate = XML_PARSER_MISC;
8027#ifdef DEBUG_PUSH
8028 xmlGenericError(xmlGenericErrorContext,
8029 "PP: entering MISC\n");
8030#endif
8031 } else {
8032 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8033 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8034 (!ctxt->disableSAX))
8035 ctxt->sax->startDocument(ctxt->userData);
8036 ctxt->instate = XML_PARSER_MISC;
8037#ifdef DEBUG_PUSH
8038 xmlGenericError(xmlGenericErrorContext,
8039 "PP: entering MISC\n");
8040#endif
8041 }
8042 } else {
8043 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8044 ctxt->sax->setDocumentLocator(ctxt->userData,
8045 &xmlDefaultSAXLocator);
8046 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8047 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8048 (!ctxt->disableSAX))
8049 ctxt->sax->startDocument(ctxt->userData);
8050 ctxt->instate = XML_PARSER_MISC;
8051#ifdef DEBUG_PUSH
8052 xmlGenericError(xmlGenericErrorContext,
8053 "PP: entering MISC\n");
8054#endif
8055 }
8056 break;
8057 case XML_PARSER_MISC:
8058 SKIP_BLANKS;
8059 if (ctxt->input->buf == NULL)
8060 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8061 else
8062 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8063 if (avail < 2)
8064 goto done;
8065 cur = ctxt->input->cur[0];
8066 next = ctxt->input->cur[1];
8067 if ((cur == '<') && (next == '?')) {
8068 if ((!terminate) &&
8069 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8070 goto done;
8071#ifdef DEBUG_PUSH
8072 xmlGenericError(xmlGenericErrorContext,
8073 "PP: Parsing PI\n");
8074#endif
8075 xmlParsePI(ctxt);
8076 } else if ((cur == '<') && (next == '!') &&
8077 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8078 if ((!terminate) &&
8079 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8080 goto done;
8081#ifdef DEBUG_PUSH
8082 xmlGenericError(xmlGenericErrorContext,
8083 "PP: Parsing Comment\n");
8084#endif
8085 xmlParseComment(ctxt);
8086 ctxt->instate = XML_PARSER_MISC;
8087 } else if ((cur == '<') && (next == '!') &&
8088 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8089 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8090 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8091 (ctxt->input->cur[8] == 'E')) {
8092 if ((!terminate) &&
8093 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8094 goto done;
8095#ifdef DEBUG_PUSH
8096 xmlGenericError(xmlGenericErrorContext,
8097 "PP: Parsing internal subset\n");
8098#endif
8099 ctxt->inSubset = 1;
8100 xmlParseDocTypeDecl(ctxt);
8101 if (RAW == '[') {
8102 ctxt->instate = XML_PARSER_DTD;
8103#ifdef DEBUG_PUSH
8104 xmlGenericError(xmlGenericErrorContext,
8105 "PP: entering DTD\n");
8106#endif
8107 } else {
8108 /*
8109 * Create and update the external subset.
8110 */
8111 ctxt->inSubset = 2;
8112 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8113 (ctxt->sax->externalSubset != NULL))
8114 ctxt->sax->externalSubset(ctxt->userData,
8115 ctxt->intSubName, ctxt->extSubSystem,
8116 ctxt->extSubURI);
8117 ctxt->inSubset = 0;
8118 ctxt->instate = XML_PARSER_PROLOG;
8119#ifdef DEBUG_PUSH
8120 xmlGenericError(xmlGenericErrorContext,
8121 "PP: entering PROLOG\n");
8122#endif
8123 }
8124 } else if ((cur == '<') && (next == '!') &&
8125 (avail < 9)) {
8126 goto done;
8127 } else {
8128 ctxt->instate = XML_PARSER_START_TAG;
8129#ifdef DEBUG_PUSH
8130 xmlGenericError(xmlGenericErrorContext,
8131 "PP: entering START_TAG\n");
8132#endif
8133 }
8134 break;
8135 case XML_PARSER_IGNORE:
8136 xmlGenericError(xmlGenericErrorContext,
8137 "PP: internal error, state == IGNORE");
8138 ctxt->instate = XML_PARSER_DTD;
8139#ifdef DEBUG_PUSH
8140 xmlGenericError(xmlGenericErrorContext,
8141 "PP: entering DTD\n");
8142#endif
8143 break;
8144 case XML_PARSER_PROLOG:
8145 SKIP_BLANKS;
8146 if (ctxt->input->buf == NULL)
8147 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8148 else
8149 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8150 if (avail < 2)
8151 goto done;
8152 cur = ctxt->input->cur[0];
8153 next = ctxt->input->cur[1];
8154 if ((cur == '<') && (next == '?')) {
8155 if ((!terminate) &&
8156 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8157 goto done;
8158#ifdef DEBUG_PUSH
8159 xmlGenericError(xmlGenericErrorContext,
8160 "PP: Parsing PI\n");
8161#endif
8162 xmlParsePI(ctxt);
8163 } else if ((cur == '<') && (next == '!') &&
8164 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8165 if ((!terminate) &&
8166 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8167 goto done;
8168#ifdef DEBUG_PUSH
8169 xmlGenericError(xmlGenericErrorContext,
8170 "PP: Parsing Comment\n");
8171#endif
8172 xmlParseComment(ctxt);
8173 ctxt->instate = XML_PARSER_PROLOG;
8174 } else if ((cur == '<') && (next == '!') &&
8175 (avail < 4)) {
8176 goto done;
8177 } else {
8178 ctxt->instate = XML_PARSER_START_TAG;
8179#ifdef DEBUG_PUSH
8180 xmlGenericError(xmlGenericErrorContext,
8181 "PP: entering START_TAG\n");
8182#endif
8183 }
8184 break;
8185 case XML_PARSER_EPILOG:
8186 SKIP_BLANKS;
8187 if (ctxt->input->buf == NULL)
8188 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8189 else
8190 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8191 if (avail < 2)
8192 goto done;
8193 cur = ctxt->input->cur[0];
8194 next = ctxt->input->cur[1];
8195 if ((cur == '<') && (next == '?')) {
8196 if ((!terminate) &&
8197 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8198 goto done;
8199#ifdef DEBUG_PUSH
8200 xmlGenericError(xmlGenericErrorContext,
8201 "PP: Parsing PI\n");
8202#endif
8203 xmlParsePI(ctxt);
8204 ctxt->instate = XML_PARSER_EPILOG;
8205 } else if ((cur == '<') && (next == '!') &&
8206 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8207 if ((!terminate) &&
8208 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8209 goto done;
8210#ifdef DEBUG_PUSH
8211 xmlGenericError(xmlGenericErrorContext,
8212 "PP: Parsing Comment\n");
8213#endif
8214 xmlParseComment(ctxt);
8215 ctxt->instate = XML_PARSER_EPILOG;
8216 } else if ((cur == '<') && (next == '!') &&
8217 (avail < 4)) {
8218 goto done;
8219 } else {
8220 ctxt->errNo = XML_ERR_DOCUMENT_END;
8221 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8222 ctxt->sax->error(ctxt->userData,
8223 "Extra content at the end of the document\n");
8224 ctxt->wellFormed = 0;
8225 ctxt->disableSAX = 1;
8226 ctxt->instate = XML_PARSER_EOF;
8227#ifdef DEBUG_PUSH
8228 xmlGenericError(xmlGenericErrorContext,
8229 "PP: entering EOF\n");
8230#endif
8231 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8232 (!ctxt->disableSAX))
8233 ctxt->sax->endDocument(ctxt->userData);
8234 goto done;
8235 }
8236 break;
8237 case XML_PARSER_START_TAG: {
8238 xmlChar *name, *oldname;
8239
8240 if ((avail < 2) && (ctxt->inputNr == 1))
8241 goto done;
8242 cur = ctxt->input->cur[0];
8243 if (cur != '<') {
8244 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8245 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8246 ctxt->sax->error(ctxt->userData,
8247 "Start tag expect, '<' not found\n");
8248 ctxt->wellFormed = 0;
8249 ctxt->disableSAX = 1;
8250 ctxt->instate = XML_PARSER_EOF;
8251#ifdef DEBUG_PUSH
8252 xmlGenericError(xmlGenericErrorContext,
8253 "PP: entering EOF\n");
8254#endif
8255 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8256 (!ctxt->disableSAX))
8257 ctxt->sax->endDocument(ctxt->userData);
8258 goto done;
8259 }
8260 if ((!terminate) &&
8261 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8262 goto done;
8263 if (ctxt->spaceNr == 0)
8264 spacePush(ctxt, -1);
8265 else
8266 spacePush(ctxt, *ctxt->space);
8267 name = xmlParseStartTag(ctxt);
8268 if (name == NULL) {
8269 spacePop(ctxt);
8270 ctxt->instate = XML_PARSER_EOF;
8271#ifdef DEBUG_PUSH
8272 xmlGenericError(xmlGenericErrorContext,
8273 "PP: entering EOF\n");
8274#endif
8275 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8276 (!ctxt->disableSAX))
8277 ctxt->sax->endDocument(ctxt->userData);
8278 goto done;
8279 }
8280 namePush(ctxt, xmlStrdup(name));
8281
8282 /*
8283 * [ VC: Root Element Type ]
8284 * The Name in the document type declaration must match
8285 * the element type of the root element.
8286 */
8287 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8288 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8289 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8290
8291 /*
8292 * Check for an Empty Element.
8293 */
8294 if ((RAW == '/') && (NXT(1) == '>')) {
8295 SKIP(2);
8296 if ((ctxt->sax != NULL) &&
8297 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8298 ctxt->sax->endElement(ctxt->userData, name);
8299 xmlFree(name);
8300 oldname = namePop(ctxt);
8301 spacePop(ctxt);
8302 if (oldname != NULL) {
8303#ifdef DEBUG_STACK
8304 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8305#endif
8306 xmlFree(oldname);
8307 }
8308 if (ctxt->name == NULL) {
8309 ctxt->instate = XML_PARSER_EPILOG;
8310#ifdef DEBUG_PUSH
8311 xmlGenericError(xmlGenericErrorContext,
8312 "PP: entering EPILOG\n");
8313#endif
8314 } else {
8315 ctxt->instate = XML_PARSER_CONTENT;
8316#ifdef DEBUG_PUSH
8317 xmlGenericError(xmlGenericErrorContext,
8318 "PP: entering CONTENT\n");
8319#endif
8320 }
8321 break;
8322 }
8323 if (RAW == '>') {
8324 NEXT;
8325 } else {
8326 ctxt->errNo = XML_ERR_GT_REQUIRED;
8327 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8328 ctxt->sax->error(ctxt->userData,
8329 "Couldn't find end of Start Tag %s\n",
8330 name);
8331 ctxt->wellFormed = 0;
8332 ctxt->disableSAX = 1;
8333
8334 /*
8335 * end of parsing of this node.
8336 */
8337 nodePop(ctxt);
8338 oldname = namePop(ctxt);
8339 spacePop(ctxt);
8340 if (oldname != NULL) {
8341#ifdef DEBUG_STACK
8342 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8343#endif
8344 xmlFree(oldname);
8345 }
8346 }
8347 xmlFree(name);
8348 ctxt->instate = XML_PARSER_CONTENT;
8349#ifdef DEBUG_PUSH
8350 xmlGenericError(xmlGenericErrorContext,
8351 "PP: entering CONTENT\n");
8352#endif
8353 break;
8354 }
8355 case XML_PARSER_CONTENT: {
8356 const xmlChar *test;
8357 int cons;
Daniel Veillard04be4f52001-03-26 21:23:53 +00008358 int tok;
Owen Taylor3473f882001-02-23 17:55:21 +00008359
8360 /*
8361 * Handle preparsed entities and charRef
8362 */
8363 if (ctxt->token != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008364 xmlChar current[2] = { 0 , 0 } ;
Owen Taylor3473f882001-02-23 17:55:21 +00008365
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008366 current[0] = (xmlChar) ctxt->token;
Owen Taylor3473f882001-02-23 17:55:21 +00008367 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8368 (ctxt->sax->characters != NULL))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008369 ctxt->sax->characters(ctxt->userData, current, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00008370 ctxt->token = 0;
8371 }
8372 if ((avail < 2) && (ctxt->inputNr == 1))
8373 goto done;
8374 cur = ctxt->input->cur[0];
8375 next = ctxt->input->cur[1];
8376
8377 test = CUR_PTR;
8378 cons = ctxt->input->consumed;
8379 tok = ctxt->token;
8380 if ((cur == '<') && (next == '?')) {
8381 if ((!terminate) &&
8382 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8383 goto done;
8384#ifdef DEBUG_PUSH
8385 xmlGenericError(xmlGenericErrorContext,
8386 "PP: Parsing PI\n");
8387#endif
8388 xmlParsePI(ctxt);
8389 } else if ((cur == '<') && (next == '!') &&
8390 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8391 if ((!terminate) &&
8392 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8393 goto done;
8394#ifdef DEBUG_PUSH
8395 xmlGenericError(xmlGenericErrorContext,
8396 "PP: Parsing Comment\n");
8397#endif
8398 xmlParseComment(ctxt);
8399 ctxt->instate = XML_PARSER_CONTENT;
8400 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8401 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8402 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8403 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8404 (ctxt->input->cur[8] == '[')) {
8405 SKIP(9);
8406 ctxt->instate = XML_PARSER_CDATA_SECTION;
8407#ifdef DEBUG_PUSH
8408 xmlGenericError(xmlGenericErrorContext,
8409 "PP: entering CDATA_SECTION\n");
8410#endif
8411 break;
8412 } else if ((cur == '<') && (next == '!') &&
8413 (avail < 9)) {
8414 goto done;
8415 } else if ((cur == '<') && (next == '/')) {
8416 ctxt->instate = XML_PARSER_END_TAG;
8417#ifdef DEBUG_PUSH
8418 xmlGenericError(xmlGenericErrorContext,
8419 "PP: entering END_TAG\n");
8420#endif
8421 break;
8422 } else if (cur == '<') {
8423 ctxt->instate = XML_PARSER_START_TAG;
8424#ifdef DEBUG_PUSH
8425 xmlGenericError(xmlGenericErrorContext,
8426 "PP: entering START_TAG\n");
8427#endif
8428 break;
8429 } else if (cur == '&') {
8430 if ((!terminate) &&
8431 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8432 goto done;
8433#ifdef DEBUG_PUSH
8434 xmlGenericError(xmlGenericErrorContext,
8435 "PP: Parsing Reference\n");
8436#endif
8437 xmlParseReference(ctxt);
8438 } else {
8439 /* TODO Avoid the extra copy, handle directly !!! */
8440 /*
8441 * Goal of the following test is:
8442 * - minimize calls to the SAX 'character' callback
8443 * when they are mergeable
8444 * - handle an problem for isBlank when we only parse
8445 * a sequence of blank chars and the next one is
8446 * not available to check against '<' presence.
8447 * - tries to homogenize the differences in SAX
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008448 * callbacks between the push and pull versions
Owen Taylor3473f882001-02-23 17:55:21 +00008449 * of the parser.
8450 */
8451 if ((ctxt->inputNr == 1) &&
8452 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8453 if ((!terminate) &&
8454 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8455 goto done;
8456 }
8457 ctxt->checkIndex = 0;
8458#ifdef DEBUG_PUSH
8459 xmlGenericError(xmlGenericErrorContext,
8460 "PP: Parsing char data\n");
8461#endif
8462 xmlParseCharData(ctxt, 0);
8463 }
8464 /*
8465 * Pop-up of finished entities.
8466 */
8467 while ((RAW == 0) && (ctxt->inputNr > 1))
8468 xmlPopInput(ctxt);
8469 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8470 (tok == ctxt->token)) {
8471 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8472 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8473 ctxt->sax->error(ctxt->userData,
8474 "detected an error in element content\n");
8475 ctxt->wellFormed = 0;
8476 ctxt->disableSAX = 1;
8477 ctxt->instate = XML_PARSER_EOF;
8478 break;
8479 }
8480 break;
8481 }
8482 case XML_PARSER_CDATA_SECTION: {
8483 /*
8484 * The Push mode need to have the SAX callback for
8485 * cdataBlock merge back contiguous callbacks.
8486 */
8487 int base;
8488
8489 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8490 if (base < 0) {
8491 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8492 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8493 if (ctxt->sax->cdataBlock != NULL)
8494 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8495 XML_PARSER_BIG_BUFFER_SIZE);
8496 }
8497 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8498 ctxt->checkIndex = 0;
8499 }
8500 goto done;
8501 } else {
8502 if ((ctxt->sax != NULL) && (base > 0) &&
8503 (!ctxt->disableSAX)) {
8504 if (ctxt->sax->cdataBlock != NULL)
8505 ctxt->sax->cdataBlock(ctxt->userData,
8506 ctxt->input->cur, base);
8507 }
8508 SKIP(base + 3);
8509 ctxt->checkIndex = 0;
8510 ctxt->instate = XML_PARSER_CONTENT;
8511#ifdef DEBUG_PUSH
8512 xmlGenericError(xmlGenericErrorContext,
8513 "PP: entering CONTENT\n");
8514#endif
8515 }
8516 break;
8517 }
8518 case XML_PARSER_END_TAG:
8519 if (avail < 2)
8520 goto done;
8521 if ((!terminate) &&
8522 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8523 goto done;
8524 xmlParseEndTag(ctxt);
8525 if (ctxt->name == NULL) {
8526 ctxt->instate = XML_PARSER_EPILOG;
8527#ifdef DEBUG_PUSH
8528 xmlGenericError(xmlGenericErrorContext,
8529 "PP: entering EPILOG\n");
8530#endif
8531 } else {
8532 ctxt->instate = XML_PARSER_CONTENT;
8533#ifdef DEBUG_PUSH
8534 xmlGenericError(xmlGenericErrorContext,
8535 "PP: entering CONTENT\n");
8536#endif
8537 }
8538 break;
8539 case XML_PARSER_DTD: {
8540 /*
8541 * Sorry but progressive parsing of the internal subset
8542 * is not expected to be supported. We first check that
8543 * the full content of the internal subset is available and
8544 * the parsing is launched only at that point.
8545 * Internal subset ends up with "']' S? '>'" in an unescaped
8546 * section and not in a ']]>' sequence which are conditional
8547 * sections (whoever argued to keep that crap in XML deserve
8548 * a place in hell !).
8549 */
8550 int base, i;
8551 xmlChar *buf;
8552 xmlChar quote = 0;
8553
8554 base = ctxt->input->cur - ctxt->input->base;
8555 if (base < 0) return(0);
8556 if (ctxt->checkIndex > base)
8557 base = ctxt->checkIndex;
8558 buf = ctxt->input->buf->buffer->content;
8559 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8560 base++) {
8561 if (quote != 0) {
8562 if (buf[base] == quote)
8563 quote = 0;
8564 continue;
8565 }
8566 if (buf[base] == '"') {
8567 quote = '"';
8568 continue;
8569 }
8570 if (buf[base] == '\'') {
8571 quote = '\'';
8572 continue;
8573 }
8574 if (buf[base] == ']') {
8575 if ((unsigned int) base +1 >=
8576 ctxt->input->buf->buffer->use)
8577 break;
8578 if (buf[base + 1] == ']') {
8579 /* conditional crap, skip both ']' ! */
8580 base++;
8581 continue;
8582 }
8583 for (i = 0;
8584 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8585 i++) {
8586 if (buf[base + i] == '>')
8587 goto found_end_int_subset;
8588 }
8589 break;
8590 }
8591 }
8592 /*
8593 * We didn't found the end of the Internal subset
8594 */
8595 if (quote == 0)
8596 ctxt->checkIndex = base;
8597#ifdef DEBUG_PUSH
8598 if (next == 0)
8599 xmlGenericError(xmlGenericErrorContext,
8600 "PP: lookup of int subset end filed\n");
8601#endif
8602 goto done;
8603
8604found_end_int_subset:
8605 xmlParseInternalSubset(ctxt);
8606 ctxt->inSubset = 2;
8607 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8608 (ctxt->sax->externalSubset != NULL))
8609 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8610 ctxt->extSubSystem, ctxt->extSubURI);
8611 ctxt->inSubset = 0;
8612 ctxt->instate = XML_PARSER_PROLOG;
8613 ctxt->checkIndex = 0;
8614#ifdef DEBUG_PUSH
8615 xmlGenericError(xmlGenericErrorContext,
8616 "PP: entering PROLOG\n");
8617#endif
8618 break;
8619 }
8620 case XML_PARSER_COMMENT:
8621 xmlGenericError(xmlGenericErrorContext,
8622 "PP: internal error, state == COMMENT\n");
8623 ctxt->instate = XML_PARSER_CONTENT;
8624#ifdef DEBUG_PUSH
8625 xmlGenericError(xmlGenericErrorContext,
8626 "PP: entering CONTENT\n");
8627#endif
8628 break;
8629 case XML_PARSER_PI:
8630 xmlGenericError(xmlGenericErrorContext,
8631 "PP: internal error, state == PI\n");
8632 ctxt->instate = XML_PARSER_CONTENT;
8633#ifdef DEBUG_PUSH
8634 xmlGenericError(xmlGenericErrorContext,
8635 "PP: entering CONTENT\n");
8636#endif
8637 break;
8638 case XML_PARSER_ENTITY_DECL:
8639 xmlGenericError(xmlGenericErrorContext,
8640 "PP: internal error, state == ENTITY_DECL\n");
8641 ctxt->instate = XML_PARSER_DTD;
8642#ifdef DEBUG_PUSH
8643 xmlGenericError(xmlGenericErrorContext,
8644 "PP: entering DTD\n");
8645#endif
8646 break;
8647 case XML_PARSER_ENTITY_VALUE:
8648 xmlGenericError(xmlGenericErrorContext,
8649 "PP: internal error, state == ENTITY_VALUE\n");
8650 ctxt->instate = XML_PARSER_CONTENT;
8651#ifdef DEBUG_PUSH
8652 xmlGenericError(xmlGenericErrorContext,
8653 "PP: entering DTD\n");
8654#endif
8655 break;
8656 case XML_PARSER_ATTRIBUTE_VALUE:
8657 xmlGenericError(xmlGenericErrorContext,
8658 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8659 ctxt->instate = XML_PARSER_START_TAG;
8660#ifdef DEBUG_PUSH
8661 xmlGenericError(xmlGenericErrorContext,
8662 "PP: entering START_TAG\n");
8663#endif
8664 break;
8665 case XML_PARSER_SYSTEM_LITERAL:
8666 xmlGenericError(xmlGenericErrorContext,
8667 "PP: internal error, state == SYSTEM_LITERAL\n");
8668 ctxt->instate = XML_PARSER_START_TAG;
8669#ifdef DEBUG_PUSH
8670 xmlGenericError(xmlGenericErrorContext,
8671 "PP: entering START_TAG\n");
8672#endif
8673 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00008674 case XML_PARSER_PUBLIC_LITERAL:
8675 xmlGenericError(xmlGenericErrorContext,
8676 "PP: internal error, state == PUBLIC_LITERAL\n");
8677 ctxt->instate = XML_PARSER_START_TAG;
8678#ifdef DEBUG_PUSH
8679 xmlGenericError(xmlGenericErrorContext,
8680 "PP: entering START_TAG\n");
8681#endif
8682 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008683 }
8684 }
8685done:
8686#ifdef DEBUG_PUSH
8687 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8688#endif
8689 return(ret);
8690}
8691
8692/**
Owen Taylor3473f882001-02-23 17:55:21 +00008693 * xmlParseChunk:
8694 * @ctxt: an XML parser context
8695 * @chunk: an char array
8696 * @size: the size in byte of the chunk
8697 * @terminate: last chunk indicator
8698 *
8699 * Parse a Chunk of memory
8700 *
8701 * Returns zero if no error, the xmlParserErrors otherwise.
8702 */
8703int
8704xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8705 int terminate) {
8706 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8707 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8708 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8709 int cur = ctxt->input->cur - ctxt->input->base;
8710
8711 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8712 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8713 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008714 ctxt->input->end =
8715 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008716#ifdef DEBUG_PUSH
8717 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8718#endif
8719
8720 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8721 xmlParseTryOrFinish(ctxt, terminate);
8722 } else if (ctxt->instate != XML_PARSER_EOF) {
8723 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8724 xmlParserInputBufferPtr in = ctxt->input->buf;
8725 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8726 (in->raw != NULL)) {
8727 int nbchars;
8728
8729 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8730 if (nbchars < 0) {
8731 xmlGenericError(xmlGenericErrorContext,
8732 "xmlParseChunk: encoder error\n");
8733 return(XML_ERR_INVALID_ENCODING);
8734 }
8735 }
8736 }
8737 }
8738 xmlParseTryOrFinish(ctxt, terminate);
8739 if (terminate) {
8740 /*
8741 * Check for termination
8742 */
8743 if ((ctxt->instate != XML_PARSER_EOF) &&
8744 (ctxt->instate != XML_PARSER_EPILOG)) {
8745 ctxt->errNo = XML_ERR_DOCUMENT_END;
8746 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8747 ctxt->sax->error(ctxt->userData,
8748 "Extra content at the end of the document\n");
8749 ctxt->wellFormed = 0;
8750 ctxt->disableSAX = 1;
8751 }
8752 if (ctxt->instate != XML_PARSER_EOF) {
8753 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8754 (!ctxt->disableSAX))
8755 ctxt->sax->endDocument(ctxt->userData);
8756 }
8757 ctxt->instate = XML_PARSER_EOF;
8758 }
8759 return((xmlParserErrors) ctxt->errNo);
8760}
8761
8762/************************************************************************
8763 * *
8764 * I/O front end functions to the parser *
8765 * *
8766 ************************************************************************/
8767
8768/**
8769 * xmlStopParser:
8770 * @ctxt: an XML parser context
8771 *
8772 * Blocks further parser processing
8773 */
8774void
8775xmlStopParser(xmlParserCtxtPtr ctxt) {
8776 ctxt->instate = XML_PARSER_EOF;
8777 if (ctxt->input != NULL)
8778 ctxt->input->cur = BAD_CAST"";
8779}
8780
8781/**
8782 * xmlCreatePushParserCtxt:
8783 * @sax: a SAX handler
8784 * @user_data: The user data returned on SAX callbacks
8785 * @chunk: a pointer to an array of chars
8786 * @size: number of chars in the array
8787 * @filename: an optional file name or URI
8788 *
8789 * Create a parser context for using the XML parser in push mode
8790 * To allow content encoding detection, @size should be >= 4
8791 * The value of @filename is used for fetching external entities
8792 * and error/warning reports.
8793 *
8794 * Returns the new parser context or NULL
8795 */
8796xmlParserCtxtPtr
8797xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8798 const char *chunk, int size, const char *filename) {
8799 xmlParserCtxtPtr ctxt;
8800 xmlParserInputPtr inputStream;
8801 xmlParserInputBufferPtr buf;
8802 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8803
8804 /*
8805 * plug some encoding conversion routines
8806 */
8807 if ((chunk != NULL) && (size >= 4))
8808 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8809
8810 buf = xmlAllocParserInputBuffer(enc);
8811 if (buf == NULL) return(NULL);
8812
8813 ctxt = xmlNewParserCtxt();
8814 if (ctxt == NULL) {
8815 xmlFree(buf);
8816 return(NULL);
8817 }
8818 if (sax != NULL) {
8819 if (ctxt->sax != &xmlDefaultSAXHandler)
8820 xmlFree(ctxt->sax);
8821 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8822 if (ctxt->sax == NULL) {
8823 xmlFree(buf);
8824 xmlFree(ctxt);
8825 return(NULL);
8826 }
8827 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8828 if (user_data != NULL)
8829 ctxt->userData = user_data;
8830 }
8831 if (filename == NULL) {
8832 ctxt->directory = NULL;
8833 } else {
8834 ctxt->directory = xmlParserGetDirectory(filename);
8835 }
8836
8837 inputStream = xmlNewInputStream(ctxt);
8838 if (inputStream == NULL) {
8839 xmlFreeParserCtxt(ctxt);
8840 return(NULL);
8841 }
8842
8843 if (filename == NULL)
8844 inputStream->filename = NULL;
8845 else
8846 inputStream->filename = xmlMemStrdup(filename);
8847 inputStream->buf = buf;
8848 inputStream->base = inputStream->buf->buffer->content;
8849 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008850 inputStream->end =
8851 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008852
8853 inputPush(ctxt, inputStream);
8854
8855 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8856 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00008857 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8858 int cur = ctxt->input->cur - ctxt->input->base;
8859
Owen Taylor3473f882001-02-23 17:55:21 +00008860 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00008861
8862 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8863 ctxt->input->cur = ctxt->input->base + cur;
8864 ctxt->input->end =
8865 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008866#ifdef DEBUG_PUSH
8867 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8868#endif
8869 }
8870
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008871 if (enc != XML_CHAR_ENCODING_NONE) {
8872 xmlSwitchEncoding(ctxt, enc);
8873 }
8874
Owen Taylor3473f882001-02-23 17:55:21 +00008875 return(ctxt);
8876}
8877
8878/**
8879 * xmlCreateIOParserCtxt:
8880 * @sax: a SAX handler
8881 * @user_data: The user data returned on SAX callbacks
8882 * @ioread: an I/O read function
8883 * @ioclose: an I/O close function
8884 * @ioctx: an I/O handler
8885 * @enc: the charset encoding if known
8886 *
8887 * Create a parser context for using the XML parser with an existing
8888 * I/O stream
8889 *
8890 * Returns the new parser context or NULL
8891 */
8892xmlParserCtxtPtr
8893xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8894 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8895 void *ioctx, xmlCharEncoding enc) {
8896 xmlParserCtxtPtr ctxt;
8897 xmlParserInputPtr inputStream;
8898 xmlParserInputBufferPtr buf;
8899
8900 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8901 if (buf == NULL) return(NULL);
8902
8903 ctxt = xmlNewParserCtxt();
8904 if (ctxt == NULL) {
8905 xmlFree(buf);
8906 return(NULL);
8907 }
8908 if (sax != NULL) {
8909 if (ctxt->sax != &xmlDefaultSAXHandler)
8910 xmlFree(ctxt->sax);
8911 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8912 if (ctxt->sax == NULL) {
8913 xmlFree(buf);
8914 xmlFree(ctxt);
8915 return(NULL);
8916 }
8917 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8918 if (user_data != NULL)
8919 ctxt->userData = user_data;
8920 }
8921
8922 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8923 if (inputStream == NULL) {
8924 xmlFreeParserCtxt(ctxt);
8925 return(NULL);
8926 }
8927 inputPush(ctxt, inputStream);
8928
8929 return(ctxt);
8930}
8931
8932/************************************************************************
8933 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008934 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00008935 * *
8936 ************************************************************************/
8937
8938/**
8939 * xmlIOParseDTD:
8940 * @sax: the SAX handler block or NULL
8941 * @input: an Input Buffer
8942 * @enc: the charset encoding if known
8943 *
8944 * Load and parse a DTD
8945 *
8946 * Returns the resulting xmlDtdPtr or NULL in case of error.
8947 * @input will be freed at parsing end.
8948 */
8949
8950xmlDtdPtr
8951xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8952 xmlCharEncoding enc) {
8953 xmlDtdPtr ret = NULL;
8954 xmlParserCtxtPtr ctxt;
8955 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00008956 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00008957
8958 if (input == NULL)
8959 return(NULL);
8960
8961 ctxt = xmlNewParserCtxt();
8962 if (ctxt == NULL) {
8963 return(NULL);
8964 }
8965
8966 /*
8967 * Set-up the SAX context
8968 */
8969 if (sax != NULL) {
8970 if (ctxt->sax != NULL)
8971 xmlFree(ctxt->sax);
8972 ctxt->sax = sax;
8973 ctxt->userData = NULL;
8974 }
8975
8976 /*
8977 * generate a parser input from the I/O handler
8978 */
8979
8980 pinput = xmlNewIOInputStream(ctxt, input, enc);
8981 if (pinput == NULL) {
8982 if (sax != NULL) ctxt->sax = NULL;
8983 xmlFreeParserCtxt(ctxt);
8984 return(NULL);
8985 }
8986
8987 /*
8988 * plug some encoding conversion routines here.
8989 */
8990 xmlPushInput(ctxt, pinput);
8991
8992 pinput->filename = NULL;
8993 pinput->line = 1;
8994 pinput->col = 1;
8995 pinput->base = ctxt->input->cur;
8996 pinput->cur = ctxt->input->cur;
8997 pinput->free = NULL;
8998
8999 /*
9000 * let's parse that entity knowing it's an external subset.
9001 */
9002 ctxt->inSubset = 2;
9003 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9004 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9005 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009006
9007 if (enc == XML_CHAR_ENCODING_NONE) {
9008 /*
9009 * Get the 4 first bytes and decode the charset
9010 * if enc != XML_CHAR_ENCODING_NONE
9011 * plug some encoding conversion routines.
9012 */
9013 start[0] = RAW;
9014 start[1] = NXT(1);
9015 start[2] = NXT(2);
9016 start[3] = NXT(3);
9017 enc = xmlDetectCharEncoding(start, 4);
9018 if (enc != XML_CHAR_ENCODING_NONE) {
9019 xmlSwitchEncoding(ctxt, enc);
9020 }
9021 }
9022
Owen Taylor3473f882001-02-23 17:55:21 +00009023 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9024
9025 if (ctxt->myDoc != NULL) {
9026 if (ctxt->wellFormed) {
9027 ret = ctxt->myDoc->extSubset;
9028 ctxt->myDoc->extSubset = NULL;
9029 } else {
9030 ret = NULL;
9031 }
9032 xmlFreeDoc(ctxt->myDoc);
9033 ctxt->myDoc = NULL;
9034 }
9035 if (sax != NULL) ctxt->sax = NULL;
9036 xmlFreeParserCtxt(ctxt);
9037
9038 return(ret);
9039}
9040
9041/**
9042 * xmlSAXParseDTD:
9043 * @sax: the SAX handler block
9044 * @ExternalID: a NAME* containing the External ID of the DTD
9045 * @SystemID: a NAME* containing the URL to the DTD
9046 *
9047 * Load and parse an external subset.
9048 *
9049 * Returns the resulting xmlDtdPtr or NULL in case of error.
9050 */
9051
9052xmlDtdPtr
9053xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9054 const xmlChar *SystemID) {
9055 xmlDtdPtr ret = NULL;
9056 xmlParserCtxtPtr ctxt;
9057 xmlParserInputPtr input = NULL;
9058 xmlCharEncoding enc;
9059
9060 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9061
9062 ctxt = xmlNewParserCtxt();
9063 if (ctxt == NULL) {
9064 return(NULL);
9065 }
9066
9067 /*
9068 * Set-up the SAX context
9069 */
9070 if (sax != NULL) {
9071 if (ctxt->sax != NULL)
9072 xmlFree(ctxt->sax);
9073 ctxt->sax = sax;
9074 ctxt->userData = NULL;
9075 }
9076
9077 /*
9078 * Ask the Entity resolver to load the damn thing
9079 */
9080
9081 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9082 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9083 if (input == NULL) {
9084 if (sax != NULL) ctxt->sax = NULL;
9085 xmlFreeParserCtxt(ctxt);
9086 return(NULL);
9087 }
9088
9089 /*
9090 * plug some encoding conversion routines here.
9091 */
9092 xmlPushInput(ctxt, input);
9093 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9094 xmlSwitchEncoding(ctxt, enc);
9095
9096 if (input->filename == NULL)
9097 input->filename = (char *) xmlStrdup(SystemID);
9098 input->line = 1;
9099 input->col = 1;
9100 input->base = ctxt->input->cur;
9101 input->cur = ctxt->input->cur;
9102 input->free = NULL;
9103
9104 /*
9105 * let's parse that entity knowing it's an external subset.
9106 */
9107 ctxt->inSubset = 2;
9108 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9109 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9110 ExternalID, SystemID);
9111 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9112
9113 if (ctxt->myDoc != NULL) {
9114 if (ctxt->wellFormed) {
9115 ret = ctxt->myDoc->extSubset;
9116 ctxt->myDoc->extSubset = NULL;
9117 } else {
9118 ret = NULL;
9119 }
9120 xmlFreeDoc(ctxt->myDoc);
9121 ctxt->myDoc = NULL;
9122 }
9123 if (sax != NULL) ctxt->sax = NULL;
9124 xmlFreeParserCtxt(ctxt);
9125
9126 return(ret);
9127}
9128
9129/**
9130 * xmlParseDTD:
9131 * @ExternalID: a NAME* containing the External ID of the DTD
9132 * @SystemID: a NAME* containing the URL to the DTD
9133 *
9134 * Load and parse an external subset.
9135 *
9136 * Returns the resulting xmlDtdPtr or NULL in case of error.
9137 */
9138
9139xmlDtdPtr
9140xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9141 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9142}
9143
9144/************************************************************************
9145 * *
9146 * Front ends when parsing an Entity *
9147 * *
9148 ************************************************************************/
9149
9150/**
Owen Taylor3473f882001-02-23 17:55:21 +00009151 * xmlParseCtxtExternalEntity:
9152 * @ctx: the existing parsing context
9153 * @URL: the URL for the entity to load
9154 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009155 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009156 *
9157 * Parse an external general entity within an existing parsing context
9158 * An external general parsed entity is well-formed if it matches the
9159 * production labeled extParsedEnt.
9160 *
9161 * [78] extParsedEnt ::= TextDecl? content
9162 *
9163 * Returns 0 if the entity is well formed, -1 in case of args problem and
9164 * the parser error code otherwise
9165 */
9166
9167int
9168xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009169 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009170 xmlParserCtxtPtr ctxt;
9171 xmlDocPtr newDoc;
9172 xmlSAXHandlerPtr oldsax = NULL;
9173 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009174 xmlChar start[4];
9175 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009176
9177 if (ctx->depth > 40) {
9178 return(XML_ERR_ENTITY_LOOP);
9179 }
9180
Daniel Veillardcda96922001-08-21 10:56:31 +00009181 if (lst != NULL)
9182 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009183 if ((URL == NULL) && (ID == NULL))
9184 return(-1);
9185 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9186 return(-1);
9187
9188
9189 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9190 if (ctxt == NULL) return(-1);
9191 ctxt->userData = ctxt;
9192 oldsax = ctxt->sax;
9193 ctxt->sax = ctx->sax;
9194 newDoc = xmlNewDoc(BAD_CAST "1.0");
9195 if (newDoc == NULL) {
9196 xmlFreeParserCtxt(ctxt);
9197 return(-1);
9198 }
9199 if (ctx->myDoc != NULL) {
9200 newDoc->intSubset = ctx->myDoc->intSubset;
9201 newDoc->extSubset = ctx->myDoc->extSubset;
9202 }
9203 if (ctx->myDoc->URL != NULL) {
9204 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9205 }
9206 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9207 if (newDoc->children == NULL) {
9208 ctxt->sax = oldsax;
9209 xmlFreeParserCtxt(ctxt);
9210 newDoc->intSubset = NULL;
9211 newDoc->extSubset = NULL;
9212 xmlFreeDoc(newDoc);
9213 return(-1);
9214 }
9215 nodePush(ctxt, newDoc->children);
9216 if (ctx->myDoc == NULL) {
9217 ctxt->myDoc = newDoc;
9218 } else {
9219 ctxt->myDoc = ctx->myDoc;
9220 newDoc->children->doc = ctx->myDoc;
9221 }
9222
Daniel Veillard87a764e2001-06-20 17:41:10 +00009223 /*
9224 * Get the 4 first bytes and decode the charset
9225 * if enc != XML_CHAR_ENCODING_NONE
9226 * plug some encoding conversion routines.
9227 */
9228 GROW
9229 start[0] = RAW;
9230 start[1] = NXT(1);
9231 start[2] = NXT(2);
9232 start[3] = NXT(3);
9233 enc = xmlDetectCharEncoding(start, 4);
9234 if (enc != XML_CHAR_ENCODING_NONE) {
9235 xmlSwitchEncoding(ctxt, enc);
9236 }
9237
Owen Taylor3473f882001-02-23 17:55:21 +00009238 /*
9239 * Parse a possible text declaration first
9240 */
Owen Taylor3473f882001-02-23 17:55:21 +00009241 if ((RAW == '<') && (NXT(1) == '?') &&
9242 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9243 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9244 xmlParseTextDecl(ctxt);
9245 }
9246
9247 /*
9248 * Doing validity checking on chunk doesn't make sense
9249 */
9250 ctxt->instate = XML_PARSER_CONTENT;
9251 ctxt->validate = ctx->validate;
9252 ctxt->loadsubset = ctx->loadsubset;
9253 ctxt->depth = ctx->depth + 1;
9254 ctxt->replaceEntities = ctx->replaceEntities;
9255 if (ctxt->validate) {
9256 ctxt->vctxt.error = ctx->vctxt.error;
9257 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009258 } else {
9259 ctxt->vctxt.error = NULL;
9260 ctxt->vctxt.warning = NULL;
9261 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009262 ctxt->vctxt.nodeTab = NULL;
9263 ctxt->vctxt.nodeNr = 0;
9264 ctxt->vctxt.nodeMax = 0;
9265 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009266
9267 xmlParseContent(ctxt);
9268
9269 if ((RAW == '<') && (NXT(1) == '/')) {
9270 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9271 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9272 ctxt->sax->error(ctxt->userData,
9273 "chunk is not well balanced\n");
9274 ctxt->wellFormed = 0;
9275 ctxt->disableSAX = 1;
9276 } else if (RAW != 0) {
9277 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9278 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9279 ctxt->sax->error(ctxt->userData,
9280 "extra content at the end of well balanced chunk\n");
9281 ctxt->wellFormed = 0;
9282 ctxt->disableSAX = 1;
9283 }
9284 if (ctxt->node != newDoc->children) {
9285 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9286 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9287 ctxt->sax->error(ctxt->userData,
9288 "chunk is not well balanced\n");
9289 ctxt->wellFormed = 0;
9290 ctxt->disableSAX = 1;
9291 }
9292
9293 if (!ctxt->wellFormed) {
9294 if (ctxt->errNo == 0)
9295 ret = 1;
9296 else
9297 ret = ctxt->errNo;
9298 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009299 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009300 xmlNodePtr cur;
9301
9302 /*
9303 * Return the newly created nodeset after unlinking it from
9304 * they pseudo parent.
9305 */
9306 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009307 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009308 while (cur != NULL) {
9309 cur->parent = NULL;
9310 cur = cur->next;
9311 }
9312 newDoc->children->children = NULL;
9313 }
9314 ret = 0;
9315 }
9316 ctxt->sax = oldsax;
9317 xmlFreeParserCtxt(ctxt);
9318 newDoc->intSubset = NULL;
9319 newDoc->extSubset = NULL;
9320 xmlFreeDoc(newDoc);
9321
9322 return(ret);
9323}
9324
9325/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009326 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009327 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009328 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009329 * @sax: the SAX handler bloc (possibly NULL)
9330 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9331 * @depth: Used for loop detection, use 0
9332 * @URL: the URL for the entity to load
9333 * @ID: the System ID for the entity to load
9334 * @list: the return value for the set of parsed nodes
9335 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009336 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009337 *
9338 * Returns 0 if the entity is well formed, -1 in case of args problem and
9339 * the parser error code otherwise
9340 */
9341
Daniel Veillard257d9102001-05-08 10:41:44 +00009342static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009343xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9344 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009345 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009346 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009347 xmlParserCtxtPtr ctxt;
9348 xmlDocPtr newDoc;
9349 xmlSAXHandlerPtr oldsax = NULL;
9350 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009351 xmlChar start[4];
9352 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009353
9354 if (depth > 40) {
9355 return(XML_ERR_ENTITY_LOOP);
9356 }
9357
9358
9359
9360 if (list != NULL)
9361 *list = NULL;
9362 if ((URL == NULL) && (ID == NULL))
9363 return(-1);
9364 if (doc == NULL) /* @@ relax but check for dereferences */
9365 return(-1);
9366
9367
9368 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9369 if (ctxt == NULL) return(-1);
9370 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009371 if (oldctxt != NULL) {
9372 ctxt->_private = oldctxt->_private;
9373 ctxt->loadsubset = oldctxt->loadsubset;
9374 ctxt->validate = oldctxt->validate;
9375 ctxt->external = oldctxt->external;
9376 } else {
9377 /*
9378 * Doing validity checking on chunk without context
9379 * doesn't make sense
9380 */
9381 ctxt->_private = NULL;
9382 ctxt->validate = 0;
9383 ctxt->external = 2;
9384 ctxt->loadsubset = 0;
9385 }
Owen Taylor3473f882001-02-23 17:55:21 +00009386 if (sax != NULL) {
9387 oldsax = ctxt->sax;
9388 ctxt->sax = sax;
9389 if (user_data != NULL)
9390 ctxt->userData = user_data;
9391 }
9392 newDoc = xmlNewDoc(BAD_CAST "1.0");
9393 if (newDoc == NULL) {
9394 xmlFreeParserCtxt(ctxt);
9395 return(-1);
9396 }
9397 if (doc != NULL) {
9398 newDoc->intSubset = doc->intSubset;
9399 newDoc->extSubset = doc->extSubset;
9400 }
9401 if (doc->URL != NULL) {
9402 newDoc->URL = xmlStrdup(doc->URL);
9403 }
9404 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9405 if (newDoc->children == NULL) {
9406 if (sax != NULL)
9407 ctxt->sax = oldsax;
9408 xmlFreeParserCtxt(ctxt);
9409 newDoc->intSubset = NULL;
9410 newDoc->extSubset = NULL;
9411 xmlFreeDoc(newDoc);
9412 return(-1);
9413 }
9414 nodePush(ctxt, newDoc->children);
9415 if (doc == NULL) {
9416 ctxt->myDoc = newDoc;
9417 } else {
9418 ctxt->myDoc = doc;
9419 newDoc->children->doc = doc;
9420 }
9421
Daniel Veillard87a764e2001-06-20 17:41:10 +00009422 /*
9423 * Get the 4 first bytes and decode the charset
9424 * if enc != XML_CHAR_ENCODING_NONE
9425 * plug some encoding conversion routines.
9426 */
9427 GROW;
9428 start[0] = RAW;
9429 start[1] = NXT(1);
9430 start[2] = NXT(2);
9431 start[3] = NXT(3);
9432 enc = xmlDetectCharEncoding(start, 4);
9433 if (enc != XML_CHAR_ENCODING_NONE) {
9434 xmlSwitchEncoding(ctxt, enc);
9435 }
9436
Owen Taylor3473f882001-02-23 17:55:21 +00009437 /*
9438 * Parse a possible text declaration first
9439 */
Owen Taylor3473f882001-02-23 17:55:21 +00009440 if ((RAW == '<') && (NXT(1) == '?') &&
9441 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9442 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9443 xmlParseTextDecl(ctxt);
9444 }
9445
Owen Taylor3473f882001-02-23 17:55:21 +00009446 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009447 ctxt->depth = depth;
9448
9449 xmlParseContent(ctxt);
9450
9451 if ((RAW == '<') && (NXT(1) == '/')) {
9452 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9453 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9454 ctxt->sax->error(ctxt->userData,
9455 "chunk is not well balanced\n");
9456 ctxt->wellFormed = 0;
9457 ctxt->disableSAX = 1;
9458 } else if (RAW != 0) {
9459 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9460 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9461 ctxt->sax->error(ctxt->userData,
9462 "extra content at the end of well balanced chunk\n");
9463 ctxt->wellFormed = 0;
9464 ctxt->disableSAX = 1;
9465 }
9466 if (ctxt->node != newDoc->children) {
9467 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9468 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9469 ctxt->sax->error(ctxt->userData,
9470 "chunk is not well balanced\n");
9471 ctxt->wellFormed = 0;
9472 ctxt->disableSAX = 1;
9473 }
9474
9475 if (!ctxt->wellFormed) {
9476 if (ctxt->errNo == 0)
9477 ret = 1;
9478 else
9479 ret = ctxt->errNo;
9480 } else {
9481 if (list != NULL) {
9482 xmlNodePtr cur;
9483
9484 /*
9485 * Return the newly created nodeset after unlinking it from
9486 * they pseudo parent.
9487 */
9488 cur = newDoc->children->children;
9489 *list = cur;
9490 while (cur != NULL) {
9491 cur->parent = NULL;
9492 cur = cur->next;
9493 }
9494 newDoc->children->children = NULL;
9495 }
9496 ret = 0;
9497 }
9498 if (sax != NULL)
9499 ctxt->sax = oldsax;
9500 xmlFreeParserCtxt(ctxt);
9501 newDoc->intSubset = NULL;
9502 newDoc->extSubset = NULL;
9503 xmlFreeDoc(newDoc);
9504
9505 return(ret);
9506}
9507
9508/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009509 * xmlParseExternalEntity:
9510 * @doc: the document the chunk pertains to
9511 * @sax: the SAX handler bloc (possibly NULL)
9512 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9513 * @depth: Used for loop detection, use 0
9514 * @URL: the URL for the entity to load
9515 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009516 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009517 *
9518 * Parse an external general entity
9519 * An external general parsed entity is well-formed if it matches the
9520 * production labeled extParsedEnt.
9521 *
9522 * [78] extParsedEnt ::= TextDecl? content
9523 *
9524 * Returns 0 if the entity is well formed, -1 in case of args problem and
9525 * the parser error code otherwise
9526 */
9527
9528int
9529xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009530 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009531 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009532 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009533}
9534
9535/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009536 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009537 * @doc: the document the chunk pertains to
9538 * @sax: the SAX handler bloc (possibly NULL)
9539 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9540 * @depth: Used for loop detection, use 0
9541 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009542 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009543 *
9544 * Parse a well-balanced chunk of an XML document
9545 * called by the parser
9546 * The allowed sequence for the Well Balanced Chunk is the one defined by
9547 * the content production in the XML grammar:
9548 *
9549 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9550 *
9551 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9552 * the parser error code otherwise
9553 */
9554
9555int
9556xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009557 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009558 xmlParserCtxtPtr ctxt;
9559 xmlDocPtr newDoc;
9560 xmlSAXHandlerPtr oldsax = NULL;
9561 int size;
9562 int ret = 0;
9563
9564 if (depth > 40) {
9565 return(XML_ERR_ENTITY_LOOP);
9566 }
9567
9568
Daniel Veillardcda96922001-08-21 10:56:31 +00009569 if (lst != NULL)
9570 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009571 if (string == NULL)
9572 return(-1);
9573
9574 size = xmlStrlen(string);
9575
9576 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9577 if (ctxt == NULL) return(-1);
9578 ctxt->userData = ctxt;
9579 if (sax != NULL) {
9580 oldsax = ctxt->sax;
9581 ctxt->sax = sax;
9582 if (user_data != NULL)
9583 ctxt->userData = user_data;
9584 }
9585 newDoc = xmlNewDoc(BAD_CAST "1.0");
9586 if (newDoc == NULL) {
9587 xmlFreeParserCtxt(ctxt);
9588 return(-1);
9589 }
9590 if (doc != NULL) {
9591 newDoc->intSubset = doc->intSubset;
9592 newDoc->extSubset = doc->extSubset;
9593 }
9594 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9595 if (newDoc->children == NULL) {
9596 if (sax != NULL)
9597 ctxt->sax = oldsax;
9598 xmlFreeParserCtxt(ctxt);
9599 newDoc->intSubset = NULL;
9600 newDoc->extSubset = NULL;
9601 xmlFreeDoc(newDoc);
9602 return(-1);
9603 }
9604 nodePush(ctxt, newDoc->children);
9605 if (doc == NULL) {
9606 ctxt->myDoc = newDoc;
9607 } else {
9608 ctxt->myDoc = doc;
9609 newDoc->children->doc = doc;
9610 }
9611 ctxt->instate = XML_PARSER_CONTENT;
9612 ctxt->depth = depth;
9613
9614 /*
9615 * Doing validity checking on chunk doesn't make sense
9616 */
9617 ctxt->validate = 0;
9618 ctxt->loadsubset = 0;
9619
9620 xmlParseContent(ctxt);
9621
9622 if ((RAW == '<') && (NXT(1) == '/')) {
9623 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9624 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9625 ctxt->sax->error(ctxt->userData,
9626 "chunk is not well balanced\n");
9627 ctxt->wellFormed = 0;
9628 ctxt->disableSAX = 1;
9629 } else if (RAW != 0) {
9630 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9631 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9632 ctxt->sax->error(ctxt->userData,
9633 "extra content at the end of well balanced chunk\n");
9634 ctxt->wellFormed = 0;
9635 ctxt->disableSAX = 1;
9636 }
9637 if (ctxt->node != newDoc->children) {
9638 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9639 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9640 ctxt->sax->error(ctxt->userData,
9641 "chunk is not well balanced\n");
9642 ctxt->wellFormed = 0;
9643 ctxt->disableSAX = 1;
9644 }
9645
9646 if (!ctxt->wellFormed) {
9647 if (ctxt->errNo == 0)
9648 ret = 1;
9649 else
9650 ret = ctxt->errNo;
9651 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009652 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009653 xmlNodePtr cur;
9654
9655 /*
9656 * Return the newly created nodeset after unlinking it from
9657 * they pseudo parent.
9658 */
9659 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009660 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009661 while (cur != NULL) {
9662 cur->parent = NULL;
9663 cur = cur->next;
9664 }
9665 newDoc->children->children = NULL;
9666 }
9667 ret = 0;
9668 }
9669 if (sax != NULL)
9670 ctxt->sax = oldsax;
9671 xmlFreeParserCtxt(ctxt);
9672 newDoc->intSubset = NULL;
9673 newDoc->extSubset = NULL;
9674 xmlFreeDoc(newDoc);
9675
9676 return(ret);
9677}
9678
9679/**
9680 * xmlSAXParseEntity:
9681 * @sax: the SAX handler block
9682 * @filename: the filename
9683 *
9684 * parse an XML external entity out of context and build a tree.
9685 * It use the given SAX function block to handle the parsing callback.
9686 * If sax is NULL, fallback to the default DOM tree building routines.
9687 *
9688 * [78] extParsedEnt ::= TextDecl? content
9689 *
9690 * This correspond to a "Well Balanced" chunk
9691 *
9692 * Returns the resulting document tree
9693 */
9694
9695xmlDocPtr
9696xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9697 xmlDocPtr ret;
9698 xmlParserCtxtPtr ctxt;
9699 char *directory = NULL;
9700
9701 ctxt = xmlCreateFileParserCtxt(filename);
9702 if (ctxt == NULL) {
9703 return(NULL);
9704 }
9705 if (sax != NULL) {
9706 if (ctxt->sax != NULL)
9707 xmlFree(ctxt->sax);
9708 ctxt->sax = sax;
9709 ctxt->userData = NULL;
9710 }
9711
9712 if ((ctxt->directory == NULL) && (directory == NULL))
9713 directory = xmlParserGetDirectory(filename);
9714
9715 xmlParseExtParsedEnt(ctxt);
9716
9717 if (ctxt->wellFormed)
9718 ret = ctxt->myDoc;
9719 else {
9720 ret = NULL;
9721 xmlFreeDoc(ctxt->myDoc);
9722 ctxt->myDoc = NULL;
9723 }
9724 if (sax != NULL)
9725 ctxt->sax = NULL;
9726 xmlFreeParserCtxt(ctxt);
9727
9728 return(ret);
9729}
9730
9731/**
9732 * xmlParseEntity:
9733 * @filename: the filename
9734 *
9735 * parse an XML external entity out of context and build a tree.
9736 *
9737 * [78] extParsedEnt ::= TextDecl? content
9738 *
9739 * This correspond to a "Well Balanced" chunk
9740 *
9741 * Returns the resulting document tree
9742 */
9743
9744xmlDocPtr
9745xmlParseEntity(const char *filename) {
9746 return(xmlSAXParseEntity(NULL, filename));
9747}
9748
9749/**
9750 * xmlCreateEntityParserCtxt:
9751 * @URL: the entity URL
9752 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009753 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +00009754 *
9755 * Create a parser context for an external entity
9756 * Automatic support for ZLIB/Compress compressed document is provided
9757 * by default if found at compile-time.
9758 *
9759 * Returns the new parser context or NULL
9760 */
9761xmlParserCtxtPtr
9762xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9763 const xmlChar *base) {
9764 xmlParserCtxtPtr ctxt;
9765 xmlParserInputPtr inputStream;
9766 char *directory = NULL;
9767 xmlChar *uri;
9768
9769 ctxt = xmlNewParserCtxt();
9770 if (ctxt == NULL) {
9771 return(NULL);
9772 }
9773
9774 uri = xmlBuildURI(URL, base);
9775
9776 if (uri == NULL) {
9777 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9778 if (inputStream == NULL) {
9779 xmlFreeParserCtxt(ctxt);
9780 return(NULL);
9781 }
9782
9783 inputPush(ctxt, inputStream);
9784
9785 if ((ctxt->directory == NULL) && (directory == NULL))
9786 directory = xmlParserGetDirectory((char *)URL);
9787 if ((ctxt->directory == NULL) && (directory != NULL))
9788 ctxt->directory = directory;
9789 } else {
9790 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9791 if (inputStream == NULL) {
9792 xmlFree(uri);
9793 xmlFreeParserCtxt(ctxt);
9794 return(NULL);
9795 }
9796
9797 inputPush(ctxt, inputStream);
9798
9799 if ((ctxt->directory == NULL) && (directory == NULL))
9800 directory = xmlParserGetDirectory((char *)uri);
9801 if ((ctxt->directory == NULL) && (directory != NULL))
9802 ctxt->directory = directory;
9803 xmlFree(uri);
9804 }
9805
9806 return(ctxt);
9807}
9808
9809/************************************************************************
9810 * *
9811 * Front ends when parsing from a file *
9812 * *
9813 ************************************************************************/
9814
9815/**
9816 * xmlCreateFileParserCtxt:
9817 * @filename: the filename
9818 *
9819 * Create a parser context for a file content.
9820 * Automatic support for ZLIB/Compress compressed document is provided
9821 * by default if found at compile-time.
9822 *
9823 * Returns the new parser context or NULL
9824 */
9825xmlParserCtxtPtr
9826xmlCreateFileParserCtxt(const char *filename)
9827{
9828 xmlParserCtxtPtr ctxt;
9829 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +00009830 char *directory = NULL;
9831
Owen Taylor3473f882001-02-23 17:55:21 +00009832 ctxt = xmlNewParserCtxt();
9833 if (ctxt == NULL) {
9834 if (xmlDefaultSAXHandler.error != NULL) {
9835 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9836 }
9837 return(NULL);
9838 }
9839
Daniel Veillard9f7b84b2001-08-23 15:31:19 +00009840 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009841 if (inputStream == NULL) {
9842 xmlFreeParserCtxt(ctxt);
9843 return(NULL);
9844 }
9845
Owen Taylor3473f882001-02-23 17:55:21 +00009846 inputPush(ctxt, inputStream);
9847 if ((ctxt->directory == NULL) && (directory == NULL))
9848 directory = xmlParserGetDirectory(filename);
9849 if ((ctxt->directory == NULL) && (directory != NULL))
9850 ctxt->directory = directory;
9851
9852 return(ctxt);
9853}
9854
9855/**
Daniel Veillarda293c322001-10-02 13:54:14 +00009856 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +00009857 * @sax: the SAX handler block
9858 * @filename: the filename
9859 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9860 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +00009861 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +00009862 *
9863 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9864 * compressed document is provided by default if found at compile-time.
9865 * It use the given SAX function block to handle the parsing callback.
9866 * If sax is NULL, fallback to the default DOM tree building routines.
9867 *
Daniel Veillarda293c322001-10-02 13:54:14 +00009868 * User data (void *) is stored within the parser context, so it is
9869 * available nearly everywhere in libxml.
9870 *
Owen Taylor3473f882001-02-23 17:55:21 +00009871 * Returns the resulting document tree
9872 */
9873
9874xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +00009875xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
9876 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +00009877 xmlDocPtr ret;
9878 xmlParserCtxtPtr ctxt;
9879 char *directory = NULL;
9880
Daniel Veillard635ef722001-10-29 11:48:19 +00009881 xmlInitParser();
9882
Owen Taylor3473f882001-02-23 17:55:21 +00009883 ctxt = xmlCreateFileParserCtxt(filename);
9884 if (ctxt == NULL) {
9885 return(NULL);
9886 }
9887 if (sax != NULL) {
9888 if (ctxt->sax != NULL)
9889 xmlFree(ctxt->sax);
9890 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009891 }
Daniel Veillarda293c322001-10-02 13:54:14 +00009892 if (data!=NULL) {
9893 ctxt->_private=data;
9894 }
Owen Taylor3473f882001-02-23 17:55:21 +00009895
9896 if ((ctxt->directory == NULL) && (directory == NULL))
9897 directory = xmlParserGetDirectory(filename);
9898 if ((ctxt->directory == NULL) && (directory != NULL))
9899 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9900
9901 xmlParseDocument(ctxt);
9902
9903 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9904 else {
9905 ret = NULL;
9906 xmlFreeDoc(ctxt->myDoc);
9907 ctxt->myDoc = NULL;
9908 }
9909 if (sax != NULL)
9910 ctxt->sax = NULL;
9911 xmlFreeParserCtxt(ctxt);
9912
9913 return(ret);
9914}
9915
9916/**
Daniel Veillarda293c322001-10-02 13:54:14 +00009917 * xmlSAXParseFile:
9918 * @sax: the SAX handler block
9919 * @filename: the filename
9920 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9921 * documents
9922 *
9923 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9924 * compressed document is provided by default if found at compile-time.
9925 * It use the given SAX function block to handle the parsing callback.
9926 * If sax is NULL, fallback to the default DOM tree building routines.
9927 *
9928 * Returns the resulting document tree
9929 */
9930
9931xmlDocPtr
9932xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9933 int recovery) {
9934 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
9935}
9936
9937/**
Owen Taylor3473f882001-02-23 17:55:21 +00009938 * xmlRecoverDoc:
9939 * @cur: a pointer to an array of xmlChar
9940 *
9941 * parse an XML in-memory document and build a tree.
9942 * In the case the document is not Well Formed, a tree is built anyway
9943 *
9944 * Returns the resulting document tree
9945 */
9946
9947xmlDocPtr
9948xmlRecoverDoc(xmlChar *cur) {
9949 return(xmlSAXParseDoc(NULL, cur, 1));
9950}
9951
9952/**
9953 * xmlParseFile:
9954 * @filename: the filename
9955 *
9956 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9957 * compressed document is provided by default if found at compile-time.
9958 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +00009959 * Returns the resulting document tree if the file was wellformed,
9960 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +00009961 */
9962
9963xmlDocPtr
9964xmlParseFile(const char *filename) {
9965 return(xmlSAXParseFile(NULL, filename, 0));
9966}
9967
9968/**
9969 * xmlRecoverFile:
9970 * @filename: the filename
9971 *
9972 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9973 * compressed document is provided by default if found at compile-time.
9974 * In the case the document is not Well Formed, a tree is built anyway
9975 *
9976 * Returns the resulting document tree
9977 */
9978
9979xmlDocPtr
9980xmlRecoverFile(const char *filename) {
9981 return(xmlSAXParseFile(NULL, filename, 1));
9982}
9983
9984
9985/**
9986 * xmlSetupParserForBuffer:
9987 * @ctxt: an XML parser context
9988 * @buffer: a xmlChar * buffer
9989 * @filename: a file name
9990 *
9991 * Setup the parser context to parse a new buffer; Clears any prior
9992 * contents from the parser context. The buffer parameter must not be
9993 * NULL, but the filename parameter can be
9994 */
9995void
9996xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9997 const char* filename)
9998{
9999 xmlParserInputPtr input;
10000
10001 input = xmlNewInputStream(ctxt);
10002 if (input == NULL) {
10003 perror("malloc");
10004 xmlFree(ctxt);
10005 return;
10006 }
10007
10008 xmlClearParserCtxt(ctxt);
10009 if (filename != NULL)
10010 input->filename = xmlMemStrdup(filename);
10011 input->base = buffer;
10012 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010013 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000010014 inputPush(ctxt, input);
10015}
10016
10017/**
10018 * xmlSAXUserParseFile:
10019 * @sax: a SAX handler
10020 * @user_data: The user data returned on SAX callbacks
10021 * @filename: a file name
10022 *
10023 * parse an XML file and call the given SAX handler routines.
10024 * Automatic support for ZLIB/Compress compressed document is provided
10025 *
10026 * Returns 0 in case of success or a error number otherwise
10027 */
10028int
10029xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10030 const char *filename) {
10031 int ret = 0;
10032 xmlParserCtxtPtr ctxt;
10033
10034 ctxt = xmlCreateFileParserCtxt(filename);
10035 if (ctxt == NULL) return -1;
10036 if (ctxt->sax != &xmlDefaultSAXHandler)
10037 xmlFree(ctxt->sax);
10038 ctxt->sax = sax;
10039 if (user_data != NULL)
10040 ctxt->userData = user_data;
10041
10042 xmlParseDocument(ctxt);
10043
10044 if (ctxt->wellFormed)
10045 ret = 0;
10046 else {
10047 if (ctxt->errNo != 0)
10048 ret = ctxt->errNo;
10049 else
10050 ret = -1;
10051 }
10052 if (sax != NULL)
10053 ctxt->sax = NULL;
10054 xmlFreeParserCtxt(ctxt);
10055
10056 return ret;
10057}
10058
10059/************************************************************************
10060 * *
10061 * Front ends when parsing from memory *
10062 * *
10063 ************************************************************************/
10064
10065/**
10066 * xmlCreateMemoryParserCtxt:
10067 * @buffer: a pointer to a char array
10068 * @size: the size of the array
10069 *
10070 * Create a parser context for an XML in-memory document.
10071 *
10072 * Returns the new parser context or NULL
10073 */
10074xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010075xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010076 xmlParserCtxtPtr ctxt;
10077 xmlParserInputPtr input;
10078 xmlParserInputBufferPtr buf;
10079
10080 if (buffer == NULL)
10081 return(NULL);
10082 if (size <= 0)
10083 return(NULL);
10084
10085 ctxt = xmlNewParserCtxt();
10086 if (ctxt == NULL)
10087 return(NULL);
10088
10089 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
10090 if (buf == NULL) return(NULL);
10091
10092 input = xmlNewInputStream(ctxt);
10093 if (input == NULL) {
10094 xmlFreeParserCtxt(ctxt);
10095 return(NULL);
10096 }
10097
10098 input->filename = NULL;
10099 input->buf = buf;
10100 input->base = input->buf->buffer->content;
10101 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010102 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010103
10104 inputPush(ctxt, input);
10105 return(ctxt);
10106}
10107
10108/**
10109 * xmlSAXParseMemory:
10110 * @sax: the SAX handler block
10111 * @buffer: an pointer to a char array
10112 * @size: the size of the array
10113 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10114 * documents
10115 *
10116 * parse an XML in-memory block and use the given SAX function block
10117 * to handle the parsing callback. If sax is NULL, fallback to the default
10118 * DOM tree building routines.
10119 *
10120 * Returns the resulting document tree
10121 */
10122xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010123xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10124 int size, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000010125 xmlDocPtr ret;
10126 xmlParserCtxtPtr ctxt;
10127
10128 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10129 if (ctxt == NULL) return(NULL);
10130 if (sax != NULL) {
10131 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010132 }
10133
10134 xmlParseDocument(ctxt);
10135
10136 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10137 else {
10138 ret = NULL;
10139 xmlFreeDoc(ctxt->myDoc);
10140 ctxt->myDoc = NULL;
10141 }
10142 if (sax != NULL)
10143 ctxt->sax = NULL;
10144 xmlFreeParserCtxt(ctxt);
10145
10146 return(ret);
10147}
10148
10149/**
10150 * xmlParseMemory:
10151 * @buffer: an pointer to a char array
10152 * @size: the size of the array
10153 *
10154 * parse an XML in-memory block and build a tree.
10155 *
10156 * Returns the resulting document tree
10157 */
10158
Daniel Veillard50822cb2001-07-26 20:05:51 +000010159xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010160 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10161}
10162
10163/**
10164 * xmlRecoverMemory:
10165 * @buffer: an pointer to a char array
10166 * @size: the size of the array
10167 *
10168 * parse an XML in-memory block and build a tree.
10169 * In the case the document is not Well Formed, a tree is built anyway
10170 *
10171 * Returns the resulting document tree
10172 */
10173
Daniel Veillard50822cb2001-07-26 20:05:51 +000010174xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010175 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10176}
10177
10178/**
10179 * xmlSAXUserParseMemory:
10180 * @sax: a SAX handler
10181 * @user_data: The user data returned on SAX callbacks
10182 * @buffer: an in-memory XML document input
10183 * @size: the length of the XML document in bytes
10184 *
10185 * A better SAX parsing routine.
10186 * parse an XML in-memory buffer and call the given SAX handler routines.
10187 *
10188 * Returns 0 in case of success or a error number otherwise
10189 */
10190int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010191 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010192 int ret = 0;
10193 xmlParserCtxtPtr ctxt;
10194 xmlSAXHandlerPtr oldsax = NULL;
10195
10196 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10197 if (ctxt == NULL) return -1;
10198 if (sax != NULL) {
10199 oldsax = ctxt->sax;
10200 ctxt->sax = sax;
10201 }
Daniel Veillard30211a02001-04-26 09:33:18 +000010202 if (user_data != NULL)
10203 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010204
10205 xmlParseDocument(ctxt);
10206
10207 if (ctxt->wellFormed)
10208 ret = 0;
10209 else {
10210 if (ctxt->errNo != 0)
10211 ret = ctxt->errNo;
10212 else
10213 ret = -1;
10214 }
10215 if (sax != NULL) {
10216 ctxt->sax = oldsax;
10217 }
10218 xmlFreeParserCtxt(ctxt);
10219
10220 return ret;
10221}
10222
10223/**
10224 * xmlCreateDocParserCtxt:
10225 * @cur: a pointer to an array of xmlChar
10226 *
10227 * Creates a parser context for an XML in-memory document.
10228 *
10229 * Returns the new parser context or NULL
10230 */
10231xmlParserCtxtPtr
10232xmlCreateDocParserCtxt(xmlChar *cur) {
10233 int len;
10234
10235 if (cur == NULL)
10236 return(NULL);
10237 len = xmlStrlen(cur);
10238 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10239}
10240
10241/**
10242 * xmlSAXParseDoc:
10243 * @sax: the SAX handler block
10244 * @cur: a pointer to an array of xmlChar
10245 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10246 * documents
10247 *
10248 * parse an XML in-memory document and build a tree.
10249 * It use the given SAX function block to handle the parsing callback.
10250 * If sax is NULL, fallback to the default DOM tree building routines.
10251 *
10252 * Returns the resulting document tree
10253 */
10254
10255xmlDocPtr
10256xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10257 xmlDocPtr ret;
10258 xmlParserCtxtPtr ctxt;
10259
10260 if (cur == NULL) return(NULL);
10261
10262
10263 ctxt = xmlCreateDocParserCtxt(cur);
10264 if (ctxt == NULL) return(NULL);
10265 if (sax != NULL) {
10266 ctxt->sax = sax;
10267 ctxt->userData = NULL;
10268 }
10269
10270 xmlParseDocument(ctxt);
10271 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10272 else {
10273 ret = NULL;
10274 xmlFreeDoc(ctxt->myDoc);
10275 ctxt->myDoc = NULL;
10276 }
10277 if (sax != NULL)
10278 ctxt->sax = NULL;
10279 xmlFreeParserCtxt(ctxt);
10280
10281 return(ret);
10282}
10283
10284/**
10285 * xmlParseDoc:
10286 * @cur: a pointer to an array of xmlChar
10287 *
10288 * parse an XML in-memory document and build a tree.
10289 *
10290 * Returns the resulting document tree
10291 */
10292
10293xmlDocPtr
10294xmlParseDoc(xmlChar *cur) {
10295 return(xmlSAXParseDoc(NULL, cur, 0));
10296}
10297
/************************************************************************
 *                                                                      *
 *   Specific functions to keep track of entity references,             *
 *   used by the XSLT debugger                                          *
 *                                                                      *
 ************************************************************************/
10304
10305static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
10306
10307/**
10308 * xmlAddEntityReference:
10309 * @ent : A valid entity
10310 * @firstNode : A valid first node for children of entity
10311 * @lastNode : A valid last node of children entity
10312 *
10313 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
10314 */
10315static void
10316xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
10317 xmlNodePtr lastNode)
10318{
10319 if (xmlEntityRefFunc != NULL) {
10320 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
10321 }
10322}
10323
10324
10325/**
10326 * xmlSetEntityReferenceFunc:
10327 * @func : A valid function
10328 *
10329 * Set the function to call call back when a xml reference has been made
10330 */
10331void
10332xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
10333{
10334 xmlEntityRefFunc = func;
10335}
Owen Taylor3473f882001-02-23 17:55:21 +000010336
10337/************************************************************************
10338 * *
10339 * Miscellaneous *
10340 * *
10341 ************************************************************************/
10342
10343#ifdef LIBXML_XPATH_ENABLED
10344#include <libxml/xpath.h>
10345#endif
10346
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010347extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000010348static int xmlParserInitialized = 0;
10349
10350/**
10351 * xmlInitParser:
10352 *
10353 * Initialization function for the XML parser.
10354 * This is not reentrant. Call once before processing in case of
10355 * use in multithreaded programs.
10356 */
10357
10358void
10359xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000010360 if (xmlParserInitialized != 0)
10361 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010362
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010363 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
10364 (xmlGenericError == NULL))
10365 initGenericErrorDefaultFunc(NULL);
Daniel Veillardd0463562001-10-13 09:15:48 +000010366 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000010367 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000010368 xmlInitCharEncodingHandlers();
10369 xmlInitializePredefinedEntities();
10370 xmlDefaultSAXHandlerInit();
10371 xmlRegisterDefaultInputCallbacks();
10372 xmlRegisterDefaultOutputCallbacks();
10373#ifdef LIBXML_HTML_ENABLED
10374 htmlInitAutoClose();
10375 htmlDefaultSAXHandlerInit();
10376#endif
10377#ifdef LIBXML_XPATH_ENABLED
10378 xmlXPathInit();
10379#endif
10380 xmlParserInitialized = 1;
10381}
10382
10383/**
10384 * xmlCleanupParser:
10385 *
10386 * Cleanup function for the XML parser. It tries to reclaim all
10387 * parsing related global memory allocated for the parser processing.
10388 * It doesn't deallocate any document related memory. Calling this
10389 * function should not prevent reusing the parser.
10390 */
10391
10392void
10393xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000010394 xmlCleanupCharEncodingHandlers();
10395 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000010396#ifdef LIBXML_CATALOG_ENABLED
10397 xmlCatalogCleanup();
10398#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000010399 xmlCleanupThreads();
10400 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010401}