blob: 3144c3e95a1812c2aeea6f0df08dec5375ab4d85 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Bjorn Reese70a9da52001-04-21 16:57:29 +000033#include "libxml.h"
34
Daniel Veillard3c5ed912002-01-08 10:36:16 +000035#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000036#define XML_DIR_SEP '\\'
37#else
Owen Taylor3473f882001-02-23 17:55:21 +000038#define XML_DIR_SEP '/'
39#endif
40
Owen Taylor3473f882001-02-23 17:55:21 +000041#include <stdlib.h>
42#include <string.h>
43#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000044#include <libxml/threads.h>
45#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046#include <libxml/tree.h>
47#include <libxml/parser.h>
48#include <libxml/parserInternals.h>
49#include <libxml/valid.h>
50#include <libxml/entities.h>
51#include <libxml/xmlerror.h>
52#include <libxml/encoding.h>
53#include <libxml/xmlIO.h>
54#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000055#ifdef LIBXML_CATALOG_ENABLED
56#include <libxml/catalog.h>
57#endif
Owen Taylor3473f882001-02-23 17:55:21 +000058
59#ifdef HAVE_CTYPE_H
60#include <ctype.h>
61#endif
62#ifdef HAVE_STDLIB_H
63#include <stdlib.h>
64#endif
65#ifdef HAVE_SYS_STAT_H
66#include <sys/stat.h>
67#endif
68#ifdef HAVE_FCNTL_H
69#include <fcntl.h>
70#endif
71#ifdef HAVE_UNISTD_H
72#include <unistd.h>
73#endif
74#ifdef HAVE_ZLIB_H
75#include <zlib.h>
76#endif
77
78
Daniel Veillard21a0f912001-02-25 19:54:14 +000079#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000080#define XML_PARSER_BUFFER_SIZE 100
81
/*
 * NULL-terminated table of the processing-instruction targets starting
 * with "xml" that are allowed by W3C specifications.
 */
static const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
90
91/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +000092xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
93 const xmlChar **str);
94
Daniel Veillard257d9102001-05-08 10:41:44 +000095static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +000096xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
97 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000098 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000099 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000100
Daniel Veillard8107a222002-01-13 14:10:10 +0000101static void
102xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
103 xmlNodePtr lastNode);
104
Owen Taylor3473f882001-02-23 17:55:21 +0000105/************************************************************************
106 * *
107 * Parser stacks related functions and macros *
108 * *
109 ************************************************************************/
110
111xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
112 const xmlChar ** str);
113
/*
 * Generic function for accessing stacks in the Parser Context
 *
 * PUSH_AND_POP(scope, type, name) expands to a pair of functions:
 *
 *   scope int  name##Push(xmlParserCtxtPtr ctxt, type value);
 *   scope type name##Pop(xmlParserCtxtPtr ctxt);
 *
 * They operate on the array ctxt->name##Tab, whose number of used slots is
 * ctxt->name##Nr and whose allocated capacity is ctxt->name##Max. The field
 * ctxt->name is kept equal to the entry on top of the stack (NULL once the
 * stack is empty).
 *
 * name##Push doubles the capacity when the table is full and returns the
 * index at which the value was stored, or 0 if the table could not be
 * grown. The result of xmlRealloc is checked before it replaces the table
 * pointer, so on failure the existing table, its capacity and its content
 * are left untouched instead of being leaked and replaced by NULL.
 *
 * name##Pop returns the removed entry, or 0 if the stack was already empty,
 * and clears the slot it vacates.
 */

#define PUSH_AND_POP(scope, type, name)					\
scope int name##Push(xmlParserCtxtPtr ctxt, type value) {		\
    if (ctxt->name##Nr >= ctxt->name##Max) {				\
        type *grown;							\
        grown = (type *) xmlRealloc(ctxt->name##Tab,			\
                ctxt->name##Max * 2 * sizeof(ctxt->name##Tab[0]));	\
        if (grown == NULL) {						\
            xmlGenericError(xmlGenericErrorContext,			\
                    "realloc failed !\n");				\
            return(0);							\
        }								\
        ctxt->name##Tab = grown;					\
        ctxt->name##Max *= 2;						\
    }									\
    ctxt->name##Tab[ctxt->name##Nr] = value;				\
    ctxt->name = value;							\
    return(ctxt->name##Nr++);						\
}									\
scope type name##Pop(xmlParserCtxtPtr ctxt) {				\
    type ret;								\
    if (ctxt->name##Nr <= 0) return(0);					\
    ctxt->name##Nr--;							\
    if (ctxt->name##Nr > 0)						\
        ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1];		\
    else								\
        ctxt->name = NULL;						\
    ret = ctxt->name##Tab[ctxt->name##Nr];				\
    ctxt->name##Tab[ctxt->name##Nr] = 0;				\
    return(ret);							\
}
146
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000147/**
148 * inputPop:
149 * @ctxt: an XML parser context
150 *
151 * Pops the top parser input from the input stack
152 *
153 * Returns the input just removed
154 */
155/**
156 * inputPush:
157 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000158 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000159 *
160 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000161 *
162 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000163 */
164/**
165 * namePop:
166 * @ctxt: an XML parser context
167 *
168 * Pops the top element name from the name stack
169 *
170 * Returns the name just removed
171 */
172/**
173 * namePush:
174 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000175 * @value: the element name
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000176 *
177 * Pushes a new element name on top of the name stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000178 *
179 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000180 */
181/**
182 * nodePop:
183 * @ctxt: an XML parser context
184 *
185 * Pops the top element node from the node stack
186 *
187 * Returns the node just removed
188 */
189/**
190 * nodePush:
191 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000192 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000193 *
194 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000195 *
196 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000197 */
Owen Taylor3473f882001-02-23 17:55:21 +0000198/*
199 * Those macros actually generate the functions
200 */
201PUSH_AND_POP(extern, xmlParserInputPtr, input)
202PUSH_AND_POP(extern, xmlNodePtr, node)
203PUSH_AND_POP(extern, xmlChar*, name)
204
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000205static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000206 if (ctxt->spaceNr >= ctxt->spaceMax) {
207 ctxt->spaceMax *= 2;
208 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
209 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
210 if (ctxt->spaceTab == NULL) {
211 xmlGenericError(xmlGenericErrorContext,
212 "realloc failed !\n");
213 return(0);
214 }
215 }
216 ctxt->spaceTab[ctxt->spaceNr] = val;
217 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
218 return(ctxt->spaceNr++);
219}
220
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000221static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000222 int ret;
223 if (ctxt->spaceNr <= 0) return(0);
224 ctxt->spaceNr--;
225 if (ctxt->spaceNr > 0)
226 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
227 else
228 ctxt->space = NULL;
229 ret = ctxt->spaceTab[ctxt->spaceNr];
230 ctxt->spaceTab[ctxt->spaceNr] = -1;
231 return(ret);
232}
233
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. Every one of them expects a variable named ctxt (the
 * parser context) to be in scope at the point of use.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them; they work on raw xmlChar values and are meant for comparisons
 * against, or skipping over, ASCII text only:
 *
 *   CUR_PTR    the current pointer into the input (ctxt->input->cur).
 *              To be used with extreme caution since operations consuming
 *              characters may move the input buffer to a different location !
 *   CUR        the pending ctxt->token if there is one, otherwise the
 *              xmlChar at the current input position.
 *   RAW        the xmlChar at the current input position, bypassing any
 *              token extraction; evaluates to -1 while a token is pending.
 *   NXT(n)     the xmlChar n positions after the current one.
 *   SKIP(n)    advance the input (and ctxt->nbChars) by n xmlChar; then
 *              handle a PE reference if the new current xmlChar is '%',
 *              and pop the input if it is exhausted and cannot be grown.
 *   NEXT1      advance by exactly one xmlChar and try to grow the input
 *              when a 0 is reached; neither line/column tracking nor
 *              PE reference handling is performed.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding:
 *
 *   NEXT       skip to the next character through xmlNextChar().
 *   NEXTL(l)   skip l xmlChar in the input buffer, updating line/column,
 *              clearing the pending token and handling a PE reference if
 *              the new current xmlChar is '%'.
 *   CUR_CHAR(l)  the current unicode character (int), l is set to the
 *              number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR(s, l)  same but operates on the string s instead of the
 *              context.
 *   COPY_BUF(l,b,i,v)  copy the unicode char v into buffer b at index i
 *              and increment the index; the single xmlChar store is used
 *              only when l is 1.
 *
 * Buffer handling:
 *
 *   GROW       when fewer than INPUT_CHUNK xmlChar remain ahead of the
 *              current position, ask for more input and pop the input if
 *              it is exhausted.
 *   SHRINK     when more than INPUT_CHUNK xmlChar lie behind the current
 *              position, shrink the input and pop it if it is exhausted.
 *   SKIP_BLANKS  shorthand for xmlSkipBlankChars(ctxt).
 *
 * SHRINK, GROW and NEXT1 expand to a bare block and COPY_BUF to a bare
 * if/else rather than to a do { } while (0) statement, so they must not be
 * used as the unbraced body of an if that has an else branch.
 */

#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)

#define SHRINK								\
    if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {		\
        xmlParserInputShrink(ctxt->input);				\
        if ((*ctxt->input->cur == 0) &&					\
            (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))	\
            xmlPopInput(ctxt);						\
    }

#define GROW								\
    if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {		\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
        if ((*ctxt->input->cur == 0) &&					\
            (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))	\
            xmlPopInput(ctxt);						\
    }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {								\
        ctxt->input->cur++;						\
        ctxt->nbChars++;						\
        if (*ctxt->input->cur == 0)					\
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
        ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0;							\
    ctxt->input->cur += l;						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +0000320
321/**
322 * xmlSkipBlankChars:
323 * @ctxt: the XML parser context
324 *
325 * skip all blanks character found at that point in the input streams.
326 * It pops up finished entities in the process if allowable at that point.
327 *
328 * Returns the number of space chars skipped
329 */
330
331int
332xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000333 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000334
Daniel Veillard02141ea2001-04-30 11:46:40 +0000335 if (ctxt->token != 0) {
336 if (!IS_BLANK(ctxt->token))
337 return(0);
338 ctxt->token = 0;
339 res++;
340 }
Owen Taylor3473f882001-02-23 17:55:21 +0000341 /*
342 * It's Okay to use CUR/NEXT here since all the blanks are on
343 * the ASCII range.
344 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000345 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
346 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000347 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000348 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000349 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000350 cur = ctxt->input->cur;
351 while (IS_BLANK(*cur)) {
352 if (*cur == '\n') {
353 ctxt->input->line++; ctxt->input->col = 1;
354 }
355 cur++;
356 res++;
357 if (*cur == 0) {
358 ctxt->input->cur = cur;
359 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
360 cur = ctxt->input->cur;
361 }
362 }
363 ctxt->input->cur = cur;
364 } else {
365 int cur;
366 do {
367 cur = CUR;
368 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
369 NEXT;
370 cur = CUR;
371 res++;
372 }
373 while ((cur == 0) && (ctxt->inputNr > 1) &&
374 (ctxt->instate != XML_PARSER_COMMENT)) {
375 xmlPopInput(ctxt);
376 cur = CUR;
377 }
378 /*
379 * Need to handle support of entities branching here
380 */
381 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
382 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
383 }
Owen Taylor3473f882001-02-23 17:55:21 +0000384 return(res);
385}
386
387/************************************************************************
388 * *
389 * Commodity functions to handle entities *
390 * *
391 ************************************************************************/
392
393/**
394 * xmlPopInput:
395 * @ctxt: an XML parser context
396 *
397 * xmlPopInput: the current input pointed by ctxt->input came to an end
398 * pop it and return the next char.
399 *
400 * Returns the current xmlChar in the parser context
401 */
402xmlChar
403xmlPopInput(xmlParserCtxtPtr ctxt) {
404 if (ctxt->inputNr == 1) return(0); /* End of main Input */
405 if (xmlParserDebugEntities)
406 xmlGenericError(xmlGenericErrorContext,
407 "Popping input %d\n", ctxt->inputNr);
408 xmlFreeInputStream(inputPop(ctxt));
409 if ((*ctxt->input->cur == 0) &&
410 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
411 return(xmlPopInput(ctxt));
412 return(CUR);
413}
414
415/**
416 * xmlPushInput:
417 * @ctxt: an XML parser context
418 * @input: an XML parser input fragment (entity, XML fragment ...).
419 *
420 * xmlPushInput: switch to a new input stream which is stacked on top
421 * of the previous one(s).
422 */
423void
424xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
425 if (input == NULL) return;
426
427 if (xmlParserDebugEntities) {
428 if ((ctxt->input != NULL) && (ctxt->input->filename))
429 xmlGenericError(xmlGenericErrorContext,
430 "%s(%d): ", ctxt->input->filename,
431 ctxt->input->line);
432 xmlGenericError(xmlGenericErrorContext,
433 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
434 }
435 inputPush(ctxt, input);
436 GROW;
437}
438
439/**
440 * xmlParseCharRef:
441 * @ctxt: an XML parser context
442 *
443 * parse Reference declarations
444 *
445 * [66] CharRef ::= '&#' [0-9]+ ';' |
446 * '&#x' [0-9a-fA-F]+ ';'
447 *
448 * [ WFC: Legal Character ]
449 * Characters referred to using character references must match the
450 * production for Char.
451 *
452 * Returns the value parsed (as an int), 0 in case of error
453 */
454int
455xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000456 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000457 int count = 0;
458
459 if (ctxt->token != 0) {
460 val = ctxt->token;
461 ctxt->token = 0;
462 return(val);
463 }
464 /*
465 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
466 */
467 if ((RAW == '&') && (NXT(1) == '#') &&
468 (NXT(2) == 'x')) {
469 SKIP(3);
470 GROW;
471 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000472 if (count++ > 20) {
473 count = 0;
474 GROW;
475 }
476 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000477 val = val * 16 + (CUR - '0');
478 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
479 val = val * 16 + (CUR - 'a') + 10;
480 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
481 val = val * 16 + (CUR - 'A') + 10;
482 else {
483 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
484 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
485 ctxt->sax->error(ctxt->userData,
486 "xmlParseCharRef: invalid hexadecimal value\n");
487 ctxt->wellFormed = 0;
488 ctxt->disableSAX = 1;
489 val = 0;
490 break;
491 }
492 NEXT;
493 count++;
494 }
495 if (RAW == ';') {
496 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
497 ctxt->nbChars ++;
498 ctxt->input->cur++;
499 }
500 } else if ((RAW == '&') && (NXT(1) == '#')) {
501 SKIP(2);
502 GROW;
503 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000504 if (count++ > 20) {
505 count = 0;
506 GROW;
507 }
508 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000509 val = val * 10 + (CUR - '0');
510 else {
511 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
512 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
513 ctxt->sax->error(ctxt->userData,
514 "xmlParseCharRef: invalid decimal value\n");
515 ctxt->wellFormed = 0;
516 ctxt->disableSAX = 1;
517 val = 0;
518 break;
519 }
520 NEXT;
521 count++;
522 }
523 if (RAW == ';') {
524 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
525 ctxt->nbChars ++;
526 ctxt->input->cur++;
527 }
528 } else {
529 ctxt->errNo = XML_ERR_INVALID_CHARREF;
530 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
531 ctxt->sax->error(ctxt->userData,
532 "xmlParseCharRef: invalid value\n");
533 ctxt->wellFormed = 0;
534 ctxt->disableSAX = 1;
535 }
536
537 /*
538 * [ WFC: Legal Character ]
539 * Characters referred to using character references must match the
540 * production for Char.
541 */
542 if (IS_CHAR(val)) {
543 return(val);
544 } else {
545 ctxt->errNo = XML_ERR_INVALID_CHAR;
546 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000547 ctxt->sax->error(ctxt->userData,
548 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000549 val);
550 ctxt->wellFormed = 0;
551 ctxt->disableSAX = 1;
552 }
553 return(0);
554}
555
556/**
557 * xmlParseStringCharRef:
558 * @ctxt: an XML parser context
559 * @str: a pointer to an index in the string
560 *
561 * parse Reference declarations, variant parsing from a string rather
562 * than an an input flow.
563 *
564 * [66] CharRef ::= '&#' [0-9]+ ';' |
565 * '&#x' [0-9a-fA-F]+ ';'
566 *
567 * [ WFC: Legal Character ]
568 * Characters referred to using character references must match the
569 * production for Char.
570 *
571 * Returns the value parsed (as an int), 0 in case of error, str will be
572 * updated to the current value of the index
573 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000574static int
Owen Taylor3473f882001-02-23 17:55:21 +0000575xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
576 const xmlChar *ptr;
577 xmlChar cur;
578 int val = 0;
579
580 if ((str == NULL) || (*str == NULL)) return(0);
581 ptr = *str;
582 cur = *ptr;
583 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
584 ptr += 3;
585 cur = *ptr;
586 while (cur != ';') { /* Non input consuming loop */
587 if ((cur >= '0') && (cur <= '9'))
588 val = val * 16 + (cur - '0');
589 else if ((cur >= 'a') && (cur <= 'f'))
590 val = val * 16 + (cur - 'a') + 10;
591 else if ((cur >= 'A') && (cur <= 'F'))
592 val = val * 16 + (cur - 'A') + 10;
593 else {
594 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
595 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
596 ctxt->sax->error(ctxt->userData,
597 "xmlParseStringCharRef: invalid hexadecimal value\n");
598 ctxt->wellFormed = 0;
599 ctxt->disableSAX = 1;
600 val = 0;
601 break;
602 }
603 ptr++;
604 cur = *ptr;
605 }
606 if (cur == ';')
607 ptr++;
608 } else if ((cur == '&') && (ptr[1] == '#')){
609 ptr += 2;
610 cur = *ptr;
611 while (cur != ';') { /* Non input consuming loops */
612 if ((cur >= '0') && (cur <= '9'))
613 val = val * 10 + (cur - '0');
614 else {
615 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
616 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
617 ctxt->sax->error(ctxt->userData,
618 "xmlParseStringCharRef: invalid decimal value\n");
619 ctxt->wellFormed = 0;
620 ctxt->disableSAX = 1;
621 val = 0;
622 break;
623 }
624 ptr++;
625 cur = *ptr;
626 }
627 if (cur == ';')
628 ptr++;
629 } else {
630 ctxt->errNo = XML_ERR_INVALID_CHARREF;
631 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
632 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000633 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000634 ctxt->wellFormed = 0;
635 ctxt->disableSAX = 1;
636 return(0);
637 }
638 *str = ptr;
639
640 /*
641 * [ WFC: Legal Character ]
642 * Characters referred to using character references must match the
643 * production for Char.
644 */
645 if (IS_CHAR(val)) {
646 return(val);
647 } else {
648 ctxt->errNo = XML_ERR_INVALID_CHAR;
649 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
650 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000651 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000652 ctxt->wellFormed = 0;
653 ctxt->disableSAX = 1;
654 }
655 return(0);
656}
657
658/**
659 * xmlParserHandlePEReference:
660 * @ctxt: the parser context
661 *
662 * [69] PEReference ::= '%' Name ';'
663 *
664 * [ WFC: No Recursion ]
665 * A parsed entity must not contain a recursive
666 * reference to itself, either directly or indirectly.
667 *
668 * [ WFC: Entity Declared ]
669 * In a document without any DTD, a document with only an internal DTD
670 * subset which contains no parameter entity references, or a document
671 * with "standalone='yes'", ... ... The declaration of a parameter
672 * entity must precede any reference to it...
673 *
674 * [ VC: Entity Declared ]
675 * In a document with an external subset or external parameter entities
676 * with "standalone='no'", ... ... The declaration of a parameter entity
677 * must precede any reference to it...
678 *
679 * [ WFC: In DTD ]
680 * Parameter-entity references may only appear in the DTD.
681 * NOTE: misleading but this is handled.
682 *
683 * A PEReference may have been detected in the current input stream
684 * the handling is done accordingly to
685 * http://www.w3.org/TR/REC-xml#entproc
686 * i.e.
687 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000688 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000689 */
690void
691xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
692 xmlChar *name;
693 xmlEntityPtr entity = NULL;
694 xmlParserInputPtr input;
695
696 if (ctxt->token != 0) {
697 return;
698 }
699 if (RAW != '%') return;
700 switch(ctxt->instate) {
701 case XML_PARSER_CDATA_SECTION:
702 return;
703 case XML_PARSER_COMMENT:
704 return;
705 case XML_PARSER_START_TAG:
706 return;
707 case XML_PARSER_END_TAG:
708 return;
709 case XML_PARSER_EOF:
710 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
711 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
712 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
713 ctxt->wellFormed = 0;
714 ctxt->disableSAX = 1;
715 return;
716 case XML_PARSER_PROLOG:
717 case XML_PARSER_START:
718 case XML_PARSER_MISC:
719 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
720 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
721 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
722 ctxt->wellFormed = 0;
723 ctxt->disableSAX = 1;
724 return;
725 case XML_PARSER_ENTITY_DECL:
726 case XML_PARSER_CONTENT:
727 case XML_PARSER_ATTRIBUTE_VALUE:
728 case XML_PARSER_PI:
729 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +0000730 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +0000731 /* we just ignore it there */
732 return;
733 case XML_PARSER_EPILOG:
734 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
735 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
736 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
737 ctxt->wellFormed = 0;
738 ctxt->disableSAX = 1;
739 return;
740 case XML_PARSER_ENTITY_VALUE:
741 /*
742 * NOTE: in the case of entity values, we don't do the
743 * substitution here since we need the literal
744 * entity value to be able to save the internal
745 * subset of the document.
746 * This will be handled by xmlStringDecodeEntities
747 */
748 return;
749 case XML_PARSER_DTD:
750 /*
751 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
752 * In the internal DTD subset, parameter-entity references
753 * can occur only where markup declarations can occur, not
754 * within markup declarations.
755 * In that case this is handled in xmlParseMarkupDecl
756 */
757 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
758 return;
759 break;
760 case XML_PARSER_IGNORE:
761 return;
762 }
763
764 NEXT;
765 name = xmlParseName(ctxt);
766 if (xmlParserDebugEntities)
767 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000768 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000769 if (name == NULL) {
770 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
771 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000772 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000773 ctxt->wellFormed = 0;
774 ctxt->disableSAX = 1;
775 } else {
776 if (RAW == ';') {
777 NEXT;
778 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
779 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
780 if (entity == NULL) {
781
782 /*
783 * [ WFC: Entity Declared ]
784 * In a document without any DTD, a document with only an
785 * internal DTD subset which contains no parameter entity
786 * references, or a document with "standalone='yes'", ...
787 * ... The declaration of a parameter entity must precede
788 * any reference to it...
789 */
790 if ((ctxt->standalone == 1) ||
791 ((ctxt->hasExternalSubset == 0) &&
792 (ctxt->hasPErefs == 0))) {
793 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
794 ctxt->sax->error(ctxt->userData,
795 "PEReference: %%%s; not found\n", name);
796 ctxt->wellFormed = 0;
797 ctxt->disableSAX = 1;
798 } else {
799 /*
800 * [ VC: Entity Declared ]
801 * In a document with an external subset or external
802 * parameter entities with "standalone='no'", ...
803 * ... The declaration of a parameter entity must precede
804 * any reference to it...
805 */
806 if ((!ctxt->disableSAX) &&
807 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
808 ctxt->vctxt.error(ctxt->vctxt.userData,
809 "PEReference: %%%s; not found\n", name);
810 } else if ((!ctxt->disableSAX) &&
811 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
812 ctxt->sax->warning(ctxt->userData,
813 "PEReference: %%%s; not found\n", name);
814 ctxt->valid = 0;
815 }
816 } else {
817 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
818 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000819 xmlChar start[4];
820 xmlCharEncoding enc;
821
Owen Taylor3473f882001-02-23 17:55:21 +0000822 /*
823 * handle the extra spaces added before and after
824 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000825 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000826 */
827 input = xmlNewEntityInputStream(ctxt, entity);
828 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000829
830 /*
831 * Get the 4 first bytes and decode the charset
832 * if enc != XML_CHAR_ENCODING_NONE
833 * plug some encoding conversion routines.
834 */
835 GROW
836 start[0] = RAW;
837 start[1] = NXT(1);
838 start[2] = NXT(2);
839 start[3] = NXT(3);
840 enc = xmlDetectCharEncoding(start, 4);
841 if (enc != XML_CHAR_ENCODING_NONE) {
842 xmlSwitchEncoding(ctxt, enc);
843 }
844
Owen Taylor3473f882001-02-23 17:55:21 +0000845 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
846 (RAW == '<') && (NXT(1) == '?') &&
847 (NXT(2) == 'x') && (NXT(3) == 'm') &&
848 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
849 xmlParseTextDecl(ctxt);
850 }
851 if (ctxt->token == 0)
852 ctxt->token = ' ';
853 } else {
854 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
855 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000856 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000857 name);
858 ctxt->wellFormed = 0;
859 ctxt->disableSAX = 1;
860 }
861 }
862 } else {
863 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
864 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
865 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000866 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000867 ctxt->wellFormed = 0;
868 ctxt->disableSAX = 1;
869 }
870 xmlFree(name);
871 }
872}
873
/*
 * Macro used to grow the current buffer.
 *
 * growBuffer(buffer) doubles the companion variable buffer##_size and
 * reallocates buffer to that many xmlChar. It can only be used inside a
 * function returning a pointer: if the reallocation fails, the error is
 * reported with perror(), the old buffer is released (it could not be
 * reached anymore once the function has returned) and the enclosing
 * function returns NULL.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_grown;						\
    buffer##_size *= 2;							\
    buffer##_grown = (xmlChar *)					\
                xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (buffer##_grown == NULL) {					\
        perror("realloc failed");					\
        xmlFree(buffer);						\
        return(NULL);							\
    }									\
    buffer = buffer##_grown;						\
}
886
887/**
888 * xmlStringDecodeEntities:
889 * @ctxt: the parser context
890 * @str: the input string
891 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
892 * @end: an end marker xmlChar, 0 if none
893 * @end2: an end marker xmlChar, 0 if none
894 * @end3: an end marker xmlChar, 0 if none
895 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000896 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +0000897 *
898 * [67] Reference ::= EntityRef | CharRef
899 *
900 * [69] PEReference ::= '%' Name ';'
901 *
902 * Returns A newly allocated string with the substitution done. The caller
903 * must deallocate it !
904 */
905xmlChar *
906xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
907 xmlChar end, xmlChar end2, xmlChar end3) {
908 xmlChar *buffer = NULL;
909 int buffer_size = 0;
910
911 xmlChar *current = NULL;
912 xmlEntityPtr ent;
913 int c,l;
914 int nbchars = 0;
915
916 if (str == NULL)
917 return(NULL);
918
919 if (ctxt->depth > 40) {
920 ctxt->errNo = XML_ERR_ENTITY_LOOP;
921 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
922 ctxt->sax->error(ctxt->userData,
923 "Detected entity reference loop\n");
924 ctxt->wellFormed = 0;
925 ctxt->disableSAX = 1;
926 return(NULL);
927 }
928
929 /*
930 * allocate a translation buffer.
931 */
932 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
933 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
934 if (buffer == NULL) {
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000935 perror("xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +0000936 return(NULL);
937 }
938
939 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000940 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +0000941 * we are operating on already parsed values.
942 */
943 c = CUR_SCHAR(str, l);
944 while ((c != 0) && (c != end) && /* non input consuming loop */
945 (c != end2) && (c != end3)) {
946
947 if (c == 0) break;
948 if ((c == '&') && (str[1] == '#')) {
949 int val = xmlParseStringCharRef(ctxt, &str);
950 if (val != 0) {
951 COPY_BUF(0,buffer,nbchars,val);
952 }
953 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
954 if (xmlParserDebugEntities)
955 xmlGenericError(xmlGenericErrorContext,
956 "String decoding Entity Reference: %.30s\n",
957 str);
958 ent = xmlParseStringEntityRef(ctxt, &str);
959 if ((ent != NULL) &&
960 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
961 if (ent->content != NULL) {
962 COPY_BUF(0,buffer,nbchars,ent->content[0]);
963 } else {
964 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
965 ctxt->sax->error(ctxt->userData,
966 "internal error entity has no content\n");
967 }
968 } else if ((ent != NULL) && (ent->content != NULL)) {
969 xmlChar *rep;
970
971 ctxt->depth++;
972 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
973 0, 0, 0);
974 ctxt->depth--;
975 if (rep != NULL) {
976 current = rep;
977 while (*current != 0) { /* non input consuming loop */
978 buffer[nbchars++] = *current++;
979 if (nbchars >
980 buffer_size - XML_PARSER_BUFFER_SIZE) {
981 growBuffer(buffer);
982 }
983 }
984 xmlFree(rep);
985 }
986 } else if (ent != NULL) {
987 int i = xmlStrlen(ent->name);
988 const xmlChar *cur = ent->name;
989
990 buffer[nbchars++] = '&';
991 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
992 growBuffer(buffer);
993 }
994 for (;i > 0;i--)
995 buffer[nbchars++] = *cur++;
996 buffer[nbchars++] = ';';
997 }
998 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
999 if (xmlParserDebugEntities)
1000 xmlGenericError(xmlGenericErrorContext,
1001 "String decoding PE Reference: %.30s\n", str);
1002 ent = xmlParseStringPEReference(ctxt, &str);
1003 if (ent != NULL) {
1004 xmlChar *rep;
1005
1006 ctxt->depth++;
1007 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1008 0, 0, 0);
1009 ctxt->depth--;
1010 if (rep != NULL) {
1011 current = rep;
1012 while (*current != 0) { /* non input consuming loop */
1013 buffer[nbchars++] = *current++;
1014 if (nbchars >
1015 buffer_size - XML_PARSER_BUFFER_SIZE) {
1016 growBuffer(buffer);
1017 }
1018 }
1019 xmlFree(rep);
1020 }
1021 }
1022 } else {
1023 COPY_BUF(l,buffer,nbchars,c);
1024 str += l;
1025 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1026 growBuffer(buffer);
1027 }
1028 }
1029 c = CUR_SCHAR(str, l);
1030 }
1031 buffer[nbchars++] = 0;
1032 return(buffer);
1033}
1034
1035
1036/************************************************************************
1037 * *
1038 * Commodity functions to handle xmlChars *
1039 * *
1040 ************************************************************************/
1041
1042/**
1043 * xmlStrndup:
1044 * @cur: the input xmlChar *
1045 * @len: the len of @cur
1046 *
1047 * a strndup for array of xmlChar's
1048 *
1049 * Returns a new xmlChar * or NULL
1050 */
1051xmlChar *
1052xmlStrndup(const xmlChar *cur, int len) {
1053 xmlChar *ret;
1054
1055 if ((cur == NULL) || (len < 0)) return(NULL);
1056 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1057 if (ret == NULL) {
1058 xmlGenericError(xmlGenericErrorContext,
1059 "malloc of %ld byte failed\n",
1060 (len + 1) * (long)sizeof(xmlChar));
1061 return(NULL);
1062 }
1063 memcpy(ret, cur, len * sizeof(xmlChar));
1064 ret[len] = 0;
1065 return(ret);
1066}
1067
1068/**
1069 * xmlStrdup:
1070 * @cur: the input xmlChar *
1071 *
1072 * a strdup for array of xmlChar's. Since they are supposed to be
1073 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1074 * a termination mark of '0'.
1075 *
1076 * Returns a new xmlChar * or NULL
1077 */
1078xmlChar *
1079xmlStrdup(const xmlChar *cur) {
1080 const xmlChar *p = cur;
1081
1082 if (cur == NULL) return(NULL);
1083 while (*p != 0) p++; /* non input consuming */
1084 return(xmlStrndup(cur, p - cur));
1085}
1086
1087/**
1088 * xmlCharStrndup:
1089 * @cur: the input char *
1090 * @len: the len of @cur
1091 *
1092 * a strndup for char's to xmlChar's
1093 *
1094 * Returns a new xmlChar * or NULL
1095 */
1096
1097xmlChar *
1098xmlCharStrndup(const char *cur, int len) {
1099 int i;
1100 xmlChar *ret;
1101
1102 if ((cur == NULL) || (len < 0)) return(NULL);
1103 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1104 if (ret == NULL) {
1105 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1106 (len + 1) * (long)sizeof(xmlChar));
1107 return(NULL);
1108 }
1109 for (i = 0;i < len;i++)
1110 ret[i] = (xmlChar) cur[i];
1111 ret[len] = 0;
1112 return(ret);
1113}
1114
1115/**
1116 * xmlCharStrdup:
1117 * @cur: the input char *
1118 * @len: the len of @cur
1119 *
1120 * a strdup for char's to xmlChar's
1121 *
1122 * Returns a new xmlChar * or NULL
1123 */
1124
1125xmlChar *
1126xmlCharStrdup(const char *cur) {
1127 const char *p = cur;
1128
1129 if (cur == NULL) return(NULL);
1130 while (*p != '\0') p++; /* non input consuming */
1131 return(xmlCharStrndup(cur, p - cur));
1132}
1133
1134/**
1135 * xmlStrcmp:
1136 * @str1: the first xmlChar *
1137 * @str2: the second xmlChar *
1138 *
1139 * a strcmp for xmlChar's
1140 *
1141 * Returns the integer result of the comparison
1142 */
1143
1144int
1145xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1146 register int tmp;
1147
1148 if (str1 == str2) return(0);
1149 if (str1 == NULL) return(-1);
1150 if (str2 == NULL) return(1);
1151 do {
1152 tmp = *str1++ - *str2;
1153 if (tmp != 0) return(tmp);
1154 } while (*str2++ != 0);
1155 return 0;
1156}
1157
1158/**
1159 * xmlStrEqual:
1160 * @str1: the first xmlChar *
1161 * @str2: the second xmlChar *
1162 *
1163 * Check if both string are equal of have same content
1164 * Should be a bit more readable and faster than xmlStrEqual()
1165 *
1166 * Returns 1 if they are equal, 0 if they are different
1167 */
1168
1169int
1170xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1171 if (str1 == str2) return(1);
1172 if (str1 == NULL) return(0);
1173 if (str2 == NULL) return(0);
1174 do {
1175 if (*str1++ != *str2) return(0);
1176 } while (*str2++);
1177 return(1);
1178}
1179
1180/**
1181 * xmlStrncmp:
1182 * @str1: the first xmlChar *
1183 * @str2: the second xmlChar *
1184 * @len: the max comparison length
1185 *
1186 * a strncmp for xmlChar's
1187 *
1188 * Returns the integer result of the comparison
1189 */
1190
1191int
1192xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1193 register int tmp;
1194
1195 if (len <= 0) return(0);
1196 if (str1 == str2) return(0);
1197 if (str1 == NULL) return(-1);
1198 if (str2 == NULL) return(1);
1199 do {
1200 tmp = *str1++ - *str2;
1201 if (tmp != 0 || --len == 0) return(tmp);
1202 } while (*str2++ != 0);
1203 return 0;
1204}
1205
Daniel Veillardb44025c2001-10-11 22:55:55 +00001206static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001207 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1208 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1209 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1210 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1211 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1212 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1213 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1214 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1215 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1216 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1217 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1218 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1219 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1220 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1221 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1222 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1223 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1224 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1225 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1226 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1227 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1228 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1229 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1230 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1231 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1232 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1233 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1234 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1235 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1236 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1237 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1238 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1239};
1240
1241/**
1242 * xmlStrcasecmp:
1243 * @str1: the first xmlChar *
1244 * @str2: the second xmlChar *
1245 *
1246 * a strcasecmp for xmlChar's
1247 *
1248 * Returns the integer result of the comparison
1249 */
1250
1251int
1252xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1253 register int tmp;
1254
1255 if (str1 == str2) return(0);
1256 if (str1 == NULL) return(-1);
1257 if (str2 == NULL) return(1);
1258 do {
1259 tmp = casemap[*str1++] - casemap[*str2];
1260 if (tmp != 0) return(tmp);
1261 } while (*str2++ != 0);
1262 return 0;
1263}
1264
1265/**
1266 * xmlStrncasecmp:
1267 * @str1: the first xmlChar *
1268 * @str2: the second xmlChar *
1269 * @len: the max comparison length
1270 *
1271 * a strncasecmp for xmlChar's
1272 *
1273 * Returns the integer result of the comparison
1274 */
1275
1276int
1277xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1278 register int tmp;
1279
1280 if (len <= 0) return(0);
1281 if (str1 == str2) return(0);
1282 if (str1 == NULL) return(-1);
1283 if (str2 == NULL) return(1);
1284 do {
1285 tmp = casemap[*str1++] - casemap[*str2];
1286 if (tmp != 0 || --len == 0) return(tmp);
1287 } while (*str2++ != 0);
1288 return 0;
1289}
1290
1291/**
1292 * xmlStrchr:
1293 * @str: the xmlChar * array
1294 * @val: the xmlChar to search
1295 *
1296 * a strchr for xmlChar's
1297 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001298 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001299 */
1300
1301const xmlChar *
1302xmlStrchr(const xmlChar *str, xmlChar val) {
1303 if (str == NULL) return(NULL);
1304 while (*str != 0) { /* non input consuming */
1305 if (*str == val) return((xmlChar *) str);
1306 str++;
1307 }
1308 return(NULL);
1309}
1310
1311/**
1312 * xmlStrstr:
1313 * @str: the xmlChar * array (haystack)
1314 * @val: the xmlChar to search (needle)
1315 *
1316 * a strstr for xmlChar's
1317 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001318 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001319 */
1320
1321const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001322xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001323 int n;
1324
1325 if (str == NULL) return(NULL);
1326 if (val == NULL) return(NULL);
1327 n = xmlStrlen(val);
1328
1329 if (n == 0) return(str);
1330 while (*str != 0) { /* non input consuming */
1331 if (*str == *val) {
1332 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1333 }
1334 str++;
1335 }
1336 return(NULL);
1337}
1338
1339/**
1340 * xmlStrcasestr:
1341 * @str: the xmlChar * array (haystack)
1342 * @val: the xmlChar to search (needle)
1343 *
1344 * a case-ignoring strstr for xmlChar's
1345 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001346 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001347 */
1348
1349const xmlChar *
1350xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1351 int n;
1352
1353 if (str == NULL) return(NULL);
1354 if (val == NULL) return(NULL);
1355 n = xmlStrlen(val);
1356
1357 if (n == 0) return(str);
1358 while (*str != 0) { /* non input consuming */
1359 if (casemap[*str] == casemap[*val])
1360 if (!xmlStrncasecmp(str, val, n)) return(str);
1361 str++;
1362 }
1363 return(NULL);
1364}
1365
1366/**
1367 * xmlStrsub:
1368 * @str: the xmlChar * array (haystack)
1369 * @start: the index of the first char (zero based)
1370 * @len: the length of the substring
1371 *
1372 * Extract a substring of a given string
1373 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001374 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001375 */
1376
1377xmlChar *
1378xmlStrsub(const xmlChar *str, int start, int len) {
1379 int i;
1380
1381 if (str == NULL) return(NULL);
1382 if (start < 0) return(NULL);
1383 if (len < 0) return(NULL);
1384
1385 for (i = 0;i < start;i++) {
1386 if (*str == 0) return(NULL);
1387 str++;
1388 }
1389 if (*str == 0) return(NULL);
1390 return(xmlStrndup(str, len));
1391}
1392
1393/**
1394 * xmlStrlen:
1395 * @str: the xmlChar * array
1396 *
1397 * length of a xmlChar's string
1398 *
1399 * Returns the number of xmlChar contained in the ARRAY.
1400 */
1401
1402int
1403xmlStrlen(const xmlChar *str) {
1404 int len = 0;
1405
1406 if (str == NULL) return(0);
1407 while (*str != 0) { /* non input consuming */
1408 str++;
1409 len++;
1410 }
1411 return(len);
1412}
1413
1414/**
1415 * xmlStrncat:
1416 * @cur: the original xmlChar * array
1417 * @add: the xmlChar * array added
1418 * @len: the length of @add
1419 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001420 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001421 * first bytes of @add.
1422 *
1423 * Returns a new xmlChar *, the original @cur is reallocated if needed
1424 * and should not be freed
1425 */
1426
1427xmlChar *
1428xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1429 int size;
1430 xmlChar *ret;
1431
1432 if ((add == NULL) || (len == 0))
1433 return(cur);
1434 if (cur == NULL)
1435 return(xmlStrndup(add, len));
1436
1437 size = xmlStrlen(cur);
1438 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1439 if (ret == NULL) {
1440 xmlGenericError(xmlGenericErrorContext,
1441 "xmlStrncat: realloc of %ld byte failed\n",
1442 (size + len + 1) * (long)sizeof(xmlChar));
1443 return(cur);
1444 }
1445 memcpy(&ret[size], add, len * sizeof(xmlChar));
1446 ret[size + len] = 0;
1447 return(ret);
1448}
1449
1450/**
1451 * xmlStrcat:
1452 * @cur: the original xmlChar * array
1453 * @add: the xmlChar * array added
1454 *
1455 * a strcat for array of xmlChar's. Since they are supposed to be
1456 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1457 * a termination mark of '0'.
1458 *
1459 * Returns a new xmlChar * containing the concatenated string.
1460 */
1461xmlChar *
1462xmlStrcat(xmlChar *cur, const xmlChar *add) {
1463 const xmlChar *p = add;
1464
1465 if (add == NULL) return(cur);
1466 if (cur == NULL)
1467 return(xmlStrdup(add));
1468
1469 while (*p != 0) p++; /* non input consuming */
1470 return(xmlStrncat(cur, add, p - add));
1471}
1472
1473/************************************************************************
1474 * *
1475 * Commodity functions, cleanup needed ? *
1476 * *
1477 ************************************************************************/
1478
1479/**
1480 * areBlanks:
1481 * @ctxt: an XML parser context
1482 * @str: a xmlChar *
1483 * @len: the size of @str
1484 *
1485 * Is this a sequence of blank chars that one can ignore ?
1486 *
1487 * Returns 1 if ignorable 0 otherwise.
1488 */
1489
1490static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1491 int i, ret;
1492 xmlNodePtr lastChild;
1493
Daniel Veillard05c13a22001-09-09 08:38:09 +00001494 /*
1495 * Don't spend time trying to differentiate them, the same callback is
1496 * used !
1497 */
1498 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001499 return(0);
1500
Owen Taylor3473f882001-02-23 17:55:21 +00001501 /*
1502 * Check for xml:space value.
1503 */
1504 if (*(ctxt->space) == 1)
1505 return(0);
1506
1507 /*
1508 * Check that the string is made of blanks
1509 */
1510 for (i = 0;i < len;i++)
1511 if (!(IS_BLANK(str[i]))) return(0);
1512
1513 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001514 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001515 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001516 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001517 if (ctxt->myDoc != NULL) {
1518 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1519 if (ret == 0) return(1);
1520 if (ret == 1) return(0);
1521 }
1522
1523 /*
1524 * Otherwise, heuristic :-\
1525 */
Owen Taylor3473f882001-02-23 17:55:21 +00001526 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001527 if ((ctxt->node->children == NULL) &&
1528 (RAW == '<') && (NXT(1) == '/')) return(0);
1529
1530 lastChild = xmlGetLastChild(ctxt->node);
1531 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001532 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1533 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001534 } else if (xmlNodeIsText(lastChild))
1535 return(0);
1536 else if ((ctxt->node->children != NULL) &&
1537 (xmlNodeIsText(ctxt->node->children)))
1538 return(0);
1539 return(1);
1540}
1541
Owen Taylor3473f882001-02-23 17:55:21 +00001542/************************************************************************
1543 * *
1544 * Extra stuff for namespace support *
1545 * Relates to http://www.w3.org/TR/WD-xml-names *
1546 * *
1547 ************************************************************************/
1548
1549/**
1550 * xmlSplitQName:
1551 * @ctxt: an XML parser context
1552 * @name: an XML parser context
1553 * @prefix: a xmlChar **
1554 *
1555 * parse an UTF8 encoded XML qualified name string
1556 *
1557 * [NS 5] QName ::= (Prefix ':')? LocalPart
1558 *
1559 * [NS 6] Prefix ::= NCName
1560 *
1561 * [NS 7] LocalPart ::= NCName
1562 *
1563 * Returns the local part, and prefix is updated
1564 * to get the Prefix if any.
1565 */
1566
1567xmlChar *
1568xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1569 xmlChar buf[XML_MAX_NAMELEN + 5];
1570 xmlChar *buffer = NULL;
1571 int len = 0;
1572 int max = XML_MAX_NAMELEN;
1573 xmlChar *ret = NULL;
1574 const xmlChar *cur = name;
1575 int c;
1576
1577 *prefix = NULL;
1578
1579#ifndef XML_XML_NAMESPACE
1580 /* xml: prefix is not really a namespace */
1581 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1582 (cur[2] == 'l') && (cur[3] == ':'))
1583 return(xmlStrdup(name));
1584#endif
1585
1586 /* nasty but valid */
1587 if (cur[0] == ':')
1588 return(xmlStrdup(name));
1589
1590 c = *cur++;
1591 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1592 buf[len++] = c;
1593 c = *cur++;
1594 }
1595 if (len >= max) {
1596 /*
1597 * Okay someone managed to make a huge name, so he's ready to pay
1598 * for the processing speed.
1599 */
1600 max = len * 2;
1601
1602 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1603 if (buffer == NULL) {
1604 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1605 ctxt->sax->error(ctxt->userData,
1606 "xmlSplitQName: out of memory\n");
1607 return(NULL);
1608 }
1609 memcpy(buffer, buf, len);
1610 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1611 if (len + 10 > max) {
1612 max *= 2;
1613 buffer = (xmlChar *) xmlRealloc(buffer,
1614 max * sizeof(xmlChar));
1615 if (buffer == NULL) {
1616 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1617 ctxt->sax->error(ctxt->userData,
1618 "xmlSplitQName: out of memory\n");
1619 return(NULL);
1620 }
1621 }
1622 buffer[len++] = c;
1623 c = *cur++;
1624 }
1625 buffer[len] = 0;
1626 }
1627
1628 if (buffer == NULL)
1629 ret = xmlStrndup(buf, len);
1630 else {
1631 ret = buffer;
1632 buffer = NULL;
1633 max = XML_MAX_NAMELEN;
1634 }
1635
1636
1637 if (c == ':') {
1638 c = *cur++;
1639 if (c == 0) return(ret);
1640 *prefix = ret;
1641 len = 0;
1642
1643 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1644 buf[len++] = c;
1645 c = *cur++;
1646 }
1647 if (len >= max) {
1648 /*
1649 * Okay someone managed to make a huge name, so he's ready to pay
1650 * for the processing speed.
1651 */
1652 max = len * 2;
1653
1654 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1655 if (buffer == NULL) {
1656 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1657 ctxt->sax->error(ctxt->userData,
1658 "xmlSplitQName: out of memory\n");
1659 return(NULL);
1660 }
1661 memcpy(buffer, buf, len);
1662 while (c != 0) { /* tested bigname2.xml */
1663 if (len + 10 > max) {
1664 max *= 2;
1665 buffer = (xmlChar *) xmlRealloc(buffer,
1666 max * sizeof(xmlChar));
1667 if (buffer == NULL) {
1668 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1669 ctxt->sax->error(ctxt->userData,
1670 "xmlSplitQName: out of memory\n");
1671 return(NULL);
1672 }
1673 }
1674 buffer[len++] = c;
1675 c = *cur++;
1676 }
1677 buffer[len] = 0;
1678 }
1679
1680 if (buffer == NULL)
1681 ret = xmlStrndup(buf, len);
1682 else {
1683 ret = buffer;
1684 }
1685 }
1686
1687 return(ret);
1688}
1689
1690/************************************************************************
1691 * *
1692 * The parser itself *
1693 * Relates to http://www.w3.org/TR/REC-xml *
1694 * *
1695 ************************************************************************/
1696
Daniel Veillard76d66f42001-05-16 21:05:17 +00001697static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001698/**
1699 * xmlParseName:
1700 * @ctxt: an XML parser context
1701 *
1702 * parse an XML name.
1703 *
1704 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1705 * CombiningChar | Extender
1706 *
1707 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1708 *
1709 * [6] Names ::= Name (S Name)*
1710 *
1711 * Returns the Name parsed or NULL
1712 */
1713
1714xmlChar *
1715xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001716 const xmlChar *in;
1717 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001718 int count = 0;
1719
1720 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001721
1722 /*
1723 * Accelerator for simple ASCII names
1724 */
1725 in = ctxt->input->cur;
1726 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1727 ((*in >= 0x41) && (*in <= 0x5A)) ||
1728 (*in == '_') || (*in == ':')) {
1729 in++;
1730 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1731 ((*in >= 0x41) && (*in <= 0x5A)) ||
1732 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001733 (*in == '_') || (*in == '-') ||
1734 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001735 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001736 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001737 count = in - ctxt->input->cur;
1738 ret = xmlStrndup(ctxt->input->cur, count);
1739 ctxt->input->cur = in;
1740 return(ret);
1741 }
1742 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001743 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001744}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001745
Daniel Veillard76d66f42001-05-16 21:05:17 +00001746static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001747xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1748 xmlChar buf[XML_MAX_NAMELEN + 5];
1749 int len = 0, l;
1750 int c;
1751 int count = 0;
1752
1753 /*
1754 * Handler for more complex cases
1755 */
1756 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001757 c = CUR_CHAR(l);
1758 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1759 (!IS_LETTER(c) && (c != '_') &&
1760 (c != ':'))) {
1761 return(NULL);
1762 }
1763
1764 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1765 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1766 (c == '.') || (c == '-') ||
1767 (c == '_') || (c == ':') ||
1768 (IS_COMBINING(c)) ||
1769 (IS_EXTENDER(c)))) {
1770 if (count++ > 100) {
1771 count = 0;
1772 GROW;
1773 }
1774 COPY_BUF(l,buf,len,c);
1775 NEXTL(l);
1776 c = CUR_CHAR(l);
1777 if (len >= XML_MAX_NAMELEN) {
1778 /*
1779 * Okay someone managed to make a huge name, so he's ready to pay
1780 * for the processing speed.
1781 */
1782 xmlChar *buffer;
1783 int max = len * 2;
1784
1785 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1786 if (buffer == NULL) {
1787 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1788 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001789 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001790 return(NULL);
1791 }
1792 memcpy(buffer, buf, len);
1793 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1794 (c == '.') || (c == '-') ||
1795 (c == '_') || (c == ':') ||
1796 (IS_COMBINING(c)) ||
1797 (IS_EXTENDER(c))) {
1798 if (count++ > 100) {
1799 count = 0;
1800 GROW;
1801 }
1802 if (len + 10 > max) {
1803 max *= 2;
1804 buffer = (xmlChar *) xmlRealloc(buffer,
1805 max * sizeof(xmlChar));
1806 if (buffer == NULL) {
1807 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1808 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001809 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001810 return(NULL);
1811 }
1812 }
1813 COPY_BUF(l,buffer,len,c);
1814 NEXTL(l);
1815 c = CUR_CHAR(l);
1816 }
1817 buffer[len] = 0;
1818 return(buffer);
1819 }
1820 }
1821 return(xmlStrndup(buf, len));
1822}
1823
1824/**
1825 * xmlParseStringName:
1826 * @ctxt: an XML parser context
1827 * @str: a pointer to the string pointer (IN/OUT)
1828 *
1829 * parse an XML name.
1830 *
1831 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1832 * CombiningChar | Extender
1833 *
1834 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1835 *
1836 * [6] Names ::= Name (S Name)*
1837 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001838 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00001839 * is updated to the current location in the string.
1840 */
1841
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001842static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001843xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1844 xmlChar buf[XML_MAX_NAMELEN + 5];
1845 const xmlChar *cur = *str;
1846 int len = 0, l;
1847 int c;
1848
1849 c = CUR_SCHAR(cur, l);
1850 if (!IS_LETTER(c) && (c != '_') &&
1851 (c != ':')) {
1852 return(NULL);
1853 }
1854
1855 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1856 (c == '.') || (c == '-') ||
1857 (c == '_') || (c == ':') ||
1858 (IS_COMBINING(c)) ||
1859 (IS_EXTENDER(c))) {
1860 COPY_BUF(l,buf,len,c);
1861 cur += l;
1862 c = CUR_SCHAR(cur, l);
1863 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1864 /*
1865 * Okay someone managed to make a huge name, so he's ready to pay
1866 * for the processing speed.
1867 */
1868 xmlChar *buffer;
1869 int max = len * 2;
1870
1871 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1872 if (buffer == NULL) {
1873 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1874 ctxt->sax->error(ctxt->userData,
1875 "xmlParseStringName: out of memory\n");
1876 return(NULL);
1877 }
1878 memcpy(buffer, buf, len);
1879 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1880 (c == '.') || (c == '-') ||
1881 (c == '_') || (c == ':') ||
1882 (IS_COMBINING(c)) ||
1883 (IS_EXTENDER(c))) {
1884 if (len + 10 > max) {
1885 max *= 2;
1886 buffer = (xmlChar *) xmlRealloc(buffer,
1887 max * sizeof(xmlChar));
1888 if (buffer == NULL) {
1889 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1890 ctxt->sax->error(ctxt->userData,
1891 "xmlParseStringName: out of memory\n");
1892 return(NULL);
1893 }
1894 }
1895 COPY_BUF(l,buffer,len,c);
1896 cur += l;
1897 c = CUR_SCHAR(cur, l);
1898 }
1899 buffer[len] = 0;
1900 *str = cur;
1901 return(buffer);
1902 }
1903 }
1904 *str = cur;
1905 return(xmlStrndup(buf, len));
1906}
1907
1908/**
1909 * xmlParseNmtoken:
1910 * @ctxt: an XML parser context
1911 *
1912 * parse an XML Nmtoken.
1913 *
1914 * [7] Nmtoken ::= (NameChar)+
1915 *
1916 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1917 *
1918 * Returns the Nmtoken parsed or NULL
1919 */
1920
1921xmlChar *
1922xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1923 xmlChar buf[XML_MAX_NAMELEN + 5];
1924 int len = 0, l;
1925 int c;
1926 int count = 0;
1927
1928 GROW;
1929 c = CUR_CHAR(l);
1930
1931 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1932 (c == '.') || (c == '-') ||
1933 (c == '_') || (c == ':') ||
1934 (IS_COMBINING(c)) ||
1935 (IS_EXTENDER(c))) {
1936 if (count++ > 100) {
1937 count = 0;
1938 GROW;
1939 }
1940 COPY_BUF(l,buf,len,c);
1941 NEXTL(l);
1942 c = CUR_CHAR(l);
1943 if (len >= XML_MAX_NAMELEN) {
1944 /*
1945 * Okay someone managed to make a huge token, so he's ready to pay
1946 * for the processing speed.
1947 */
1948 xmlChar *buffer;
1949 int max = len * 2;
1950
1951 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1952 if (buffer == NULL) {
1953 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1954 ctxt->sax->error(ctxt->userData,
1955 "xmlParseNmtoken: out of memory\n");
1956 return(NULL);
1957 }
1958 memcpy(buffer, buf, len);
1959 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1960 (c == '.') || (c == '-') ||
1961 (c == '_') || (c == ':') ||
1962 (IS_COMBINING(c)) ||
1963 (IS_EXTENDER(c))) {
1964 if (count++ > 100) {
1965 count = 0;
1966 GROW;
1967 }
1968 if (len + 10 > max) {
1969 max *= 2;
1970 buffer = (xmlChar *) xmlRealloc(buffer,
1971 max * sizeof(xmlChar));
1972 if (buffer == NULL) {
1973 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1974 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001975 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001976 return(NULL);
1977 }
1978 }
1979 COPY_BUF(l,buffer,len,c);
1980 NEXTL(l);
1981 c = CUR_CHAR(l);
1982 }
1983 buffer[len] = 0;
1984 return(buffer);
1985 }
1986 }
1987 if (len == 0)
1988 return(NULL);
1989 return(xmlStrndup(buf, len));
1990}
1991
1992/**
1993 * xmlParseEntityValue:
1994 * @ctxt: an XML parser context
1995 * @orig: if non-NULL store a copy of the original entity value
1996 *
1997 * parse a value for ENTITY declarations
1998 *
1999 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2000 * "'" ([^%&'] | PEReference | Reference)* "'"
2001 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002002 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002003 */
2004
2005xmlChar *
2006xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2007 xmlChar *buf = NULL;
2008 int len = 0;
2009 int size = XML_PARSER_BUFFER_SIZE;
2010 int c, l;
2011 xmlChar stop;
2012 xmlChar *ret = NULL;
2013 const xmlChar *cur = NULL;
2014 xmlParserInputPtr input;
2015
2016 if (RAW == '"') stop = '"';
2017 else if (RAW == '\'') stop = '\'';
2018 else {
2019 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2020 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2021 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2022 ctxt->wellFormed = 0;
2023 ctxt->disableSAX = 1;
2024 return(NULL);
2025 }
2026 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2027 if (buf == NULL) {
2028 xmlGenericError(xmlGenericErrorContext,
2029 "malloc of %d byte failed\n", size);
2030 return(NULL);
2031 }
2032
2033 /*
2034 * The content of the entity definition is copied in a buffer.
2035 */
2036
2037 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2038 input = ctxt->input;
2039 GROW;
2040 NEXT;
2041 c = CUR_CHAR(l);
2042 /*
2043 * NOTE: 4.4.5 Included in Literal
2044 * When a parameter entity reference appears in a literal entity
2045 * value, ... a single or double quote character in the replacement
2046 * text is always treated as a normal data character and will not
2047 * terminate the literal.
2048 * In practice it means we stop the loop only when back at parsing
2049 * the initial entity and the quote is found
2050 */
2051 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2052 (ctxt->input != input))) {
2053 if (len + 5 >= size) {
2054 size *= 2;
2055 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2056 if (buf == NULL) {
2057 xmlGenericError(xmlGenericErrorContext,
2058 "realloc of %d byte failed\n", size);
2059 return(NULL);
2060 }
2061 }
2062 COPY_BUF(l,buf,len,c);
2063 NEXTL(l);
2064 /*
2065 * Pop-up of finished entities.
2066 */
2067 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2068 xmlPopInput(ctxt);
2069
2070 GROW;
2071 c = CUR_CHAR(l);
2072 if (c == 0) {
2073 GROW;
2074 c = CUR_CHAR(l);
2075 }
2076 }
2077 buf[len] = 0;
2078
2079 /*
2080 * Raise problem w.r.t. '&' and '%' being used in non-entities
2081 * reference constructs. Note Charref will be handled in
2082 * xmlStringDecodeEntities()
2083 */
2084 cur = buf;
2085 while (*cur != 0) { /* non input consuming */
2086 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2087 xmlChar *name;
2088 xmlChar tmp = *cur;
2089
2090 cur++;
2091 name = xmlParseStringName(ctxt, &cur);
2092 if ((name == NULL) || (*cur != ';')) {
2093 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2094 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2095 ctxt->sax->error(ctxt->userData,
2096 "EntityValue: '%c' forbidden except for entities references\n",
2097 tmp);
2098 ctxt->wellFormed = 0;
2099 ctxt->disableSAX = 1;
2100 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002101 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2102 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002103 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2104 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2105 ctxt->sax->error(ctxt->userData,
2106 "EntityValue: PEReferences forbidden in internal subset\n",
2107 tmp);
2108 ctxt->wellFormed = 0;
2109 ctxt->disableSAX = 1;
2110 }
2111 if (name != NULL)
2112 xmlFree(name);
2113 }
2114 cur++;
2115 }
2116
2117 /*
2118 * Then PEReference entities are substituted.
2119 */
2120 if (c != stop) {
2121 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2122 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2123 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2124 ctxt->wellFormed = 0;
2125 ctxt->disableSAX = 1;
2126 xmlFree(buf);
2127 } else {
2128 NEXT;
2129 /*
2130 * NOTE: 4.4.7 Bypassed
2131 * When a general entity reference appears in the EntityValue in
2132 * an entity declaration, it is bypassed and left as is.
2133 * so XML_SUBSTITUTE_REF is not set here.
2134 */
2135 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2136 0, 0, 0);
2137 if (orig != NULL)
2138 *orig = buf;
2139 else
2140 xmlFree(buf);
2141 }
2142
2143 return(ret);
2144}
2145
2146/**
2147 * xmlParseAttValue:
2148 * @ctxt: an XML parser context
2149 *
2150 * parse a value for an attribute
2151 * Note: the parser won't do substitution of entities here, this
2152 * will be handled later in xmlStringGetNodeList
2153 *
2154 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2155 * "'" ([^<&'] | Reference)* "'"
2156 *
2157 * 3.3.3 Attribute-Value Normalization:
2158 * Before the value of an attribute is passed to the application or
2159 * checked for validity, the XML processor must normalize it as follows:
2160 * - a character reference is processed by appending the referenced
2161 * character to the attribute value
2162 * - an entity reference is processed by recursively processing the
2163 * replacement text of the entity
2164 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2165 * appending #x20 to the normalized value, except that only a single
2166 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2167 * parsed entity or the literal entity value of an internal parsed entity
2168 * - other characters are processed by appending them to the normalized value
2169 * If the declared value is not CDATA, then the XML processor must further
2170 * process the normalized attribute value by discarding any leading and
2171 * trailing space (#x20) characters, and by replacing sequences of space
2172 * (#x20) characters by a single space (#x20) character.
2173 * All attributes for which no declaration has been read should be treated
2174 * by a non-validating parser as if declared CDATA.
2175 *
2176 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2177 */
2178
2179xmlChar *
2180xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2181 xmlChar limit = 0;
2182 xmlChar *buf = NULL;
2183 int len = 0;
2184 int buf_size = 0;
2185 int c, l;
2186 xmlChar *current = NULL;
2187 xmlEntityPtr ent;
2188
2189
2190 SHRINK;
2191 if (NXT(0) == '"') {
2192 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2193 limit = '"';
2194 NEXT;
2195 } else if (NXT(0) == '\'') {
2196 limit = '\'';
2197 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2198 NEXT;
2199 } else {
2200 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2201 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2202 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2203 ctxt->wellFormed = 0;
2204 ctxt->disableSAX = 1;
2205 return(NULL);
2206 }
2207
2208 /*
2209 * allocate a translation buffer.
2210 */
2211 buf_size = XML_PARSER_BUFFER_SIZE;
2212 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2213 if (buf == NULL) {
2214 perror("xmlParseAttValue: malloc failed");
2215 return(NULL);
2216 }
2217
2218 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002219 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002220 */
2221 c = CUR_CHAR(l);
2222 while (((NXT(0) != limit) && /* checked */
2223 (c != '<')) || (ctxt->token != 0)) {
2224 if (c == 0) break;
2225 if (ctxt->token == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002226 if (ctxt->replaceEntities) {
2227 if (len > buf_size - 10) {
2228 growBuffer(buf);
2229 }
2230 buf[len++] = '&';
2231 } else {
2232 /*
2233 * The reparsing will be done in xmlStringGetNodeList()
2234 * called by the attribute() function in SAX.c
2235 */
2236 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002237
Daniel Veillard319a7422001-09-11 09:27:09 +00002238 if (len > buf_size - 10) {
2239 growBuffer(buf);
2240 }
2241 current = &buffer[0];
2242 while (*current != 0) { /* non input consuming */
2243 buf[len++] = *current++;
2244 }
2245 ctxt->token = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002246 }
Owen Taylor3473f882001-02-23 17:55:21 +00002247 } else if (c == '&') {
2248 if (NXT(1) == '#') {
2249 int val = xmlParseCharRef(ctxt);
2250 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002251 if (ctxt->replaceEntities) {
2252 if (len > buf_size - 10) {
2253 growBuffer(buf);
2254 }
2255 buf[len++] = '&';
2256 } else {
2257 /*
2258 * The reparsing will be done in xmlStringGetNodeList()
2259 * called by the attribute() function in SAX.c
2260 */
2261 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002262
Daniel Veillard319a7422001-09-11 09:27:09 +00002263 if (len > buf_size - 10) {
2264 growBuffer(buf);
2265 }
2266 current = &buffer[0];
2267 while (*current != 0) { /* non input consuming */
2268 buf[len++] = *current++;
2269 }
Owen Taylor3473f882001-02-23 17:55:21 +00002270 }
2271 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002272 if (len > buf_size - 10) {
2273 growBuffer(buf);
2274 }
Owen Taylor3473f882001-02-23 17:55:21 +00002275 len += xmlCopyChar(0, &buf[len], val);
2276 }
2277 } else {
2278 ent = xmlParseEntityRef(ctxt);
2279 if ((ent != NULL) &&
2280 (ctxt->replaceEntities != 0)) {
2281 xmlChar *rep;
2282
2283 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2284 rep = xmlStringDecodeEntities(ctxt, ent->content,
2285 XML_SUBSTITUTE_REF, 0, 0, 0);
2286 if (rep != NULL) {
2287 current = rep;
2288 while (*current != 0) { /* non input consuming */
2289 buf[len++] = *current++;
2290 if (len > buf_size - 10) {
2291 growBuffer(buf);
2292 }
2293 }
2294 xmlFree(rep);
2295 }
2296 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002297 if (len > buf_size - 10) {
2298 growBuffer(buf);
2299 }
Owen Taylor3473f882001-02-23 17:55:21 +00002300 if (ent->content != NULL)
2301 buf[len++] = ent->content[0];
2302 }
2303 } else if (ent != NULL) {
2304 int i = xmlStrlen(ent->name);
2305 const xmlChar *cur = ent->name;
2306
2307 /*
2308 * This may look absurd but is needed to detect
2309 * entities problems
2310 */
2311 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2312 (ent->content != NULL)) {
2313 xmlChar *rep;
2314 rep = xmlStringDecodeEntities(ctxt, ent->content,
2315 XML_SUBSTITUTE_REF, 0, 0, 0);
2316 if (rep != NULL)
2317 xmlFree(rep);
2318 }
2319
2320 /*
2321 * Just output the reference
2322 */
2323 buf[len++] = '&';
2324 if (len > buf_size - i - 10) {
2325 growBuffer(buf);
2326 }
2327 for (;i > 0;i--)
2328 buf[len++] = *cur++;
2329 buf[len++] = ';';
2330 }
2331 }
2332 } else {
2333 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2334 COPY_BUF(l,buf,len,0x20);
2335 if (len > buf_size - 10) {
2336 growBuffer(buf);
2337 }
2338 } else {
2339 COPY_BUF(l,buf,len,c);
2340 if (len > buf_size - 10) {
2341 growBuffer(buf);
2342 }
2343 }
2344 NEXTL(l);
2345 }
2346 GROW;
2347 c = CUR_CHAR(l);
2348 }
2349 buf[len++] = 0;
2350 if (RAW == '<') {
2351 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2352 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2353 ctxt->sax->error(ctxt->userData,
2354 "Unescaped '<' not allowed in attributes values\n");
2355 ctxt->wellFormed = 0;
2356 ctxt->disableSAX = 1;
2357 } else if (RAW != limit) {
2358 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2359 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2360 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2361 ctxt->wellFormed = 0;
2362 ctxt->disableSAX = 1;
2363 } else
2364 NEXT;
2365 return(buf);
2366}
2367
2368/**
2369 * xmlParseSystemLiteral:
2370 * @ctxt: an XML parser context
2371 *
2372 * parse an XML Literal
2373 *
2374 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2375 *
2376 * Returns the SystemLiteral parsed or NULL
2377 */
2378
2379xmlChar *
2380xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2381 xmlChar *buf = NULL;
2382 int len = 0;
2383 int size = XML_PARSER_BUFFER_SIZE;
2384 int cur, l;
2385 xmlChar stop;
2386 int state = ctxt->instate;
2387 int count = 0;
2388
2389 SHRINK;
2390 if (RAW == '"') {
2391 NEXT;
2392 stop = '"';
2393 } else if (RAW == '\'') {
2394 NEXT;
2395 stop = '\'';
2396 } else {
2397 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2398 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2399 ctxt->sax->error(ctxt->userData,
2400 "SystemLiteral \" or ' expected\n");
2401 ctxt->wellFormed = 0;
2402 ctxt->disableSAX = 1;
2403 return(NULL);
2404 }
2405
2406 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2407 if (buf == NULL) {
2408 xmlGenericError(xmlGenericErrorContext,
2409 "malloc of %d byte failed\n", size);
2410 return(NULL);
2411 }
2412 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2413 cur = CUR_CHAR(l);
2414 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2415 if (len + 5 >= size) {
2416 size *= 2;
2417 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2418 if (buf == NULL) {
2419 xmlGenericError(xmlGenericErrorContext,
2420 "realloc of %d byte failed\n", size);
2421 ctxt->instate = (xmlParserInputState) state;
2422 return(NULL);
2423 }
2424 }
2425 count++;
2426 if (count > 50) {
2427 GROW;
2428 count = 0;
2429 }
2430 COPY_BUF(l,buf,len,cur);
2431 NEXTL(l);
2432 cur = CUR_CHAR(l);
2433 if (cur == 0) {
2434 GROW;
2435 SHRINK;
2436 cur = CUR_CHAR(l);
2437 }
2438 }
2439 buf[len] = 0;
2440 ctxt->instate = (xmlParserInputState) state;
2441 if (!IS_CHAR(cur)) {
2442 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2443 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2444 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2445 ctxt->wellFormed = 0;
2446 ctxt->disableSAX = 1;
2447 } else {
2448 NEXT;
2449 }
2450 return(buf);
2451}
2452
2453/**
2454 * xmlParsePubidLiteral:
2455 * @ctxt: an XML parser context
2456 *
2457 * parse an XML public literal
2458 *
2459 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2460 *
2461 * Returns the PubidLiteral parsed or NULL.
2462 */
2463
2464xmlChar *
2465xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2466 xmlChar *buf = NULL;
2467 int len = 0;
2468 int size = XML_PARSER_BUFFER_SIZE;
2469 xmlChar cur;
2470 xmlChar stop;
2471 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002472 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00002473
2474 SHRINK;
2475 if (RAW == '"') {
2476 NEXT;
2477 stop = '"';
2478 } else if (RAW == '\'') {
2479 NEXT;
2480 stop = '\'';
2481 } else {
2482 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2483 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2484 ctxt->sax->error(ctxt->userData,
2485 "SystemLiteral \" or ' expected\n");
2486 ctxt->wellFormed = 0;
2487 ctxt->disableSAX = 1;
2488 return(NULL);
2489 }
2490 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2491 if (buf == NULL) {
2492 xmlGenericError(xmlGenericErrorContext,
2493 "malloc of %d byte failed\n", size);
2494 return(NULL);
2495 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002496 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00002497 cur = CUR;
2498 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2499 if (len + 1 >= size) {
2500 size *= 2;
2501 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2502 if (buf == NULL) {
2503 xmlGenericError(xmlGenericErrorContext,
2504 "realloc of %d byte failed\n", size);
2505 return(NULL);
2506 }
2507 }
2508 buf[len++] = cur;
2509 count++;
2510 if (count > 50) {
2511 GROW;
2512 count = 0;
2513 }
2514 NEXT;
2515 cur = CUR;
2516 if (cur == 0) {
2517 GROW;
2518 SHRINK;
2519 cur = CUR;
2520 }
2521 }
2522 buf[len] = 0;
2523 if (cur != stop) {
2524 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2525 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2526 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2527 ctxt->wellFormed = 0;
2528 ctxt->disableSAX = 1;
2529 } else {
2530 NEXT;
2531 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002532 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00002533 return(buf);
2534}
2535
Daniel Veillard48b2f892001-02-25 16:11:03 +00002536void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002537/**
2538 * xmlParseCharData:
2539 * @ctxt: an XML parser context
2540 * @cdata: int indicating whether we are within a CDATA section
2541 *
2542 * parse a CharData section.
2543 * if we are within a CDATA section ']]>' marks an end of section.
2544 *
2545 * The right angle bracket (>) may be represented using the string "&gt;",
2546 * and must, for compatibility, be escaped using "&gt;" or a character
2547 * reference when it appears in the string "]]>" in content, when that
2548 * string is not marking the end of a CDATA section.
2549 *
2550 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2551 */
2552
2553void
2554xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002555 const xmlChar *in;
2556 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002557 int line = ctxt->input->line;
2558 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002559
2560 SHRINK;
2561 GROW;
2562 /*
2563 * Accelerated common case where input don't need to be
2564 * modified before passing it to the handler.
2565 */
2566 if ((ctxt->token == 0) && (!cdata)) {
2567 in = ctxt->input->cur;
2568 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002569get_more:
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002570 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
Daniel Veillard48b2f892001-02-25 16:11:03 +00002571 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2572 in++;
2573 if (*in == 0xA) {
2574 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002575 in++;
2576 while (*in == 0xA) {
2577 ctxt->input->line++;
2578 in++;
2579 }
2580 goto get_more;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002581 }
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002582 if (*in == ']') {
2583 if ((in[1] == ']') && (in[2] == '>')) {
2584 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2585 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2586 ctxt->sax->error(ctxt->userData,
2587 "Sequence ']]>' not allowed in content\n");
2588 ctxt->input->cur = in;
2589 ctxt->wellFormed = 0;
2590 ctxt->disableSAX = 1;
2591 return;
2592 }
2593 in++;
2594 goto get_more;
2595 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002596 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002597 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002598 if (IS_BLANK(*ctxt->input->cur)) {
2599 const xmlChar *tmp = ctxt->input->cur;
2600 ctxt->input->cur = in;
2601 if (areBlanks(ctxt, tmp, nbchar)) {
2602 if (ctxt->sax->ignorableWhitespace != NULL)
2603 ctxt->sax->ignorableWhitespace(ctxt->userData,
2604 tmp, nbchar);
2605 } else {
2606 if (ctxt->sax->characters != NULL)
2607 ctxt->sax->characters(ctxt->userData,
2608 tmp, nbchar);
2609 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002610 line = ctxt->input->line;
2611 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002612 } else {
2613 if (ctxt->sax->characters != NULL)
2614 ctxt->sax->characters(ctxt->userData,
2615 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002616 line = ctxt->input->line;
2617 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002618 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002619 }
2620 ctxt->input->cur = in;
2621 if (*in == 0xD) {
2622 in++;
2623 if (*in == 0xA) {
2624 ctxt->input->cur = in;
2625 in++;
2626 ctxt->input->line++;
2627 continue; /* while */
2628 }
2629 in--;
2630 }
Daniel Veillard80f32572001-03-07 19:45:40 +00002631 if (*in == '<') {
2632 return;
2633 }
2634 if (*in == '&') {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002635 return;
2636 }
2637 SHRINK;
2638 GROW;
2639 in = ctxt->input->cur;
2640 } while ((*in >= 0x20) && (*in <= 0x7F));
2641 nbchar = 0;
2642 }
Daniel Veillard50582112001-03-26 22:52:16 +00002643 ctxt->input->line = line;
2644 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002645 xmlParseCharDataComplex(ctxt, cdata);
2646}
2647
2648void
2649xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002650 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2651 int nbchar = 0;
2652 int cur, l;
2653 int count = 0;
2654
2655 SHRINK;
2656 GROW;
2657 cur = CUR_CHAR(l);
2658 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2659 ((cur != '&') || (ctxt->token == '&')) &&
2660 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2661 if ((cur == ']') && (NXT(1) == ']') &&
2662 (NXT(2) == '>')) {
2663 if (cdata) break;
2664 else {
2665 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2666 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2667 ctxt->sax->error(ctxt->userData,
2668 "Sequence ']]>' not allowed in content\n");
2669 /* Should this be relaxed ??? I see a "must here */
2670 ctxt->wellFormed = 0;
2671 ctxt->disableSAX = 1;
2672 }
2673 }
2674 COPY_BUF(l,buf,nbchar,cur);
2675 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2676 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002677 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002678 */
2679 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2680 if (areBlanks(ctxt, buf, nbchar)) {
2681 if (ctxt->sax->ignorableWhitespace != NULL)
2682 ctxt->sax->ignorableWhitespace(ctxt->userData,
2683 buf, nbchar);
2684 } else {
2685 if (ctxt->sax->characters != NULL)
2686 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2687 }
2688 }
2689 nbchar = 0;
2690 }
2691 count++;
2692 if (count > 50) {
2693 GROW;
2694 count = 0;
2695 }
2696 NEXTL(l);
2697 cur = CUR_CHAR(l);
2698 }
2699 if (nbchar != 0) {
2700 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002701 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002702 */
2703 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2704 if (areBlanks(ctxt, buf, nbchar)) {
2705 if (ctxt->sax->ignorableWhitespace != NULL)
2706 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2707 } else {
2708 if (ctxt->sax->characters != NULL)
2709 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2710 }
2711 }
2712 }
2713}
2714
2715/**
2716 * xmlParseExternalID:
2717 * @ctxt: an XML parser context
2718 * @publicID: a xmlChar** receiving PubidLiteral
2719 * @strict: indicate whether we should restrict parsing to only
2720 * production [75], see NOTE below
2721 *
2722 * Parse an External ID or a Public ID
2723 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002724 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002725 * 'PUBLIC' S PubidLiteral S SystemLiteral
2726 *
2727 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2728 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2729 *
2730 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2731 *
2732 * Returns the function returns SystemLiteral and in the second
2733 * case publicID receives PubidLiteral, is strict is off
2734 * it is possible to return NULL and have publicID set.
2735 */
2736
2737xmlChar *
2738xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2739 xmlChar *URI = NULL;
2740
2741 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002742
2743 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002744 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2745 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2746 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2747 SKIP(6);
2748 if (!IS_BLANK(CUR)) {
2749 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2750 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2751 ctxt->sax->error(ctxt->userData,
2752 "Space required after 'SYSTEM'\n");
2753 ctxt->wellFormed = 0;
2754 ctxt->disableSAX = 1;
2755 }
2756 SKIP_BLANKS;
2757 URI = xmlParseSystemLiteral(ctxt);
2758 if (URI == NULL) {
2759 ctxt->errNo = XML_ERR_URI_REQUIRED;
2760 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2761 ctxt->sax->error(ctxt->userData,
2762 "xmlParseExternalID: SYSTEM, no URI\n");
2763 ctxt->wellFormed = 0;
2764 ctxt->disableSAX = 1;
2765 }
2766 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2767 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2768 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2769 SKIP(6);
2770 if (!IS_BLANK(CUR)) {
2771 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2772 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2773 ctxt->sax->error(ctxt->userData,
2774 "Space required after 'PUBLIC'\n");
2775 ctxt->wellFormed = 0;
2776 ctxt->disableSAX = 1;
2777 }
2778 SKIP_BLANKS;
2779 *publicID = xmlParsePubidLiteral(ctxt);
2780 if (*publicID == NULL) {
2781 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2782 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2783 ctxt->sax->error(ctxt->userData,
2784 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2785 ctxt->wellFormed = 0;
2786 ctxt->disableSAX = 1;
2787 }
2788 if (strict) {
2789 /*
2790 * We don't handle [83] so "S SystemLiteral" is required.
2791 */
2792 if (!IS_BLANK(CUR)) {
2793 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2794 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2795 ctxt->sax->error(ctxt->userData,
2796 "Space required after the Public Identifier\n");
2797 ctxt->wellFormed = 0;
2798 ctxt->disableSAX = 1;
2799 }
2800 } else {
2801 /*
2802 * We handle [83] so we return immediately, if
2803 * "S SystemLiteral" is not detected. From a purely parsing
2804 * point of view that's a nice mess.
2805 */
2806 const xmlChar *ptr;
2807 GROW;
2808
2809 ptr = CUR_PTR;
2810 if (!IS_BLANK(*ptr)) return(NULL);
2811
2812 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2813 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2814 }
2815 SKIP_BLANKS;
2816 URI = xmlParseSystemLiteral(ctxt);
2817 if (URI == NULL) {
2818 ctxt->errNo = XML_ERR_URI_REQUIRED;
2819 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2820 ctxt->sax->error(ctxt->userData,
2821 "xmlParseExternalID: PUBLIC, no URI\n");
2822 ctxt->wellFormed = 0;
2823 ctxt->disableSAX = 1;
2824 }
2825 }
2826 return(URI);
2827}
2828
2829/**
2830 * xmlParseComment:
2831 * @ctxt: an XML parser context
2832 *
2833 * Skip an XML (SGML) comment <!-- .... -->
2834 * The spec says that "For compatibility, the string "--" (double-hyphen)
2835 * must not occur within comments. "
2836 *
2837 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2838 */
2839void
2840xmlParseComment(xmlParserCtxtPtr ctxt) {
2841 xmlChar *buf = NULL;
2842 int len;
2843 int size = XML_PARSER_BUFFER_SIZE;
2844 int q, ql;
2845 int r, rl;
2846 int cur, l;
2847 xmlParserInputState state;
2848 xmlParserInputPtr input = ctxt->input;
2849 int count = 0;
2850
2851 /*
2852 * Check that there is a comment right here.
2853 */
2854 if ((RAW != '<') || (NXT(1) != '!') ||
2855 (NXT(2) != '-') || (NXT(3) != '-')) return;
2856
2857 state = ctxt->instate;
2858 ctxt->instate = XML_PARSER_COMMENT;
2859 SHRINK;
2860 SKIP(4);
2861 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2862 if (buf == NULL) {
2863 xmlGenericError(xmlGenericErrorContext,
2864 "malloc of %d byte failed\n", size);
2865 ctxt->instate = state;
2866 return;
2867 }
2868 q = CUR_CHAR(ql);
2869 NEXTL(ql);
2870 r = CUR_CHAR(rl);
2871 NEXTL(rl);
2872 cur = CUR_CHAR(l);
2873 len = 0;
2874 while (IS_CHAR(cur) && /* checked */
2875 ((cur != '>') ||
2876 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002877 if ((r == '-') && (q == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002878 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2879 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2880 ctxt->sax->error(ctxt->userData,
2881 "Comment must not contain '--' (double-hyphen)`\n");
2882 ctxt->wellFormed = 0;
2883 ctxt->disableSAX = 1;
2884 }
2885 if (len + 5 >= size) {
2886 size *= 2;
2887 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2888 if (buf == NULL) {
2889 xmlGenericError(xmlGenericErrorContext,
2890 "realloc of %d byte failed\n", size);
2891 ctxt->instate = state;
2892 return;
2893 }
2894 }
2895 COPY_BUF(ql,buf,len,q);
2896 q = r;
2897 ql = rl;
2898 r = cur;
2899 rl = l;
2900
2901 count++;
2902 if (count > 50) {
2903 GROW;
2904 count = 0;
2905 }
2906 NEXTL(l);
2907 cur = CUR_CHAR(l);
2908 if (cur == 0) {
2909 SHRINK;
2910 GROW;
2911 cur = CUR_CHAR(l);
2912 }
2913 }
2914 buf[len] = 0;
2915 if (!IS_CHAR(cur)) {
2916 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2917 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2918 ctxt->sax->error(ctxt->userData,
2919 "Comment not terminated \n<!--%.50s\n", buf);
2920 ctxt->wellFormed = 0;
2921 ctxt->disableSAX = 1;
2922 xmlFree(buf);
2923 } else {
2924 if (input != ctxt->input) {
2925 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2926 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2927 ctxt->sax->error(ctxt->userData,
2928"Comment doesn't start and stop in the same entity\n");
2929 ctxt->wellFormed = 0;
2930 ctxt->disableSAX = 1;
2931 }
2932 NEXT;
2933 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2934 (!ctxt->disableSAX))
2935 ctxt->sax->comment(ctxt->userData, buf);
2936 xmlFree(buf);
2937 }
2938 ctxt->instate = state;
2939}
2940
2941/**
2942 * xmlParsePITarget:
2943 * @ctxt: an XML parser context
2944 *
2945 * parse the name of a PI
2946 *
2947 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2948 *
2949 * Returns the PITarget name or NULL
2950 */
2951
2952xmlChar *
2953xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2954 xmlChar *name;
2955
2956 name = xmlParseName(ctxt);
2957 if ((name != NULL) &&
2958 ((name[0] == 'x') || (name[0] == 'X')) &&
2959 ((name[1] == 'm') || (name[1] == 'M')) &&
2960 ((name[2] == 'l') || (name[2] == 'L'))) {
2961 int i;
2962 if ((name[0] == 'x') && (name[1] == 'm') &&
2963 (name[2] == 'l') && (name[3] == 0)) {
2964 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2965 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2966 ctxt->sax->error(ctxt->userData,
2967 "XML declaration allowed only at the start of the document\n");
2968 ctxt->wellFormed = 0;
2969 ctxt->disableSAX = 1;
2970 return(name);
2971 } else if (name[3] == 0) {
2972 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2973 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2974 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2975 ctxt->wellFormed = 0;
2976 ctxt->disableSAX = 1;
2977 return(name);
2978 }
2979 for (i = 0;;i++) {
2980 if (xmlW3CPIs[i] == NULL) break;
2981 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2982 return(name);
2983 }
2984 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2985 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2986 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002987 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002988 }
2989 }
2990 return(name);
2991}
2992
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype informations
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must be of the form  catalog = "URL"  (or with single
 * quotes) with nothing but blanks around it. A missing '=' makes the PI
 * silently ignored, any other syntax problem raises a warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p;
    const xmlChar *start;
    xmlChar quote;

    /* the pseudo-attribute name */
    for (p = catalog; IS_BLANK(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;

    /* the equal sign */
    for (p += 7; IS_BLANK(*p); p++)
        ;
    if (*p != '=')
        return;

    /* the opening quote */
    for (p++; IS_BLANK(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;

    /* the URL itself, up to the matching quote */
    p++;
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);

    /* only blanks are allowed after the closing quote */
    for (p++; IS_BLANK(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3055
Owen Taylor3473f882001-02-23 17:55:21 +00003056/**
3057 * xmlParsePI:
3058 * @ctxt: an XML parser context
3059 *
3060 * parse an XML Processing Instruction.
3061 *
3062 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3063 *
3064 * The processing is transfered to SAX once parsed.
3065 */
3066
3067void
3068xmlParsePI(xmlParserCtxtPtr ctxt) {
3069 xmlChar *buf = NULL;
3070 int len = 0;
3071 int size = XML_PARSER_BUFFER_SIZE;
3072 int cur, l;
3073 xmlChar *target;
3074 xmlParserInputState state;
3075 int count = 0;
3076
3077 if ((RAW == '<') && (NXT(1) == '?')) {
3078 xmlParserInputPtr input = ctxt->input;
3079 state = ctxt->instate;
3080 ctxt->instate = XML_PARSER_PI;
3081 /*
3082 * this is a Processing Instruction.
3083 */
3084 SKIP(2);
3085 SHRINK;
3086
3087 /*
3088 * Parse the target name and check for special support like
3089 * namespace.
3090 */
3091 target = xmlParsePITarget(ctxt);
3092 if (target != NULL) {
3093 if ((RAW == '?') && (NXT(1) == '>')) {
3094 if (input != ctxt->input) {
3095 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3096 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3097 ctxt->sax->error(ctxt->userData,
3098 "PI declaration doesn't start and stop in the same entity\n");
3099 ctxt->wellFormed = 0;
3100 ctxt->disableSAX = 1;
3101 }
3102 SKIP(2);
3103
3104 /*
3105 * SAX: PI detected.
3106 */
3107 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3108 (ctxt->sax->processingInstruction != NULL))
3109 ctxt->sax->processingInstruction(ctxt->userData,
3110 target, NULL);
3111 ctxt->instate = state;
3112 xmlFree(target);
3113 return;
3114 }
3115 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3116 if (buf == NULL) {
3117 xmlGenericError(xmlGenericErrorContext,
3118 "malloc of %d byte failed\n", size);
3119 ctxt->instate = state;
3120 return;
3121 }
3122 cur = CUR;
3123 if (!IS_BLANK(cur)) {
3124 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3125 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3126 ctxt->sax->error(ctxt->userData,
3127 "xmlParsePI: PI %s space expected\n", target);
3128 ctxt->wellFormed = 0;
3129 ctxt->disableSAX = 1;
3130 }
3131 SKIP_BLANKS;
3132 cur = CUR_CHAR(l);
3133 while (IS_CHAR(cur) && /* checked */
3134 ((cur != '?') || (NXT(1) != '>'))) {
3135 if (len + 5 >= size) {
3136 size *= 2;
3137 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3138 if (buf == NULL) {
3139 xmlGenericError(xmlGenericErrorContext,
3140 "realloc of %d byte failed\n", size);
3141 ctxt->instate = state;
3142 return;
3143 }
3144 }
3145 count++;
3146 if (count > 50) {
3147 GROW;
3148 count = 0;
3149 }
3150 COPY_BUF(l,buf,len,cur);
3151 NEXTL(l);
3152 cur = CUR_CHAR(l);
3153 if (cur == 0) {
3154 SHRINK;
3155 GROW;
3156 cur = CUR_CHAR(l);
3157 }
3158 }
3159 buf[len] = 0;
3160 if (cur != '?') {
3161 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3162 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3163 ctxt->sax->error(ctxt->userData,
3164 "xmlParsePI: PI %s never end ...\n", target);
3165 ctxt->wellFormed = 0;
3166 ctxt->disableSAX = 1;
3167 } else {
3168 if (input != ctxt->input) {
3169 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3170 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3171 ctxt->sax->error(ctxt->userData,
3172 "PI declaration doesn't start and stop in the same entity\n");
3173 ctxt->wellFormed = 0;
3174 ctxt->disableSAX = 1;
3175 }
3176 SKIP(2);
3177
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003178#ifdef LIBXML_CATALOG_ENABLED
3179 if (((state == XML_PARSER_MISC) ||
3180 (state == XML_PARSER_START)) &&
3181 (xmlStrEqual(target, XML_CATALOG_PI))) {
3182 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3183 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3184 (allow == XML_CATA_ALLOW_ALL))
3185 xmlParseCatalogPI(ctxt, buf);
3186 }
3187#endif
3188
3189
Owen Taylor3473f882001-02-23 17:55:21 +00003190 /*
3191 * SAX: PI detected.
3192 */
3193 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3194 (ctxt->sax->processingInstruction != NULL))
3195 ctxt->sax->processingInstruction(ctxt->userData,
3196 target, buf);
3197 }
3198 xmlFree(buf);
3199 xmlFree(target);
3200 } else {
3201 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3202 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3203 ctxt->sax->error(ctxt->userData,
3204 "xmlParsePI : no target name\n");
3205 ctxt->wellFormed = 0;
3206 ctxt->disableSAX = 1;
3207 }
3208 ctxt->instate = state;
3209 }
3210}
3211
3212/**
3213 * xmlParseNotationDecl:
3214 * @ctxt: an XML parser context
3215 *
3216 * parse a notation declaration
3217 *
3218 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3219 *
3220 * Hence there is actually 3 choices:
3221 * 'PUBLIC' S PubidLiteral
3222 * 'PUBLIC' S PubidLiteral S SystemLiteral
3223 * and 'SYSTEM' S SystemLiteral
3224 *
3225 * See the NOTE on xmlParseExternalID().
3226 */
3227
3228void
3229xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3230 xmlChar *name;
3231 xmlChar *Pubid;
3232 xmlChar *Systemid;
3233
3234 if ((RAW == '<') && (NXT(1) == '!') &&
3235 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3236 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3237 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3238 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3239 xmlParserInputPtr input = ctxt->input;
3240 SHRINK;
3241 SKIP(10);
3242 if (!IS_BLANK(CUR)) {
3243 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3244 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3245 ctxt->sax->error(ctxt->userData,
3246 "Space required after '<!NOTATION'\n");
3247 ctxt->wellFormed = 0;
3248 ctxt->disableSAX = 1;
3249 return;
3250 }
3251 SKIP_BLANKS;
3252
Daniel Veillard76d66f42001-05-16 21:05:17 +00003253 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003254 if (name == NULL) {
3255 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3256 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3257 ctxt->sax->error(ctxt->userData,
3258 "NOTATION: Name expected here\n");
3259 ctxt->wellFormed = 0;
3260 ctxt->disableSAX = 1;
3261 return;
3262 }
3263 if (!IS_BLANK(CUR)) {
3264 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3265 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3266 ctxt->sax->error(ctxt->userData,
3267 "Space required after the NOTATION name'\n");
3268 ctxt->wellFormed = 0;
3269 ctxt->disableSAX = 1;
3270 return;
3271 }
3272 SKIP_BLANKS;
3273
3274 /*
3275 * Parse the IDs.
3276 */
3277 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3278 SKIP_BLANKS;
3279
3280 if (RAW == '>') {
3281 if (input != ctxt->input) {
3282 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3283 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3284 ctxt->sax->error(ctxt->userData,
3285"Notation declaration doesn't start and stop in the same entity\n");
3286 ctxt->wellFormed = 0;
3287 ctxt->disableSAX = 1;
3288 }
3289 NEXT;
3290 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3291 (ctxt->sax->notationDecl != NULL))
3292 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3293 } else {
3294 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3295 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3296 ctxt->sax->error(ctxt->userData,
3297 "'>' required to close NOTATION declaration\n");
3298 ctxt->wellFormed = 0;
3299 ctxt->disableSAX = 1;
3300 }
3301 xmlFree(name);
3302 if (Systemid != NULL) xmlFree(Systemid);
3303 if (Pubid != NULL) xmlFree(Pubid);
3304 }
3305}
3306
3307/**
3308 * xmlParseEntityDecl:
3309 * @ctxt: an XML parser context
3310 *
3311 * parse <!ENTITY declarations
3312 *
3313 * [70] EntityDecl ::= GEDecl | PEDecl
3314 *
3315 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3316 *
3317 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3318 *
3319 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3320 *
3321 * [74] PEDef ::= EntityValue | ExternalID
3322 *
3323 * [76] NDataDecl ::= S 'NDATA' S Name
3324 *
3325 * [ VC: Notation Declared ]
3326 * The Name must match the declared name of a notation.
3327 */
3328
3329void
3330xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3331 xmlChar *name = NULL;
3332 xmlChar *value = NULL;
3333 xmlChar *URI = NULL, *literal = NULL;
3334 xmlChar *ndata = NULL;
3335 int isParameter = 0;
3336 xmlChar *orig = NULL;
3337
3338 GROW;
3339 if ((RAW == '<') && (NXT(1) == '!') &&
3340 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3341 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3342 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3343 xmlParserInputPtr input = ctxt->input;
3344 ctxt->instate = XML_PARSER_ENTITY_DECL;
3345 SHRINK;
3346 SKIP(8);
3347 if (!IS_BLANK(CUR)) {
3348 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3350 ctxt->sax->error(ctxt->userData,
3351 "Space required after '<!ENTITY'\n");
3352 ctxt->wellFormed = 0;
3353 ctxt->disableSAX = 1;
3354 }
3355 SKIP_BLANKS;
3356
3357 if (RAW == '%') {
3358 NEXT;
3359 if (!IS_BLANK(CUR)) {
3360 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3361 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3362 ctxt->sax->error(ctxt->userData,
3363 "Space required after '%'\n");
3364 ctxt->wellFormed = 0;
3365 ctxt->disableSAX = 1;
3366 }
3367 SKIP_BLANKS;
3368 isParameter = 1;
3369 }
3370
Daniel Veillard76d66f42001-05-16 21:05:17 +00003371 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003372 if (name == NULL) {
3373 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3374 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3375 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3376 ctxt->wellFormed = 0;
3377 ctxt->disableSAX = 1;
3378 return;
3379 }
3380 if (!IS_BLANK(CUR)) {
3381 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3382 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3383 ctxt->sax->error(ctxt->userData,
3384 "Space required after the entity name\n");
3385 ctxt->wellFormed = 0;
3386 ctxt->disableSAX = 1;
3387 }
3388 SKIP_BLANKS;
3389
3390 /*
3391 * handle the various case of definitions...
3392 */
3393 if (isParameter) {
3394 if ((RAW == '"') || (RAW == '\'')) {
3395 value = xmlParseEntityValue(ctxt, &orig);
3396 if (value) {
3397 if ((ctxt->sax != NULL) &&
3398 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3399 ctxt->sax->entityDecl(ctxt->userData, name,
3400 XML_INTERNAL_PARAMETER_ENTITY,
3401 NULL, NULL, value);
3402 }
3403 } else {
3404 URI = xmlParseExternalID(ctxt, &literal, 1);
3405 if ((URI == NULL) && (literal == NULL)) {
3406 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3407 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3408 ctxt->sax->error(ctxt->userData,
3409 "Entity value required\n");
3410 ctxt->wellFormed = 0;
3411 ctxt->disableSAX = 1;
3412 }
3413 if (URI) {
3414 xmlURIPtr uri;
3415
3416 uri = xmlParseURI((const char *) URI);
3417 if (uri == NULL) {
3418 ctxt->errNo = XML_ERR_INVALID_URI;
3419 if ((ctxt->sax != NULL) &&
3420 (!ctxt->disableSAX) &&
3421 (ctxt->sax->error != NULL))
3422 ctxt->sax->error(ctxt->userData,
3423 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003424 /*
3425 * This really ought to be a well formedness error
3426 * but the XML Core WG decided otherwise c.f. issue
3427 * E26 of the XML erratas.
3428 */
Owen Taylor3473f882001-02-23 17:55:21 +00003429 } else {
3430 if (uri->fragment != NULL) {
3431 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3432 if ((ctxt->sax != NULL) &&
3433 (!ctxt->disableSAX) &&
3434 (ctxt->sax->error != NULL))
3435 ctxt->sax->error(ctxt->userData,
3436 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003437 /*
3438 * Okay this is foolish to block those but not
3439 * invalid URIs.
3440 */
Owen Taylor3473f882001-02-23 17:55:21 +00003441 ctxt->wellFormed = 0;
3442 } else {
3443 if ((ctxt->sax != NULL) &&
3444 (!ctxt->disableSAX) &&
3445 (ctxt->sax->entityDecl != NULL))
3446 ctxt->sax->entityDecl(ctxt->userData, name,
3447 XML_EXTERNAL_PARAMETER_ENTITY,
3448 literal, URI, NULL);
3449 }
3450 xmlFreeURI(uri);
3451 }
3452 }
3453 }
3454 } else {
3455 if ((RAW == '"') || (RAW == '\'')) {
3456 value = xmlParseEntityValue(ctxt, &orig);
3457 if ((ctxt->sax != NULL) &&
3458 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3459 ctxt->sax->entityDecl(ctxt->userData, name,
3460 XML_INTERNAL_GENERAL_ENTITY,
3461 NULL, NULL, value);
3462 } else {
3463 URI = xmlParseExternalID(ctxt, &literal, 1);
3464 if ((URI == NULL) && (literal == NULL)) {
3465 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3467 ctxt->sax->error(ctxt->userData,
3468 "Entity value required\n");
3469 ctxt->wellFormed = 0;
3470 ctxt->disableSAX = 1;
3471 }
3472 if (URI) {
3473 xmlURIPtr uri;
3474
3475 uri = xmlParseURI((const char *)URI);
3476 if (uri == NULL) {
3477 ctxt->errNo = XML_ERR_INVALID_URI;
3478 if ((ctxt->sax != NULL) &&
3479 (!ctxt->disableSAX) &&
3480 (ctxt->sax->error != NULL))
3481 ctxt->sax->error(ctxt->userData,
3482 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003483 /*
3484 * This really ought to be a well formedness error
3485 * but the XML Core WG decided otherwise c.f. issue
3486 * E26 of the XML erratas.
3487 */
Owen Taylor3473f882001-02-23 17:55:21 +00003488 } else {
3489 if (uri->fragment != NULL) {
3490 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3491 if ((ctxt->sax != NULL) &&
3492 (!ctxt->disableSAX) &&
3493 (ctxt->sax->error != NULL))
3494 ctxt->sax->error(ctxt->userData,
3495 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003496 /*
3497 * Okay this is foolish to block those but not
3498 * invalid URIs.
3499 */
Owen Taylor3473f882001-02-23 17:55:21 +00003500 ctxt->wellFormed = 0;
3501 }
3502 xmlFreeURI(uri);
3503 }
3504 }
3505 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3506 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3507 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3508 ctxt->sax->error(ctxt->userData,
3509 "Space required before 'NDATA'\n");
3510 ctxt->wellFormed = 0;
3511 ctxt->disableSAX = 1;
3512 }
3513 SKIP_BLANKS;
3514 if ((RAW == 'N') && (NXT(1) == 'D') &&
3515 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3516 (NXT(4) == 'A')) {
3517 SKIP(5);
3518 if (!IS_BLANK(CUR)) {
3519 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3520 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3521 ctxt->sax->error(ctxt->userData,
3522 "Space required after 'NDATA'\n");
3523 ctxt->wellFormed = 0;
3524 ctxt->disableSAX = 1;
3525 }
3526 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003527 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003528 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3529 (ctxt->sax->unparsedEntityDecl != NULL))
3530 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3531 literal, URI, ndata);
3532 } else {
3533 if ((ctxt->sax != NULL) &&
3534 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3535 ctxt->sax->entityDecl(ctxt->userData, name,
3536 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3537 literal, URI, NULL);
3538 }
3539 }
3540 }
3541 SKIP_BLANKS;
3542 if (RAW != '>') {
3543 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3544 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3545 ctxt->sax->error(ctxt->userData,
3546 "xmlParseEntityDecl: entity %s not terminated\n", name);
3547 ctxt->wellFormed = 0;
3548 ctxt->disableSAX = 1;
3549 } else {
3550 if (input != ctxt->input) {
3551 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3552 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3553 ctxt->sax->error(ctxt->userData,
3554"Entity declaration doesn't start and stop in the same entity\n");
3555 ctxt->wellFormed = 0;
3556 ctxt->disableSAX = 1;
3557 }
3558 NEXT;
3559 }
3560 if (orig != NULL) {
3561 /*
3562 * Ugly mechanism to save the raw entity value.
3563 */
3564 xmlEntityPtr cur = NULL;
3565
3566 if (isParameter) {
3567 if ((ctxt->sax != NULL) &&
3568 (ctxt->sax->getParameterEntity != NULL))
3569 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3570 } else {
3571 if ((ctxt->sax != NULL) &&
3572 (ctxt->sax->getEntity != NULL))
3573 cur = ctxt->sax->getEntity(ctxt->userData, name);
3574 }
3575 if (cur != NULL) {
3576 if (cur->orig != NULL)
3577 xmlFree(orig);
3578 else
3579 cur->orig = orig;
3580 } else
3581 xmlFree(orig);
3582 }
3583 if (name != NULL) xmlFree(name);
3584 if (value != NULL) xmlFree(value);
3585 if (URI != NULL) xmlFree(URI);
3586 if (literal != NULL) xmlFree(literal);
3587 if (ndata != NULL) xmlFree(ndata);
3588 }
3589}
3590
3591/**
3592 * xmlParseDefaultDecl:
3593 * @ctxt: an XML parser context
3594 * @value: Receive a possible fixed default value for the attribute
3595 *
3596 * Parse an attribute default declaration
3597 *
3598 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3599 *
3600 * [ VC: Required Attribute ]
3601 * if the default declaration is the keyword #REQUIRED, then the
3602 * attribute must be specified for all elements of the type in the
3603 * attribute-list declaration.
3604 *
3605 * [ VC: Attribute Default Legal ]
3606 * The declared default value must meet the lexical constraints of
3607 * the declared attribute type c.f. xmlValidateAttributeDecl()
3608 *
3609 * [ VC: Fixed Attribute Default ]
3610 * if an attribute has a default value declared with the #FIXED
3611 * keyword, instances of that attribute must match the default value.
3612 *
3613 * [ WFC: No < in Attribute Values ]
3614 * handled in xmlParseAttValue()
3615 *
3616 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3617 * or XML_ATTRIBUTE_FIXED.
3618 */
3619
3620int
3621xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3622 int val;
3623 xmlChar *ret;
3624
3625 *value = NULL;
3626 if ((RAW == '#') && (NXT(1) == 'R') &&
3627 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3628 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3629 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3630 (NXT(8) == 'D')) {
3631 SKIP(9);
3632 return(XML_ATTRIBUTE_REQUIRED);
3633 }
3634 if ((RAW == '#') && (NXT(1) == 'I') &&
3635 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3636 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3637 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3638 SKIP(8);
3639 return(XML_ATTRIBUTE_IMPLIED);
3640 }
3641 val = XML_ATTRIBUTE_NONE;
3642 if ((RAW == '#') && (NXT(1) == 'F') &&
3643 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3644 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3645 SKIP(6);
3646 val = XML_ATTRIBUTE_FIXED;
3647 if (!IS_BLANK(CUR)) {
3648 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3649 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3650 ctxt->sax->error(ctxt->userData,
3651 "Space required after '#FIXED'\n");
3652 ctxt->wellFormed = 0;
3653 ctxt->disableSAX = 1;
3654 }
3655 SKIP_BLANKS;
3656 }
3657 ret = xmlParseAttValue(ctxt);
3658 ctxt->instate = XML_PARSER_DTD;
3659 if (ret == NULL) {
3660 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3661 ctxt->sax->error(ctxt->userData,
3662 "Attribute default value declaration error\n");
3663 ctxt->wellFormed = 0;
3664 ctxt->disableSAX = 1;
3665 } else
3666 *value = ret;
3667 return(val);
3668}
3669
3670/**
3671 * xmlParseNotationType:
3672 * @ctxt: an XML parser context
3673 *
3674 * parse an Notation attribute type.
3675 *
3676 * Note: the leading 'NOTATION' S part has already being parsed...
3677 *
3678 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3679 *
3680 * [ VC: Notation Attributes ]
3681 * Values of this type must match one of the notation names included
3682 * in the declaration; all notation names in the declaration must be declared.
3683 *
3684 * Returns: the notation attribute tree built while parsing
3685 */
3686
3687xmlEnumerationPtr
3688xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3689 xmlChar *name;
3690 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3691
3692 if (RAW != '(') {
3693 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3694 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3695 ctxt->sax->error(ctxt->userData,
3696 "'(' required to start 'NOTATION'\n");
3697 ctxt->wellFormed = 0;
3698 ctxt->disableSAX = 1;
3699 return(NULL);
3700 }
3701 SHRINK;
3702 do {
3703 NEXT;
3704 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003705 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003706 if (name == NULL) {
3707 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3708 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3709 ctxt->sax->error(ctxt->userData,
3710 "Name expected in NOTATION declaration\n");
3711 ctxt->wellFormed = 0;
3712 ctxt->disableSAX = 1;
3713 return(ret);
3714 }
3715 cur = xmlCreateEnumeration(name);
3716 xmlFree(name);
3717 if (cur == NULL) return(ret);
3718 if (last == NULL) ret = last = cur;
3719 else {
3720 last->next = cur;
3721 last = cur;
3722 }
3723 SKIP_BLANKS;
3724 } while (RAW == '|');
3725 if (RAW != ')') {
3726 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3727 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3728 ctxt->sax->error(ctxt->userData,
3729 "')' required to finish NOTATION declaration\n");
3730 ctxt->wellFormed = 0;
3731 ctxt->disableSAX = 1;
3732 if ((last != NULL) && (last != ret))
3733 xmlFreeEnumeration(last);
3734 return(ret);
3735 }
3736 NEXT;
3737 return(ret);
3738}
3739
3740/**
3741 * xmlParseEnumerationType:
3742 * @ctxt: an XML parser context
3743 *
3744 * parse an Enumeration attribute type.
3745 *
3746 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3747 *
3748 * [ VC: Enumeration ]
3749 * Values of this type must match one of the Nmtoken tokens in
3750 * the declaration
3751 *
3752 * Returns: the enumeration attribute tree built while parsing
3753 */
3754
3755xmlEnumerationPtr
3756xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3757 xmlChar *name;
3758 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3759
3760 if (RAW != '(') {
3761 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3762 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3763 ctxt->sax->error(ctxt->userData,
3764 "'(' required to start ATTLIST enumeration\n");
3765 ctxt->wellFormed = 0;
3766 ctxt->disableSAX = 1;
3767 return(NULL);
3768 }
3769 SHRINK;
3770 do {
3771 NEXT;
3772 SKIP_BLANKS;
3773 name = xmlParseNmtoken(ctxt);
3774 if (name == NULL) {
3775 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3776 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3777 ctxt->sax->error(ctxt->userData,
3778 "NmToken expected in ATTLIST enumeration\n");
3779 ctxt->wellFormed = 0;
3780 ctxt->disableSAX = 1;
3781 return(ret);
3782 }
3783 cur = xmlCreateEnumeration(name);
3784 xmlFree(name);
3785 if (cur == NULL) return(ret);
3786 if (last == NULL) ret = last = cur;
3787 else {
3788 last->next = cur;
3789 last = cur;
3790 }
3791 SKIP_BLANKS;
3792 } while (RAW == '|');
3793 if (RAW != ')') {
3794 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3795 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3796 ctxt->sax->error(ctxt->userData,
3797 "')' required to finish ATTLIST enumeration\n");
3798 ctxt->wellFormed = 0;
3799 ctxt->disableSAX = 1;
3800 return(ret);
3801 }
3802 NEXT;
3803 return(ret);
3804}
3805
3806/**
3807 * xmlParseEnumeratedType:
3808 * @ctxt: an XML parser context
3809 * @tree: the enumeration tree built while parsing
3810 *
3811 * parse an Enumerated attribute type.
3812 *
3813 * [57] EnumeratedType ::= NotationType | Enumeration
3814 *
3815 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3816 *
3817 *
3818 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3819 */
3820
3821int
3822xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3823 if ((RAW == 'N') && (NXT(1) == 'O') &&
3824 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3825 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3826 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3827 SKIP(8);
3828 if (!IS_BLANK(CUR)) {
3829 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3830 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3831 ctxt->sax->error(ctxt->userData,
3832 "Space required after 'NOTATION'\n");
3833 ctxt->wellFormed = 0;
3834 ctxt->disableSAX = 1;
3835 return(0);
3836 }
3837 SKIP_BLANKS;
3838 *tree = xmlParseNotationType(ctxt);
3839 if (*tree == NULL) return(0);
3840 return(XML_ATTRIBUTE_NOTATION);
3841 }
3842 *tree = xmlParseEnumerationType(ctxt);
3843 if (*tree == NULL) return(0);
3844 return(XML_ATTRIBUTE_ENUMERATION);
3845}
3846
3847/**
3848 * xmlParseAttributeType:
3849 * @ctxt: an XML parser context
3850 * @tree: the enumeration tree built while parsing
3851 *
3852 * parse the Attribute list def for an element
3853 *
3854 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3855 *
3856 * [55] StringType ::= 'CDATA'
3857 *
3858 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3859 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3860 *
3861 * Validity constraints for attribute values syntax are checked in
3862 * xmlValidateAttributeValue()
3863 *
3864 * [ VC: ID ]
3865 * Values of type ID must match the Name production. A name must not
3866 * appear more than once in an XML document as a value of this type;
3867 * i.e., ID values must uniquely identify the elements which bear them.
3868 *
3869 * [ VC: One ID per Element Type ]
3870 * No element type may have more than one ID attribute specified.
3871 *
3872 * [ VC: ID Attribute Default ]
3873 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3874 *
3875 * [ VC: IDREF ]
3876 * Values of type IDREF must match the Name production, and values
3877 * of type IDREFS must match Names; each IDREF Name must match the value
3878 * of an ID attribute on some element in the XML document; i.e. IDREF
3879 * values must match the value of some ID attribute.
3880 *
3881 * [ VC: Entity Name ]
3882 * Values of type ENTITY must match the Name production, values
3883 * of type ENTITIES must match Names; each Entity Name must match the
3884 * name of an unparsed entity declared in the DTD.
3885 *
3886 * [ VC: Name Token ]
3887 * Values of type NMTOKEN must match the Nmtoken production; values
3888 * of type NMTOKENS must match Nmtokens.
3889 *
3890 * Returns the attribute type
3891 */
3892int
3893xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3894 SHRINK;
3895 if ((RAW == 'C') && (NXT(1) == 'D') &&
3896 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3897 (NXT(4) == 'A')) {
3898 SKIP(5);
3899 return(XML_ATTRIBUTE_CDATA);
3900 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3901 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3902 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3903 SKIP(6);
3904 return(XML_ATTRIBUTE_IDREFS);
3905 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3906 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3907 (NXT(4) == 'F')) {
3908 SKIP(5);
3909 return(XML_ATTRIBUTE_IDREF);
3910 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3911 SKIP(2);
3912 return(XML_ATTRIBUTE_ID);
3913 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3914 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3915 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3916 SKIP(6);
3917 return(XML_ATTRIBUTE_ENTITY);
3918 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3919 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3920 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3921 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3922 SKIP(8);
3923 return(XML_ATTRIBUTE_ENTITIES);
3924 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3925 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3926 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3927 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3928 SKIP(8);
3929 return(XML_ATTRIBUTE_NMTOKENS);
3930 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3931 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3932 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3933 (NXT(6) == 'N')) {
3934 SKIP(7);
3935 return(XML_ATTRIBUTE_NMTOKEN);
3936 }
3937 return(xmlParseEnumeratedType(ctxt, tree));
3938}
3939
3940/**
3941 * xmlParseAttributeListDecl:
3942 * @ctxt: an XML parser context
3943 *
3944 * : parse the Attribute list def for an element
3945 *
3946 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3947 *
3948 * [53] AttDef ::= S Name S AttType S DefaultDecl
3949 *
3950 */
3951void
3952xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3953 xmlChar *elemName;
3954 xmlChar *attrName;
3955 xmlEnumerationPtr tree;
3956
3957 if ((RAW == '<') && (NXT(1) == '!') &&
3958 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3959 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3960 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3961 (NXT(8) == 'T')) {
3962 xmlParserInputPtr input = ctxt->input;
3963
3964 SKIP(9);
3965 if (!IS_BLANK(CUR)) {
3966 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3967 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3968 ctxt->sax->error(ctxt->userData,
3969 "Space required after '<!ATTLIST'\n");
3970 ctxt->wellFormed = 0;
3971 ctxt->disableSAX = 1;
3972 }
3973 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003974 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003975 if (elemName == NULL) {
3976 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3977 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3978 ctxt->sax->error(ctxt->userData,
3979 "ATTLIST: no name for Element\n");
3980 ctxt->wellFormed = 0;
3981 ctxt->disableSAX = 1;
3982 return;
3983 }
3984 SKIP_BLANKS;
3985 GROW;
3986 while (RAW != '>') {
3987 const xmlChar *check = CUR_PTR;
3988 int type;
3989 int def;
3990 xmlChar *defaultValue = NULL;
3991
3992 GROW;
3993 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003994 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003995 if (attrName == NULL) {
3996 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3997 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3998 ctxt->sax->error(ctxt->userData,
3999 "ATTLIST: no name for Attribute\n");
4000 ctxt->wellFormed = 0;
4001 ctxt->disableSAX = 1;
4002 break;
4003 }
4004 GROW;
4005 if (!IS_BLANK(CUR)) {
4006 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4007 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4008 ctxt->sax->error(ctxt->userData,
4009 "Space required after the attribute name\n");
4010 ctxt->wellFormed = 0;
4011 ctxt->disableSAX = 1;
4012 if (attrName != NULL)
4013 xmlFree(attrName);
4014 if (defaultValue != NULL)
4015 xmlFree(defaultValue);
4016 break;
4017 }
4018 SKIP_BLANKS;
4019
4020 type = xmlParseAttributeType(ctxt, &tree);
4021 if (type <= 0) {
4022 if (attrName != NULL)
4023 xmlFree(attrName);
4024 if (defaultValue != NULL)
4025 xmlFree(defaultValue);
4026 break;
4027 }
4028
4029 GROW;
4030 if (!IS_BLANK(CUR)) {
4031 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4032 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4033 ctxt->sax->error(ctxt->userData,
4034 "Space required after the attribute type\n");
4035 ctxt->wellFormed = 0;
4036 ctxt->disableSAX = 1;
4037 if (attrName != NULL)
4038 xmlFree(attrName);
4039 if (defaultValue != NULL)
4040 xmlFree(defaultValue);
4041 if (tree != NULL)
4042 xmlFreeEnumeration(tree);
4043 break;
4044 }
4045 SKIP_BLANKS;
4046
4047 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4048 if (def <= 0) {
4049 if (attrName != NULL)
4050 xmlFree(attrName);
4051 if (defaultValue != NULL)
4052 xmlFree(defaultValue);
4053 if (tree != NULL)
4054 xmlFreeEnumeration(tree);
4055 break;
4056 }
4057
4058 GROW;
4059 if (RAW != '>') {
4060 if (!IS_BLANK(CUR)) {
4061 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4062 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4063 ctxt->sax->error(ctxt->userData,
4064 "Space required after the attribute default value\n");
4065 ctxt->wellFormed = 0;
4066 ctxt->disableSAX = 1;
4067 if (attrName != NULL)
4068 xmlFree(attrName);
4069 if (defaultValue != NULL)
4070 xmlFree(defaultValue);
4071 if (tree != NULL)
4072 xmlFreeEnumeration(tree);
4073 break;
4074 }
4075 SKIP_BLANKS;
4076 }
4077 if (check == CUR_PTR) {
4078 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4079 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4080 ctxt->sax->error(ctxt->userData,
4081 "xmlParseAttributeListDecl: detected internal error\n");
4082 if (attrName != NULL)
4083 xmlFree(attrName);
4084 if (defaultValue != NULL)
4085 xmlFree(defaultValue);
4086 if (tree != NULL)
4087 xmlFreeEnumeration(tree);
4088 break;
4089 }
4090 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4091 (ctxt->sax->attributeDecl != NULL))
4092 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4093 type, def, defaultValue, tree);
4094 if (attrName != NULL)
4095 xmlFree(attrName);
4096 if (defaultValue != NULL)
4097 xmlFree(defaultValue);
4098 GROW;
4099 }
4100 if (RAW == '>') {
4101 if (input != ctxt->input) {
4102 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4103 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4104 ctxt->sax->error(ctxt->userData,
4105"Attribute list declaration doesn't start and stop in the same entity\n");
4106 ctxt->wellFormed = 0;
4107 ctxt->disableSAX = 1;
4108 }
4109 NEXT;
4110 }
4111
4112 xmlFree(elemName);
4113 }
4114}
4115
4116/**
4117 * xmlParseElementMixedContentDecl:
4118 * @ctxt: an XML parser context
4119 *
4120 * parse the declaration for a Mixed Element content
4121 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4122 *
4123 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4124 * '(' S? '#PCDATA' S? ')'
4125 *
4126 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4127 *
4128 * [ VC: No Duplicate Types ]
4129 * The same name must not appear more than once in a single
4130 * mixed-content declaration.
4131 *
4132 * returns: the list of the xmlElementContentPtr describing the element choices
4133 */
4134xmlElementContentPtr
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004135xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004136 xmlElementContentPtr ret = NULL, cur = NULL, n;
4137 xmlChar *elem = NULL;
4138
4139 GROW;
4140 if ((RAW == '#') && (NXT(1) == 'P') &&
4141 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4142 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4143 (NXT(6) == 'A')) {
4144 SKIP(7);
4145 SKIP_BLANKS;
4146 SHRINK;
4147 if (RAW == ')') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004148 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4149 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4150 if (ctxt->vctxt.error != NULL)
4151 ctxt->vctxt.error(ctxt->vctxt.userData,
4152"Element content declaration doesn't start and stop in the same entity\n");
4153 ctxt->valid = 0;
4154 }
Owen Taylor3473f882001-02-23 17:55:21 +00004155 NEXT;
4156 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4157 if (RAW == '*') {
4158 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4159 NEXT;
4160 }
4161 return(ret);
4162 }
4163 if ((RAW == '(') || (RAW == '|')) {
4164 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4165 if (ret == NULL) return(NULL);
4166 }
4167 while (RAW == '|') {
4168 NEXT;
4169 if (elem == NULL) {
4170 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4171 if (ret == NULL) return(NULL);
4172 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004173 if (cur != NULL)
4174 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004175 cur = ret;
4176 } else {
4177 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4178 if (n == NULL) return(NULL);
4179 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004180 if (n->c1 != NULL)
4181 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004182 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004183 if (n != NULL)
4184 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004185 cur = n;
4186 xmlFree(elem);
4187 }
4188 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004189 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004190 if (elem == NULL) {
4191 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4192 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4193 ctxt->sax->error(ctxt->userData,
4194 "xmlParseElementMixedContentDecl : Name expected\n");
4195 ctxt->wellFormed = 0;
4196 ctxt->disableSAX = 1;
4197 xmlFreeElementContent(cur);
4198 return(NULL);
4199 }
4200 SKIP_BLANKS;
4201 GROW;
4202 }
4203 if ((RAW == ')') && (NXT(1) == '*')) {
4204 if (elem != NULL) {
4205 cur->c2 = xmlNewElementContent(elem,
4206 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004207 if (cur->c2 != NULL)
4208 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004209 xmlFree(elem);
4210 }
4211 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004212 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4213 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4214 if (ctxt->vctxt.error != NULL)
4215 ctxt->vctxt.error(ctxt->vctxt.userData,
4216"Element content declaration doesn't start and stop in the same entity\n");
4217 ctxt->valid = 0;
4218 }
Owen Taylor3473f882001-02-23 17:55:21 +00004219 SKIP(2);
4220 } else {
4221 if (elem != NULL) xmlFree(elem);
4222 xmlFreeElementContent(ret);
4223 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4224 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4225 ctxt->sax->error(ctxt->userData,
4226 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4227 ctxt->wellFormed = 0;
4228 ctxt->disableSAX = 1;
4229 return(NULL);
4230 }
4231
4232 } else {
4233 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4234 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4235 ctxt->sax->error(ctxt->userData,
4236 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4237 ctxt->wellFormed = 0;
4238 ctxt->disableSAX = 1;
4239 }
4240 return(ret);
4241}
4242
4243/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004244 * xmlParseElementChildrenContentD:
4245 * @ctxt: an XML parser context
4246 *
4247 * VMS version of xmlParseElementChildrenContentDecl()
4248 *
4249 * Returns the tree of xmlElementContentPtr describing the element
4250 * hierarchy.
4251 */
4252/**
Owen Taylor3473f882001-02-23 17:55:21 +00004253 * xmlParseElementChildrenContentDecl:
4254 * @ctxt: an XML parser context
4255 *
4256 * parse the declaration for a Mixed Element content
4257 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4258 *
4259 *
4260 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4261 *
4262 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4263 *
4264 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4265 *
4266 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4267 *
4268 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4269 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004270 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004271 * opening or closing parentheses in a choice, seq, or Mixed
4272 * construct is contained in the replacement text for a parameter
4273 * entity, both must be contained in the same replacement text. For
4274 * interoperability, if a parameter-entity reference appears in a
4275 * choice, seq, or Mixed construct, its replacement text should not
4276 * be empty, and neither the first nor last non-blank character of
4277 * the replacement text should be a connector (| or ,).
4278 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004279 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004280 * hierarchy.
4281 */
4282xmlElementContentPtr
4283#ifdef VMS
4284xmlParseElementChildrenContentD
4285#else
4286xmlParseElementChildrenContentDecl
4287#endif
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004288(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004289 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4290 xmlChar *elem;
4291 xmlChar type = 0;
4292
4293 SKIP_BLANKS;
4294 GROW;
4295 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004296 xmlParserInputPtr input = ctxt->input;
4297
Owen Taylor3473f882001-02-23 17:55:21 +00004298 /* Recurse on first child */
4299 NEXT;
4300 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004301 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004302 SKIP_BLANKS;
4303 GROW;
4304 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004305 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004306 if (elem == NULL) {
4307 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4308 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4309 ctxt->sax->error(ctxt->userData,
4310 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4311 ctxt->wellFormed = 0;
4312 ctxt->disableSAX = 1;
4313 return(NULL);
4314 }
4315 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4316 GROW;
4317 if (RAW == '?') {
4318 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4319 NEXT;
4320 } else if (RAW == '*') {
4321 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4322 NEXT;
4323 } else if (RAW == '+') {
4324 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4325 NEXT;
4326 } else {
4327 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4328 }
4329 xmlFree(elem);
4330 GROW;
4331 }
4332 SKIP_BLANKS;
4333 SHRINK;
4334 while (RAW != ')') {
4335 /*
4336 * Each loop we parse one separator and one element.
4337 */
4338 if (RAW == ',') {
4339 if (type == 0) type = CUR;
4340
4341 /*
4342 * Detect "Name | Name , Name" error
4343 */
4344 else if (type != CUR) {
4345 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4346 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4347 ctxt->sax->error(ctxt->userData,
4348 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4349 type);
4350 ctxt->wellFormed = 0;
4351 ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004352 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004353 xmlFreeElementContent(last);
4354 if (ret != NULL)
4355 xmlFreeElementContent(ret);
4356 return(NULL);
4357 }
4358 NEXT;
4359
4360 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4361 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004362 if ((last != NULL) && (last != ret))
4363 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004364 xmlFreeElementContent(ret);
4365 return(NULL);
4366 }
4367 if (last == NULL) {
4368 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004369 if (ret != NULL)
4370 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004371 ret = cur = op;
4372 } else {
4373 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004374 if (op != NULL)
4375 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004376 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004377 if (last != NULL)
4378 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004379 cur =op;
4380 last = NULL;
4381 }
4382 } else if (RAW == '|') {
4383 if (type == 0) type = CUR;
4384
4385 /*
4386 * Detect "Name , Name | Name" error
4387 */
4388 else if (type != CUR) {
4389 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4390 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4391 ctxt->sax->error(ctxt->userData,
4392 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4393 type);
4394 ctxt->wellFormed = 0;
4395 ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004396 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004397 xmlFreeElementContent(last);
4398 if (ret != NULL)
4399 xmlFreeElementContent(ret);
4400 return(NULL);
4401 }
4402 NEXT;
4403
4404 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4405 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004406 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004407 xmlFreeElementContent(last);
4408 if (ret != NULL)
4409 xmlFreeElementContent(ret);
4410 return(NULL);
4411 }
4412 if (last == NULL) {
4413 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004414 if (ret != NULL)
4415 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004416 ret = cur = op;
4417 } else {
4418 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004419 if (op != NULL)
4420 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004421 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004422 if (last != NULL)
4423 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004424 cur =op;
4425 last = NULL;
4426 }
4427 } else {
4428 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4429 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4430 ctxt->sax->error(ctxt->userData,
4431 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4432 ctxt->wellFormed = 0;
4433 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004434 if (ret != NULL)
4435 xmlFreeElementContent(ret);
4436 return(NULL);
4437 }
4438 GROW;
4439 SKIP_BLANKS;
4440 GROW;
4441 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004442 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004443 /* Recurse on second child */
4444 NEXT;
4445 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004446 last = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004447 SKIP_BLANKS;
4448 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004449 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004450 if (elem == NULL) {
4451 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4452 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4453 ctxt->sax->error(ctxt->userData,
4454 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4455 ctxt->wellFormed = 0;
4456 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004457 if (ret != NULL)
4458 xmlFreeElementContent(ret);
4459 return(NULL);
4460 }
4461 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4462 xmlFree(elem);
4463 if (RAW == '?') {
4464 last->ocur = XML_ELEMENT_CONTENT_OPT;
4465 NEXT;
4466 } else if (RAW == '*') {
4467 last->ocur = XML_ELEMENT_CONTENT_MULT;
4468 NEXT;
4469 } else if (RAW == '+') {
4470 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4471 NEXT;
4472 } else {
4473 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4474 }
4475 }
4476 SKIP_BLANKS;
4477 GROW;
4478 }
4479 if ((cur != NULL) && (last != NULL)) {
4480 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004481 if (last != NULL)
4482 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004483 }
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004484 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4485 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4486 if (ctxt->vctxt.error != NULL)
4487 ctxt->vctxt.error(ctxt->vctxt.userData,
4488"Element content declaration doesn't start and stop in the same entity\n");
4489 ctxt->valid = 0;
4490 }
Owen Taylor3473f882001-02-23 17:55:21 +00004491 NEXT;
4492 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004493 if (ret != NULL)
4494 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004495 NEXT;
4496 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004497 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004498 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004499 cur = ret;
4500 /*
4501 * Some normalization:
4502 * (a | b* | c?)* == (a | b | c)*
4503 */
4504 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4505 if ((cur->c1 != NULL) &&
4506 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4507 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4508 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4509 if ((cur->c2 != NULL) &&
4510 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4511 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4512 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4513 cur = cur->c2;
4514 }
4515 }
Owen Taylor3473f882001-02-23 17:55:21 +00004516 NEXT;
4517 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004518 if (ret != NULL) {
4519 int found = 0;
4520
Daniel Veillarde470df72001-04-18 21:41:07 +00004521 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004522 /*
4523 * Some normalization:
4524 * (a | b*)+ == (a | b)*
4525 * (a | b?)+ == (a | b)*
4526 */
4527 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4528 if ((cur->c1 != NULL) &&
4529 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4530 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4531 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4532 found = 1;
4533 }
4534 if ((cur->c2 != NULL) &&
4535 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4536 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4537 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4538 found = 1;
4539 }
4540 cur = cur->c2;
4541 }
4542 if (found)
4543 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4544 }
Owen Taylor3473f882001-02-23 17:55:21 +00004545 NEXT;
4546 }
4547 return(ret);
4548}
4549
4550/**
4551 * xmlParseElementContentDecl:
4552 * @ctxt: an XML parser context
4553 * @name: the name of the element being defined.
4554 * @result: the Element Content pointer will be stored here if any
4555 *
4556 * parse the declaration for an Element content either Mixed or Children,
4557 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4558 *
4559 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4560 *
4561 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4562 */
4563
4564int
4565xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4566 xmlElementContentPtr *result) {
4567
4568 xmlElementContentPtr tree = NULL;
4569 xmlParserInputPtr input = ctxt->input;
4570 int res;
4571
4572 *result = NULL;
4573
4574 if (RAW != '(') {
4575 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4576 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4577 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004578 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004579 ctxt->wellFormed = 0;
4580 ctxt->disableSAX = 1;
4581 return(-1);
4582 }
4583 NEXT;
4584 GROW;
4585 SKIP_BLANKS;
4586 if ((RAW == '#') && (NXT(1) == 'P') &&
4587 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4588 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4589 (NXT(6) == 'A')) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004590 tree = xmlParseElementMixedContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004591 res = XML_ELEMENT_TYPE_MIXED;
4592 } else {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004593 tree = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004594 res = XML_ELEMENT_TYPE_ELEMENT;
4595 }
Owen Taylor3473f882001-02-23 17:55:21 +00004596 SKIP_BLANKS;
4597 *result = tree;
4598 return(res);
4599}
4600
4601/**
4602 * xmlParseElementDecl:
4603 * @ctxt: an XML parser context
4604 *
4605 * parse an Element declaration.
4606 *
4607 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4608 *
4609 * [ VC: Unique Element Type Declaration ]
4610 * No element type may be declared more than once
4611 *
4612 * Returns the type of the element, or -1 in case of error
4613 */
4614int
4615xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4616 xmlChar *name;
4617 int ret = -1;
4618 xmlElementContentPtr content = NULL;
4619
4620 GROW;
4621 if ((RAW == '<') && (NXT(1) == '!') &&
4622 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4623 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4624 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4625 (NXT(8) == 'T')) {
4626 xmlParserInputPtr input = ctxt->input;
4627
4628 SKIP(9);
4629 if (!IS_BLANK(CUR)) {
4630 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4631 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4632 ctxt->sax->error(ctxt->userData,
4633 "Space required after 'ELEMENT'\n");
4634 ctxt->wellFormed = 0;
4635 ctxt->disableSAX = 1;
4636 }
4637 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004638 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004639 if (name == NULL) {
4640 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4641 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4642 ctxt->sax->error(ctxt->userData,
4643 "xmlParseElementDecl: no name for Element\n");
4644 ctxt->wellFormed = 0;
4645 ctxt->disableSAX = 1;
4646 return(-1);
4647 }
4648 while ((RAW == 0) && (ctxt->inputNr > 1))
4649 xmlPopInput(ctxt);
4650 if (!IS_BLANK(CUR)) {
4651 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4652 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4653 ctxt->sax->error(ctxt->userData,
4654 "Space required after the element name\n");
4655 ctxt->wellFormed = 0;
4656 ctxt->disableSAX = 1;
4657 }
4658 SKIP_BLANKS;
4659 if ((RAW == 'E') && (NXT(1) == 'M') &&
4660 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4661 (NXT(4) == 'Y')) {
4662 SKIP(5);
4663 /*
4664 * Element must always be empty.
4665 */
4666 ret = XML_ELEMENT_TYPE_EMPTY;
4667 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4668 (NXT(2) == 'Y')) {
4669 SKIP(3);
4670 /*
4671 * Element is a generic container.
4672 */
4673 ret = XML_ELEMENT_TYPE_ANY;
4674 } else if (RAW == '(') {
4675 ret = xmlParseElementContentDecl(ctxt, name, &content);
4676 } else {
4677 /*
4678 * [ WFC: PEs in Internal Subset ] error handling.
4679 */
4680 if ((RAW == '%') && (ctxt->external == 0) &&
4681 (ctxt->inputNr == 1)) {
4682 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4683 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4684 ctxt->sax->error(ctxt->userData,
4685 "PEReference: forbidden within markup decl in internal subset\n");
4686 } else {
4687 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4688 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4689 ctxt->sax->error(ctxt->userData,
4690 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4691 }
4692 ctxt->wellFormed = 0;
4693 ctxt->disableSAX = 1;
4694 if (name != NULL) xmlFree(name);
4695 return(-1);
4696 }
4697
4698 SKIP_BLANKS;
4699 /*
4700 * Pop-up of finished entities.
4701 */
4702 while ((RAW == 0) && (ctxt->inputNr > 1))
4703 xmlPopInput(ctxt);
4704 SKIP_BLANKS;
4705
4706 if (RAW != '>') {
4707 ctxt->errNo = XML_ERR_GT_REQUIRED;
4708 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4709 ctxt->sax->error(ctxt->userData,
4710 "xmlParseElementDecl: expected '>' at the end\n");
4711 ctxt->wellFormed = 0;
4712 ctxt->disableSAX = 1;
4713 } else {
4714 if (input != ctxt->input) {
4715 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4716 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4717 ctxt->sax->error(ctxt->userData,
4718"Element declaration doesn't start and stop in the same entity\n");
4719 ctxt->wellFormed = 0;
4720 ctxt->disableSAX = 1;
4721 }
4722
4723 NEXT;
4724 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4725 (ctxt->sax->elementDecl != NULL))
4726 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4727 content);
4728 }
4729 if (content != NULL) {
4730 xmlFreeElementContent(content);
4731 }
4732 if (name != NULL) {
4733 xmlFree(name);
4734 }
4735 }
4736 return(ret);
4737}
4738
4739/**
Owen Taylor3473f882001-02-23 17:55:21 +00004740 * xmlParseConditionalSections
4741 * @ctxt: an XML parser context
4742 *
4743 * [61] conditionalSect ::= includeSect | ignoreSect
4744 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4745 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4746 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4747 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4748 */
4749
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004750static void
Owen Taylor3473f882001-02-23 17:55:21 +00004751xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4752 SKIP(3);
4753 SKIP_BLANKS;
4754 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4755 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4756 (NXT(6) == 'E')) {
4757 SKIP(7);
4758 SKIP_BLANKS;
4759 if (RAW != '[') {
4760 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4761 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4762 ctxt->sax->error(ctxt->userData,
4763 "XML conditional section '[' expected\n");
4764 ctxt->wellFormed = 0;
4765 ctxt->disableSAX = 1;
4766 } else {
4767 NEXT;
4768 }
4769 if (xmlParserDebugEntities) {
4770 if ((ctxt->input != NULL) && (ctxt->input->filename))
4771 xmlGenericError(xmlGenericErrorContext,
4772 "%s(%d): ", ctxt->input->filename,
4773 ctxt->input->line);
4774 xmlGenericError(xmlGenericErrorContext,
4775 "Entering INCLUDE Conditional Section\n");
4776 }
4777
4778 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4779 (NXT(2) != '>'))) {
4780 const xmlChar *check = CUR_PTR;
4781 int cons = ctxt->input->consumed;
4782 int tok = ctxt->token;
4783
4784 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4785 xmlParseConditionalSections(ctxt);
4786 } else if (IS_BLANK(CUR)) {
4787 NEXT;
4788 } else if (RAW == '%') {
4789 xmlParsePEReference(ctxt);
4790 } else
4791 xmlParseMarkupDecl(ctxt);
4792
4793 /*
4794 * Pop-up of finished entities.
4795 */
4796 while ((RAW == 0) && (ctxt->inputNr > 1))
4797 xmlPopInput(ctxt);
4798
4799 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4800 (tok == ctxt->token)) {
4801 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4802 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4803 ctxt->sax->error(ctxt->userData,
4804 "Content error in the external subset\n");
4805 ctxt->wellFormed = 0;
4806 ctxt->disableSAX = 1;
4807 break;
4808 }
4809 }
4810 if (xmlParserDebugEntities) {
4811 if ((ctxt->input != NULL) && (ctxt->input->filename))
4812 xmlGenericError(xmlGenericErrorContext,
4813 "%s(%d): ", ctxt->input->filename,
4814 ctxt->input->line);
4815 xmlGenericError(xmlGenericErrorContext,
4816 "Leaving INCLUDE Conditional Section\n");
4817 }
4818
4819 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4820 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4821 int state;
4822 int instate;
4823 int depth = 0;
4824
4825 SKIP(6);
4826 SKIP_BLANKS;
4827 if (RAW != '[') {
4828 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4829 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4830 ctxt->sax->error(ctxt->userData,
4831 "XML conditional section '[' expected\n");
4832 ctxt->wellFormed = 0;
4833 ctxt->disableSAX = 1;
4834 } else {
4835 NEXT;
4836 }
4837 if (xmlParserDebugEntities) {
4838 if ((ctxt->input != NULL) && (ctxt->input->filename))
4839 xmlGenericError(xmlGenericErrorContext,
4840 "%s(%d): ", ctxt->input->filename,
4841 ctxt->input->line);
4842 xmlGenericError(xmlGenericErrorContext,
4843 "Entering IGNORE Conditional Section\n");
4844 }
4845
4846 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004847 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00004848 * But disable SAX event generating DTD building in the meantime
4849 */
4850 state = ctxt->disableSAX;
4851 instate = ctxt->instate;
4852 ctxt->disableSAX = 1;
4853 ctxt->instate = XML_PARSER_IGNORE;
4854
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004855 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004856 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4857 depth++;
4858 SKIP(3);
4859 continue;
4860 }
4861 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4862 if (--depth >= 0) SKIP(3);
4863 continue;
4864 }
4865 NEXT;
4866 continue;
4867 }
4868
4869 ctxt->disableSAX = state;
4870 ctxt->instate = instate;
4871
4872 if (xmlParserDebugEntities) {
4873 if ((ctxt->input != NULL) && (ctxt->input->filename))
4874 xmlGenericError(xmlGenericErrorContext,
4875 "%s(%d): ", ctxt->input->filename,
4876 ctxt->input->line);
4877 xmlGenericError(xmlGenericErrorContext,
4878 "Leaving IGNORE Conditional Section\n");
4879 }
4880
4881 } else {
4882 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4883 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4884 ctxt->sax->error(ctxt->userData,
4885 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4886 ctxt->wellFormed = 0;
4887 ctxt->disableSAX = 1;
4888 }
4889
4890 if (RAW == 0)
4891 SHRINK;
4892
4893 if (RAW == 0) {
4894 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4895 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4896 ctxt->sax->error(ctxt->userData,
4897 "XML conditional section not closed\n");
4898 ctxt->wellFormed = 0;
4899 ctxt->disableSAX = 1;
4900 } else {
4901 SKIP(3);
4902 }
4903}
4904
4905/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00004906 * xmlParseMarkupDecl:
4907 * @ctxt: an XML parser context
4908 *
4909 * parse Markup declarations
4910 *
4911 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4912 * NotationDecl | PI | Comment
4913 *
4914 * [ VC: Proper Declaration/PE Nesting ]
4915 * Parameter-entity replacement text must be properly nested with
4916 * markup declarations. That is to say, if either the first character
4917 * or the last character of a markup declaration (markupdecl above) is
4918 * contained in the replacement text for a parameter-entity reference,
4919 * both must be contained in the same replacement text.
4920 *
4921 * [ WFC: PEs in Internal Subset ]
4922 * In the internal DTD subset, parameter-entity references can occur
4923 * only where markup declarations can occur, not within markup declarations.
4924 * (This does not apply to references that occur in external parameter
4925 * entities or to the external subset.)
4926 */
4927void
4928xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4929 GROW;
4930 xmlParseElementDecl(ctxt);
4931 xmlParseAttributeListDecl(ctxt);
4932 xmlParseEntityDecl(ctxt);
4933 xmlParseNotationDecl(ctxt);
4934 xmlParsePI(ctxt);
4935 xmlParseComment(ctxt);
4936 /*
4937 * This is only for internal subset. On external entities,
4938 * the replacement is done before parsing stage
4939 */
4940 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4941 xmlParsePEReference(ctxt);
4942
4943 /*
4944 * Conditional sections are allowed from entities included
4945 * by PE References in the internal subset.
4946 */
4947 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
4948 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4949 xmlParseConditionalSections(ctxt);
4950 }
4951 }
4952
4953 ctxt->instate = XML_PARSER_DTD;
4954}
4955
4956/**
4957 * xmlParseTextDecl:
4958 * @ctxt: an XML parser context
4959 *
4960 * parse an XML declaration header for external entities
4961 *
4962 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4963 *
4964 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4965 */
4966
4967void
4968xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4969 xmlChar *version;
4970
4971 /*
4972 * We know that '<?xml' is here.
4973 */
4974 if ((RAW == '<') && (NXT(1) == '?') &&
4975 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4976 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4977 SKIP(5);
4978 } else {
4979 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4980 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4981 ctxt->sax->error(ctxt->userData,
4982 "Text declaration '<?xml' required\n");
4983 ctxt->wellFormed = 0;
4984 ctxt->disableSAX = 1;
4985
4986 return;
4987 }
4988
4989 if (!IS_BLANK(CUR)) {
4990 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4991 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4992 ctxt->sax->error(ctxt->userData,
4993 "Space needed after '<?xml'\n");
4994 ctxt->wellFormed = 0;
4995 ctxt->disableSAX = 1;
4996 }
4997 SKIP_BLANKS;
4998
4999 /*
5000 * We may have the VersionInfo here.
5001 */
5002 version = xmlParseVersionInfo(ctxt);
5003 if (version == NULL)
5004 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005005 else {
5006 if (!IS_BLANK(CUR)) {
5007 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5008 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5009 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5010 ctxt->wellFormed = 0;
5011 ctxt->disableSAX = 1;
5012 }
5013 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005014 ctxt->input->version = version;
5015
5016 /*
5017 * We must have the encoding declaration
5018 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005019 xmlParseEncodingDecl(ctxt);
5020 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5021 /*
5022 * The XML REC instructs us to stop parsing right here
5023 */
5024 return;
5025 }
5026
5027 SKIP_BLANKS;
5028 if ((RAW == '?') && (NXT(1) == '>')) {
5029 SKIP(2);
5030 } else if (RAW == '>') {
5031 /* Deprecated old WD ... */
5032 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5033 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5034 ctxt->sax->error(ctxt->userData,
5035 "XML declaration must end-up with '?>'\n");
5036 ctxt->wellFormed = 0;
5037 ctxt->disableSAX = 1;
5038 NEXT;
5039 } else {
5040 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5041 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5042 ctxt->sax->error(ctxt->userData,
5043 "parsing XML declaration: '?>' expected\n");
5044 ctxt->wellFormed = 0;
5045 ctxt->disableSAX = 1;
5046 MOVETO_ENDTAG(CUR_PTR);
5047 NEXT;
5048 }
5049}
5050
5051/**
Owen Taylor3473f882001-02-23 17:55:21 +00005052 * xmlParseExternalSubset:
5053 * @ctxt: an XML parser context
5054 * @ExternalID: the external identifier
5055 * @SystemID: the system identifier (or URL)
5056 *
5057 * parse Markup declarations from an external subset
5058 *
5059 * [30] extSubset ::= textDecl? extSubsetDecl
5060 *
5061 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5062 */
5063void
5064xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5065 const xmlChar *SystemID) {
5066 GROW;
5067 if ((RAW == '<') && (NXT(1) == '?') &&
5068 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5069 (NXT(4) == 'l')) {
5070 xmlParseTextDecl(ctxt);
5071 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5072 /*
5073 * The XML REC instructs us to stop parsing right here
5074 */
5075 ctxt->instate = XML_PARSER_EOF;
5076 return;
5077 }
5078 }
5079 if (ctxt->myDoc == NULL) {
5080 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5081 }
5082 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5083 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5084
5085 ctxt->instate = XML_PARSER_DTD;
5086 ctxt->external = 1;
5087 while (((RAW == '<') && (NXT(1) == '?')) ||
5088 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005089 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005090 const xmlChar *check = CUR_PTR;
5091 int cons = ctxt->input->consumed;
5092 int tok = ctxt->token;
5093
5094 GROW;
5095 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5096 xmlParseConditionalSections(ctxt);
5097 } else if (IS_BLANK(CUR)) {
5098 NEXT;
5099 } else if (RAW == '%') {
5100 xmlParsePEReference(ctxt);
5101 } else
5102 xmlParseMarkupDecl(ctxt);
5103
5104 /*
5105 * Pop-up of finished entities.
5106 */
5107 while ((RAW == 0) && (ctxt->inputNr > 1))
5108 xmlPopInput(ctxt);
5109
5110 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
5111 (tok == ctxt->token)) {
5112 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5113 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5114 ctxt->sax->error(ctxt->userData,
5115 "Content error in the external subset\n");
5116 ctxt->wellFormed = 0;
5117 ctxt->disableSAX = 1;
5118 break;
5119 }
5120 }
5121
5122 if (RAW != 0) {
5123 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5124 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5125 ctxt->sax->error(ctxt->userData,
5126 "Extra content at the end of the document\n");
5127 ctxt->wellFormed = 0;
5128 ctxt->disableSAX = 1;
5129 }
5130
5131}
5132
5133/**
5134 * xmlParseReference:
5135 * @ctxt: an XML parser context
5136 *
5137 * parse and handle entity references in content, depending on the SAX
5138 * interface, this may end-up in a call to character() if this is a
5139 * CharRef, a predefined entity, if there is no reference() callback.
5140 * or if the parser was asked to switch to that mode.
5141 *
5142 * [67] Reference ::= EntityRef | CharRef
5143 */
5144void
5145xmlParseReference(xmlParserCtxtPtr ctxt) {
5146 xmlEntityPtr ent;
5147 xmlChar *val;
5148 if (RAW != '&') return;
5149
5150 if (NXT(1) == '#') {
5151 int i = 0;
5152 xmlChar out[10];
5153 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005154 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005155
5156 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5157 /*
5158 * So we are using non-UTF-8 buffers
5159 * Check that the char fit on 8bits, if not
5160 * generate a CharRef.
5161 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005162 if (value <= 0xFF) {
5163 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005164 out[1] = 0;
5165 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5166 (!ctxt->disableSAX))
5167 ctxt->sax->characters(ctxt->userData, out, 1);
5168 } else {
5169 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005170 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005171 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005172 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005173 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5174 (!ctxt->disableSAX))
5175 ctxt->sax->reference(ctxt->userData, out);
5176 }
5177 } else {
5178 /*
5179 * Just encode the value in UTF-8
5180 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005181 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005182 out[i] = 0;
5183 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5184 (!ctxt->disableSAX))
5185 ctxt->sax->characters(ctxt->userData, out, i);
5186 }
5187 } else {
5188 ent = xmlParseEntityRef(ctxt);
5189 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005190 if (!ctxt->wellFormed)
5191 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005192 if ((ent->name != NULL) &&
5193 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5194 xmlNodePtr list = NULL;
5195 int ret;
5196
5197
5198 /*
5199 * The first reference to the entity trigger a parsing phase
5200 * where the ent->children is filled with the result from
5201 * the parsing.
5202 */
5203 if (ent->children == NULL) {
5204 xmlChar *value;
5205 value = ent->content;
5206
5207 /*
5208 * Check that this entity is well formed
5209 */
5210 if ((value != NULL) &&
5211 (value[1] == 0) && (value[0] == '<') &&
5212 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5213 /*
5214 * DONE: get definite answer on this !!!
5215 * Lots of entity decls are used to declare a single
5216 * char
5217 * <!ENTITY lt "<">
5218 * Which seems to be valid since
5219 * 2.4: The ampersand character (&) and the left angle
5220 * bracket (<) may appear in their literal form only
5221 * when used ... They are also legal within the literal
5222 * entity value of an internal entity declaration;i
5223 * see "4.3.2 Well-Formed Parsed Entities".
5224 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5225 * Looking at the OASIS test suite and James Clark
5226 * tests, this is broken. However the XML REC uses
5227 * it. Is the XML REC not well-formed ????
5228 * This is a hack to avoid this problem
5229 *
5230 * ANSWER: since lt gt amp .. are already defined,
5231 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005232 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005233 * is lousy but acceptable.
5234 */
5235 list = xmlNewDocText(ctxt->myDoc, value);
5236 if (list != NULL) {
5237 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5238 (ent->children == NULL)) {
5239 ent->children = list;
5240 ent->last = list;
5241 list->parent = (xmlNodePtr) ent;
5242 } else {
5243 xmlFreeNodeList(list);
5244 }
5245 } else if (list != NULL) {
5246 xmlFreeNodeList(list);
5247 }
5248 } else {
5249 /*
5250 * 4.3.2: An internal general parsed entity is well-formed
5251 * if its replacement text matches the production labeled
5252 * content.
5253 */
5254 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5255 ctxt->depth++;
5256 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
5257 ctxt->sax, NULL, ctxt->depth,
5258 value, &list);
5259 ctxt->depth--;
5260 } else if (ent->etype ==
5261 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5262 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005263 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Owen Taylor3473f882001-02-23 17:55:21 +00005264 ctxt->sax, NULL, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005265 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005266 ctxt->depth--;
5267 } else {
5268 ret = -1;
5269 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5270 ctxt->sax->error(ctxt->userData,
5271 "Internal: invalid entity type\n");
5272 }
5273 if (ret == XML_ERR_ENTITY_LOOP) {
5274 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5275 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5276 ctxt->sax->error(ctxt->userData,
5277 "Detected entity reference loop\n");
5278 ctxt->wellFormed = 0;
5279 ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005280 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005281 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005282 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5283 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005284 (ent->children == NULL)) {
5285 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005286 if (ctxt->replaceEntities) {
5287 /*
5288 * Prune it directly in the generated document
5289 * except for single text nodes.
5290 */
5291 if ((list->type == XML_TEXT_NODE) &&
5292 (list->next == NULL)) {
5293 list->parent = (xmlNodePtr) ent;
5294 list = NULL;
5295 } else {
5296 while (list != NULL) {
5297 list->parent = (xmlNodePtr) ctxt->node;
5298 if (list->next == NULL)
5299 ent->last = list;
5300 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005301 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005302 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005303 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5304 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005305 }
5306 } else {
5307 while (list != NULL) {
5308 list->parent = (xmlNodePtr) ent;
5309 if (list->next == NULL)
5310 ent->last = list;
5311 list = list->next;
5312 }
Owen Taylor3473f882001-02-23 17:55:21 +00005313 }
5314 } else {
5315 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005316 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005317 }
5318 } else if (ret > 0) {
5319 ctxt->errNo = ret;
5320 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5321 ctxt->sax->error(ctxt->userData,
5322 "Entity value required\n");
5323 ctxt->wellFormed = 0;
5324 ctxt->disableSAX = 1;
5325 } else if (list != NULL) {
5326 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005327 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005328 }
5329 }
5330 }
5331 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5332 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5333 /*
5334 * Create a node.
5335 */
5336 ctxt->sax->reference(ctxt->userData, ent->name);
5337 return;
5338 } else if (ctxt->replaceEntities) {
5339 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5340 /*
5341 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005342 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005343 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005344 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005345 if (list == NULL) {
Daniel Veillard8107a222002-01-13 14:10:10 +00005346 xmlNodePtr new = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005347 cur = ent->children;
5348 while (cur != NULL) {
5349 new = xmlCopyNode(cur, 1);
Daniel Veillard8107a222002-01-13 14:10:10 +00005350 if (firstChild == NULL){
5351 firstChild = new;
5352 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005353 xmlAddChild(ctxt->node, new);
5354 if (cur == ent->last)
5355 break;
5356 cur = cur->next;
5357 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005358 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5359 xmlAddEntityReference(ent, firstChild, new);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005360 } else {
5361 /*
5362 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005363 * node with a possible previous text one which
5364 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005365 */
5366 if (ent->children->type == XML_TEXT_NODE)
5367 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5368 if ((ent->last != ent->children) &&
5369 (ent->last->type == XML_TEXT_NODE))
5370 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5371 xmlAddChildList(ctxt->node, ent->children);
5372 }
5373
Owen Taylor3473f882001-02-23 17:55:21 +00005374 /*
5375 * This is to avoid a nasty side effect, see
5376 * characters() in SAX.c
5377 */
5378 ctxt->nodemem = 0;
5379 ctxt->nodelen = 0;
5380 return;
5381 } else {
5382 /*
5383 * Probably running in SAX mode
5384 */
5385 xmlParserInputPtr input;
5386
5387 input = xmlNewEntityInputStream(ctxt, ent);
5388 xmlPushInput(ctxt, input);
5389 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5390 (RAW == '<') && (NXT(1) == '?') &&
5391 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5392 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5393 xmlParseTextDecl(ctxt);
5394 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5395 /*
5396 * The XML REC instructs us to stop parsing right here
5397 */
5398 ctxt->instate = XML_PARSER_EOF;
5399 return;
5400 }
5401 if (input->standalone == 1) {
5402 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5403 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5404 ctxt->sax->error(ctxt->userData,
5405 "external parsed entities cannot be standalone\n");
5406 ctxt->wellFormed = 0;
5407 ctxt->disableSAX = 1;
5408 }
5409 }
5410 return;
5411 }
5412 }
5413 } else {
5414 val = ent->content;
5415 if (val == NULL) return;
5416 /*
5417 * inline the entity.
5418 */
5419 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5420 (!ctxt->disableSAX))
5421 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5422 }
5423 }
5424}
5425
5426/**
5427 * xmlParseEntityRef:
5428 * @ctxt: an XML parser context
5429 *
5430 * parse ENTITY references declarations
5431 *
5432 * [68] EntityRef ::= '&' Name ';'
5433 *
5434 * [ WFC: Entity Declared ]
5435 * In a document without any DTD, a document with only an internal DTD
5436 * subset which contains no parameter entity references, or a document
5437 * with "standalone='yes'", the Name given in the entity reference
5438 * must match that in an entity declaration, except that well-formed
5439 * documents need not declare any of the following entities: amp, lt,
5440 * gt, apos, quot. The declaration of a parameter entity must precede
5441 * any reference to it. Similarly, the declaration of a general entity
5442 * must precede any reference to it which appears in a default value in an
5443 * attribute-list declaration. Note that if entities are declared in the
5444 * external subset or in external parameter entities, a non-validating
5445 * processor is not obligated to read and process their declarations;
5446 * for such documents, the rule that an entity must be declared is a
5447 * well-formedness constraint only if standalone='yes'.
5448 *
5449 * [ WFC: Parsed Entity ]
5450 * An entity reference must not contain the name of an unparsed entity
5451 *
5452 * Returns the xmlEntityPtr if found, or NULL otherwise.
5453 */
5454xmlEntityPtr
5455xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5456 xmlChar *name;
5457 xmlEntityPtr ent = NULL;
5458
5459 GROW;
5460
5461 if (RAW == '&') {
5462 NEXT;
5463 name = xmlParseName(ctxt);
5464 if (name == NULL) {
5465 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5467 ctxt->sax->error(ctxt->userData,
5468 "xmlParseEntityRef: no name\n");
5469 ctxt->wellFormed = 0;
5470 ctxt->disableSAX = 1;
5471 } else {
5472 if (RAW == ';') {
5473 NEXT;
5474 /*
5475 * Ask first SAX for entity resolution, otherwise try the
5476 * predefined set.
5477 */
5478 if (ctxt->sax != NULL) {
5479 if (ctxt->sax->getEntity != NULL)
5480 ent = ctxt->sax->getEntity(ctxt->userData, name);
5481 if (ent == NULL)
5482 ent = xmlGetPredefinedEntity(name);
5483 }
5484 /*
5485 * [ WFC: Entity Declared ]
5486 * In a document without any DTD, a document with only an
5487 * internal DTD subset which contains no parameter entity
5488 * references, or a document with "standalone='yes'", the
5489 * Name given in the entity reference must match that in an
5490 * entity declaration, except that well-formed documents
5491 * need not declare any of the following entities: amp, lt,
5492 * gt, apos, quot.
5493 * The declaration of a parameter entity must precede any
5494 * reference to it.
5495 * Similarly, the declaration of a general entity must
5496 * precede any reference to it which appears in a default
5497 * value in an attribute-list declaration. Note that if
5498 * entities are declared in the external subset or in
5499 * external parameter entities, a non-validating processor
5500 * is not obligated to read and process their declarations;
5501 * for such documents, the rule that an entity must be
5502 * declared is a well-formedness constraint only if
5503 * standalone='yes'.
5504 */
5505 if (ent == NULL) {
5506 if ((ctxt->standalone == 1) ||
5507 ((ctxt->hasExternalSubset == 0) &&
5508 (ctxt->hasPErefs == 0))) {
5509 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5510 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5511 ctxt->sax->error(ctxt->userData,
5512 "Entity '%s' not defined\n", name);
5513 ctxt->wellFormed = 0;
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005514 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005515 ctxt->disableSAX = 1;
5516 } else {
5517 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005518 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005519 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005520 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005521 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005522 }
5523 }
5524
5525 /*
5526 * [ WFC: Parsed Entity ]
5527 * An entity reference must not contain the name of an
5528 * unparsed entity
5529 */
5530 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5531 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5532 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5533 ctxt->sax->error(ctxt->userData,
5534 "Entity reference to unparsed entity %s\n", name);
5535 ctxt->wellFormed = 0;
5536 ctxt->disableSAX = 1;
5537 }
5538
5539 /*
5540 * [ WFC: No External Entity References ]
5541 * Attribute values cannot contain direct or indirect
5542 * entity references to external entities.
5543 */
5544 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5545 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5546 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5547 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5548 ctxt->sax->error(ctxt->userData,
5549 "Attribute references external entity '%s'\n", name);
5550 ctxt->wellFormed = 0;
5551 ctxt->disableSAX = 1;
5552 }
5553 /*
5554 * [ WFC: No < in Attribute Values ]
5555 * The replacement text of any entity referred to directly or
5556 * indirectly in an attribute value (other than "&lt;") must
5557 * not contain a <.
5558 */
5559 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5560 (ent != NULL) &&
5561 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5562 (ent->content != NULL) &&
5563 (xmlStrchr(ent->content, '<'))) {
5564 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5565 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5566 ctxt->sax->error(ctxt->userData,
5567 "'<' in entity '%s' is not allowed in attributes values\n", name);
5568 ctxt->wellFormed = 0;
5569 ctxt->disableSAX = 1;
5570 }
5571
5572 /*
5573 * Internal check, no parameter entities here ...
5574 */
5575 else {
5576 switch (ent->etype) {
5577 case XML_INTERNAL_PARAMETER_ENTITY:
5578 case XML_EXTERNAL_PARAMETER_ENTITY:
5579 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5580 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5581 ctxt->sax->error(ctxt->userData,
5582 "Attempt to reference the parameter entity '%s'\n", name);
5583 ctxt->wellFormed = 0;
5584 ctxt->disableSAX = 1;
5585 break;
5586 default:
5587 break;
5588 }
5589 }
5590
5591 /*
5592 * [ WFC: No Recursion ]
5593 * A parsed entity must not contain a recursive reference
5594 * to itself, either directly or indirectly.
5595 * Done somewhere else
5596 */
5597
5598 } else {
5599 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5600 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5601 ctxt->sax->error(ctxt->userData,
5602 "xmlParseEntityRef: expecting ';'\n");
5603 ctxt->wellFormed = 0;
5604 ctxt->disableSAX = 1;
5605 }
5606 xmlFree(name);
5607 }
5608 }
5609 return(ent);
5610}
5611
5612/**
5613 * xmlParseStringEntityRef:
5614 * @ctxt: an XML parser context
5615 * @str: a pointer to an index in the string
5616 *
5617 * parse ENTITY references declarations, but this version parses it from
5618 * a string value.
5619 *
5620 * [68] EntityRef ::= '&' Name ';'
5621 *
5622 * [ WFC: Entity Declared ]
5623 * In a document without any DTD, a document with only an internal DTD
5624 * subset which contains no parameter entity references, or a document
5625 * with "standalone='yes'", the Name given in the entity reference
5626 * must match that in an entity declaration, except that well-formed
5627 * documents need not declare any of the following entities: amp, lt,
5628 * gt, apos, quot. The declaration of a parameter entity must precede
5629 * any reference to it. Similarly, the declaration of a general entity
5630 * must precede any reference to it which appears in a default value in an
5631 * attribute-list declaration. Note that if entities are declared in the
5632 * external subset or in external parameter entities, a non-validating
5633 * processor is not obligated to read and process their declarations;
5634 * for such documents, the rule that an entity must be declared is a
5635 * well-formedness constraint only if standalone='yes'.
5636 *
5637 * [ WFC: Parsed Entity ]
5638 * An entity reference must not contain the name of an unparsed entity
5639 *
5640 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5641 * is updated to the current location in the string.
5642 */
5643xmlEntityPtr
5644xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5645 xmlChar *name;
5646 const xmlChar *ptr;
5647 xmlChar cur;
5648 xmlEntityPtr ent = NULL;
5649
5650 if ((str == NULL) || (*str == NULL))
5651 return(NULL);
5652 ptr = *str;
5653 cur = *ptr;
5654 if (cur == '&') {
5655 ptr++;
5656 cur = *ptr;
5657 name = xmlParseStringName(ctxt, &ptr);
5658 if (name == NULL) {
5659 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5660 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5661 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005662 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005663 ctxt->wellFormed = 0;
5664 ctxt->disableSAX = 1;
5665 } else {
5666 if (*ptr == ';') {
5667 ptr++;
5668 /*
5669 * Ask first SAX for entity resolution, otherwise try the
5670 * predefined set.
5671 */
5672 if (ctxt->sax != NULL) {
5673 if (ctxt->sax->getEntity != NULL)
5674 ent = ctxt->sax->getEntity(ctxt->userData, name);
5675 if (ent == NULL)
5676 ent = xmlGetPredefinedEntity(name);
5677 }
5678 /*
5679 * [ WFC: Entity Declared ]
5680 * In a document without any DTD, a document with only an
5681 * internal DTD subset which contains no parameter entity
5682 * references, or a document with "standalone='yes'", the
5683 * Name given in the entity reference must match that in an
5684 * entity declaration, except that well-formed documents
5685 * need not declare any of the following entities: amp, lt,
5686 * gt, apos, quot.
5687 * The declaration of a parameter entity must precede any
5688 * reference to it.
5689 * Similarly, the declaration of a general entity must
5690 * precede any reference to it which appears in a default
5691 * value in an attribute-list declaration. Note that if
5692 * entities are declared in the external subset or in
5693 * external parameter entities, a non-validating processor
5694 * is not obligated to read and process their declarations;
5695 * for such documents, the rule that an entity must be
5696 * declared is a well-formedness constraint only if
5697 * standalone='yes'.
5698 */
5699 if (ent == NULL) {
5700 if ((ctxt->standalone == 1) ||
5701 ((ctxt->hasExternalSubset == 0) &&
5702 (ctxt->hasPErefs == 0))) {
5703 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5704 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5705 ctxt->sax->error(ctxt->userData,
5706 "Entity '%s' not defined\n", name);
5707 ctxt->wellFormed = 0;
5708 ctxt->disableSAX = 1;
5709 } else {
5710 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5711 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5712 ctxt->sax->warning(ctxt->userData,
5713 "Entity '%s' not defined\n", name);
5714 }
5715 }
5716
5717 /*
5718 * [ WFC: Parsed Entity ]
5719 * An entity reference must not contain the name of an
5720 * unparsed entity
5721 */
5722 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5723 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5724 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5725 ctxt->sax->error(ctxt->userData,
5726 "Entity reference to unparsed entity %s\n", name);
5727 ctxt->wellFormed = 0;
5728 ctxt->disableSAX = 1;
5729 }
5730
5731 /*
5732 * [ WFC: No External Entity References ]
5733 * Attribute values cannot contain direct or indirect
5734 * entity references to external entities.
5735 */
5736 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5737 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5738 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5739 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5740 ctxt->sax->error(ctxt->userData,
5741 "Attribute references external entity '%s'\n", name);
5742 ctxt->wellFormed = 0;
5743 ctxt->disableSAX = 1;
5744 }
5745 /*
5746 * [ WFC: No < in Attribute Values ]
5747 * The replacement text of any entity referred to directly or
5748 * indirectly in an attribute value (other than "&lt;") must
5749 * not contain a <.
5750 */
5751 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5752 (ent != NULL) &&
5753 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5754 (ent->content != NULL) &&
5755 (xmlStrchr(ent->content, '<'))) {
5756 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5757 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5758 ctxt->sax->error(ctxt->userData,
5759 "'<' in entity '%s' is not allowed in attributes values\n", name);
5760 ctxt->wellFormed = 0;
5761 ctxt->disableSAX = 1;
5762 }
5763
5764 /*
5765 * Internal check, no parameter entities here ...
5766 */
5767 else {
5768 switch (ent->etype) {
5769 case XML_INTERNAL_PARAMETER_ENTITY:
5770 case XML_EXTERNAL_PARAMETER_ENTITY:
5771 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5772 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5773 ctxt->sax->error(ctxt->userData,
5774 "Attempt to reference the parameter entity '%s'\n", name);
5775 ctxt->wellFormed = 0;
5776 ctxt->disableSAX = 1;
5777 break;
5778 default:
5779 break;
5780 }
5781 }
5782
5783 /*
5784 * [ WFC: No Recursion ]
5785 * A parsed entity must not contain a recursive reference
5786 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005787 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00005788 */
5789
5790 } else {
5791 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5792 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5793 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005794 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005795 ctxt->wellFormed = 0;
5796 ctxt->disableSAX = 1;
5797 }
5798 xmlFree(name);
5799 }
5800 }
5801 *str = ptr;
5802 return(ent);
5803}
5804
5805/**
5806 * xmlParsePEReference:
5807 * @ctxt: an XML parser context
5808 *
5809 * parse PEReference declarations
5810 * The entity content is handled directly by pushing it's content as
5811 * a new input stream.
5812 *
5813 * [69] PEReference ::= '%' Name ';'
5814 *
5815 * [ WFC: No Recursion ]
5816 * A parsed entity must not contain a recursive
5817 * reference to itself, either directly or indirectly.
5818 *
5819 * [ WFC: Entity Declared ]
5820 * In a document without any DTD, a document with only an internal DTD
5821 * subset which contains no parameter entity references, or a document
5822 * with "standalone='yes'", ... ... The declaration of a parameter
5823 * entity must precede any reference to it...
5824 *
5825 * [ VC: Entity Declared ]
5826 * In a document with an external subset or external parameter entities
5827 * with "standalone='no'", ... ... The declaration of a parameter entity
5828 * must precede any reference to it...
5829 *
5830 * [ WFC: In DTD ]
5831 * Parameter-entity references may only appear in the DTD.
5832 * NOTE: misleading but this is handled.
5833 */
5834void
5835xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5836 xmlChar *name;
5837 xmlEntityPtr entity = NULL;
5838 xmlParserInputPtr input;
5839
5840 if (RAW == '%') {
5841 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005842 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005843 if (name == NULL) {
5844 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5845 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5846 ctxt->sax->error(ctxt->userData,
5847 "xmlParsePEReference: no name\n");
5848 ctxt->wellFormed = 0;
5849 ctxt->disableSAX = 1;
5850 } else {
5851 if (RAW == ';') {
5852 NEXT;
5853 if ((ctxt->sax != NULL) &&
5854 (ctxt->sax->getParameterEntity != NULL))
5855 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5856 name);
5857 if (entity == NULL) {
5858 /*
5859 * [ WFC: Entity Declared ]
5860 * In a document without any DTD, a document with only an
5861 * internal DTD subset which contains no parameter entity
5862 * references, or a document with "standalone='yes'", ...
5863 * ... The declaration of a parameter entity must precede
5864 * any reference to it...
5865 */
5866 if ((ctxt->standalone == 1) ||
5867 ((ctxt->hasExternalSubset == 0) &&
5868 (ctxt->hasPErefs == 0))) {
5869 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5870 if ((!ctxt->disableSAX) &&
5871 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5872 ctxt->sax->error(ctxt->userData,
5873 "PEReference: %%%s; not found\n", name);
5874 ctxt->wellFormed = 0;
5875 ctxt->disableSAX = 1;
5876 } else {
5877 /*
5878 * [ VC: Entity Declared ]
5879 * In a document with an external subset or external
5880 * parameter entities with "standalone='no'", ...
5881 * ... The declaration of a parameter entity must precede
5882 * any reference to it...
5883 */
5884 if ((!ctxt->disableSAX) &&
5885 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5886 ctxt->sax->warning(ctxt->userData,
5887 "PEReference: %%%s; not found\n", name);
5888 ctxt->valid = 0;
5889 }
5890 } else {
5891 /*
5892 * Internal checking in case the entity quest barfed
5893 */
5894 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5895 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5896 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5897 ctxt->sax->warning(ctxt->userData,
5898 "Internal: %%%s; is not a parameter entity\n", name);
5899 } else {
5900 /*
5901 * TODO !!!
5902 * handle the extra spaces added before and after
5903 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5904 */
5905 input = xmlNewEntityInputStream(ctxt, entity);
5906 xmlPushInput(ctxt, input);
5907 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5908 (RAW == '<') && (NXT(1) == '?') &&
5909 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5910 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5911 xmlParseTextDecl(ctxt);
5912 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5913 /*
5914 * The XML REC instructs us to stop parsing
5915 * right here
5916 */
5917 ctxt->instate = XML_PARSER_EOF;
5918 xmlFree(name);
5919 return;
5920 }
5921 }
5922 if (ctxt->token == 0)
5923 ctxt->token = ' ';
5924 }
5925 }
5926 ctxt->hasPErefs = 1;
5927 } else {
5928 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5929 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5930 ctxt->sax->error(ctxt->userData,
5931 "xmlParsePEReference: expecting ';'\n");
5932 ctxt->wellFormed = 0;
5933 ctxt->disableSAX = 1;
5934 }
5935 xmlFree(name);
5936 }
5937 }
5938}
5939
5940/**
5941 * xmlParseStringPEReference:
5942 * @ctxt: an XML parser context
5943 * @str: a pointer to an index in the string
5944 *
5945 * parse PEReference declarations
5946 *
5947 * [69] PEReference ::= '%' Name ';'
5948 *
5949 * [ WFC: No Recursion ]
5950 * A parsed entity must not contain a recursive
5951 * reference to itself, either directly or indirectly.
5952 *
5953 * [ WFC: Entity Declared ]
5954 * In a document without any DTD, a document with only an internal DTD
5955 * subset which contains no parameter entity references, or a document
5956 * with "standalone='yes'", ... ... The declaration of a parameter
5957 * entity must precede any reference to it...
5958 *
5959 * [ VC: Entity Declared ]
5960 * In a document with an external subset or external parameter entities
5961 * with "standalone='no'", ... ... The declaration of a parameter entity
5962 * must precede any reference to it...
5963 *
5964 * [ WFC: In DTD ]
5965 * Parameter-entity references may only appear in the DTD.
5966 * NOTE: misleading but this is handled.
5967 *
5968 * Returns the string of the entity content.
5969 * str is updated to the current value of the index
5970 */
5971xmlEntityPtr
5972xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5973 const xmlChar *ptr;
5974 xmlChar cur;
5975 xmlChar *name;
5976 xmlEntityPtr entity = NULL;
5977
5978 if ((str == NULL) || (*str == NULL)) return(NULL);
5979 ptr = *str;
5980 cur = *ptr;
5981 if (cur == '%') {
5982 ptr++;
5983 cur = *ptr;
5984 name = xmlParseStringName(ctxt, &ptr);
5985 if (name == NULL) {
5986 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5987 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5988 ctxt->sax->error(ctxt->userData,
5989 "xmlParseStringPEReference: no name\n");
5990 ctxt->wellFormed = 0;
5991 ctxt->disableSAX = 1;
5992 } else {
5993 cur = *ptr;
5994 if (cur == ';') {
5995 ptr++;
5996 cur = *ptr;
5997 if ((ctxt->sax != NULL) &&
5998 (ctxt->sax->getParameterEntity != NULL))
5999 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6000 name);
6001 if (entity == NULL) {
6002 /*
6003 * [ WFC: Entity Declared ]
6004 * In a document without any DTD, a document with only an
6005 * internal DTD subset which contains no parameter entity
6006 * references, or a document with "standalone='yes'", ...
6007 * ... The declaration of a parameter entity must precede
6008 * any reference to it...
6009 */
6010 if ((ctxt->standalone == 1) ||
6011 ((ctxt->hasExternalSubset == 0) &&
6012 (ctxt->hasPErefs == 0))) {
6013 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6014 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6015 ctxt->sax->error(ctxt->userData,
6016 "PEReference: %%%s; not found\n", name);
6017 ctxt->wellFormed = 0;
6018 ctxt->disableSAX = 1;
6019 } else {
6020 /*
6021 * [ VC: Entity Declared ]
6022 * In a document with an external subset or external
6023 * parameter entities with "standalone='no'", ...
6024 * ... The declaration of a parameter entity must
6025 * precede any reference to it...
6026 */
6027 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6028 ctxt->sax->warning(ctxt->userData,
6029 "PEReference: %%%s; not found\n", name);
6030 ctxt->valid = 0;
6031 }
6032 } else {
6033 /*
6034 * Internal checking in case the entity quest barfed
6035 */
6036 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6037 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6038 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6039 ctxt->sax->warning(ctxt->userData,
6040 "Internal: %%%s; is not a parameter entity\n", name);
6041 }
6042 }
6043 ctxt->hasPErefs = 1;
6044 } else {
6045 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6046 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6047 ctxt->sax->error(ctxt->userData,
6048 "xmlParseStringPEReference: expecting ';'\n");
6049 ctxt->wellFormed = 0;
6050 ctxt->disableSAX = 1;
6051 }
6052 xmlFree(name);
6053 }
6054 }
6055 *str = ptr;
6056 return(entity);
6057}
6058
6059/**
6060 * xmlParseDocTypeDecl:
6061 * @ctxt: an XML parser context
6062 *
6063 * parse a DOCTYPE declaration
6064 *
6065 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6066 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6067 *
6068 * [ VC: Root Element Type ]
6069 * The Name in the document type declaration must match the element
6070 * type of the root element.
6071 */
6072
6073void
6074xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6075 xmlChar *name = NULL;
6076 xmlChar *ExternalID = NULL;
6077 xmlChar *URI = NULL;
6078
6079 /*
6080 * We know that '<!DOCTYPE' has been detected.
6081 */
6082 SKIP(9);
6083
6084 SKIP_BLANKS;
6085
6086 /*
6087 * Parse the DOCTYPE name.
6088 */
6089 name = xmlParseName(ctxt);
6090 if (name == NULL) {
6091 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6092 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6093 ctxt->sax->error(ctxt->userData,
6094 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6095 ctxt->wellFormed = 0;
6096 ctxt->disableSAX = 1;
6097 }
6098 ctxt->intSubName = name;
6099
6100 SKIP_BLANKS;
6101
6102 /*
6103 * Check for SystemID and ExternalID
6104 */
6105 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6106
6107 if ((URI != NULL) || (ExternalID != NULL)) {
6108 ctxt->hasExternalSubset = 1;
6109 }
6110 ctxt->extSubURI = URI;
6111 ctxt->extSubSystem = ExternalID;
6112
6113 SKIP_BLANKS;
6114
6115 /*
6116 * Create and update the internal subset.
6117 */
6118 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6119 (!ctxt->disableSAX))
6120 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6121
6122 /*
6123 * Is there any internal subset declarations ?
6124 * they are handled separately in xmlParseInternalSubset()
6125 */
6126 if (RAW == '[')
6127 return;
6128
6129 /*
6130 * We should be at the end of the DOCTYPE declaration.
6131 */
6132 if (RAW != '>') {
6133 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6134 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006135 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006136 ctxt->wellFormed = 0;
6137 ctxt->disableSAX = 1;
6138 }
6139 NEXT;
6140}
6141
6142/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006143 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006144 * @ctxt: an XML parser context
6145 *
6146 * parse the internal subset declaration
6147 *
6148 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6149 */
6150
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006151static void
Owen Taylor3473f882001-02-23 17:55:21 +00006152xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6153 /*
6154 * Is there any DTD definition ?
6155 */
6156 if (RAW == '[') {
6157 ctxt->instate = XML_PARSER_DTD;
6158 NEXT;
6159 /*
6160 * Parse the succession of Markup declarations and
6161 * PEReferences.
6162 * Subsequence (markupdecl | PEReference | S)*
6163 */
6164 while (RAW != ']') {
6165 const xmlChar *check = CUR_PTR;
6166 int cons = ctxt->input->consumed;
6167
6168 SKIP_BLANKS;
6169 xmlParseMarkupDecl(ctxt);
6170 xmlParsePEReference(ctxt);
6171
6172 /*
6173 * Pop-up of finished entities.
6174 */
6175 while ((RAW == 0) && (ctxt->inputNr > 1))
6176 xmlPopInput(ctxt);
6177
6178 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6179 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6180 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6181 ctxt->sax->error(ctxt->userData,
6182 "xmlParseInternalSubset: error detected in Markup declaration\n");
6183 ctxt->wellFormed = 0;
6184 ctxt->disableSAX = 1;
6185 break;
6186 }
6187 }
6188 if (RAW == ']') {
6189 NEXT;
6190 SKIP_BLANKS;
6191 }
6192 }
6193
6194 /*
6195 * We should be at the end of the DOCTYPE declaration.
6196 */
6197 if (RAW != '>') {
6198 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6199 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006200 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006201 ctxt->wellFormed = 0;
6202 ctxt->disableSAX = 1;
6203 }
6204 NEXT;
6205}
6206
6207/**
6208 * xmlParseAttribute:
6209 * @ctxt: an XML parser context
6210 * @value: a xmlChar ** used to store the value of the attribute
6211 *
6212 * parse an attribute
6213 *
6214 * [41] Attribute ::= Name Eq AttValue
6215 *
6216 * [ WFC: No External Entity References ]
6217 * Attribute values cannot contain direct or indirect entity references
6218 * to external entities.
6219 *
6220 * [ WFC: No < in Attribute Values ]
6221 * The replacement text of any entity referred to directly or indirectly in
6222 * an attribute value (other than "&lt;") must not contain a <.
6223 *
6224 * [ VC: Attribute Value Type ]
6225 * The attribute must have been declared; the value must be of the type
6226 * declared for it.
6227 *
6228 * [25] Eq ::= S? '=' S?
6229 *
6230 * With namespace:
6231 *
6232 * [NS 11] Attribute ::= QName Eq AttValue
6233 *
6234 * Also the case QName == xmlns:??? is handled independently as a namespace
6235 * definition.
6236 *
6237 * Returns the attribute name, and the value in *value.
6238 */
6239
6240xmlChar *
6241xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6242 xmlChar *name, *val;
6243
6244 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006245 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006246 name = xmlParseName(ctxt);
6247 if (name == NULL) {
6248 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6249 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6250 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6251 ctxt->wellFormed = 0;
6252 ctxt->disableSAX = 1;
6253 return(NULL);
6254 }
6255
6256 /*
6257 * read the value
6258 */
6259 SKIP_BLANKS;
6260 if (RAW == '=') {
6261 NEXT;
6262 SKIP_BLANKS;
6263 val = xmlParseAttValue(ctxt);
6264 ctxt->instate = XML_PARSER_CONTENT;
6265 } else {
6266 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6267 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6268 ctxt->sax->error(ctxt->userData,
6269 "Specification mandate value for attribute %s\n", name);
6270 ctxt->wellFormed = 0;
6271 ctxt->disableSAX = 1;
6272 xmlFree(name);
6273 return(NULL);
6274 }
6275
6276 /*
6277 * Check that xml:lang conforms to the specification
6278 * No more registered as an error, just generate a warning now
6279 * since this was deprecated in XML second edition
6280 */
6281 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6282 if (!xmlCheckLanguageID(val)) {
6283 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6284 ctxt->sax->warning(ctxt->userData,
6285 "Malformed value for xml:lang : %s\n", val);
6286 }
6287 }
6288
6289 /*
6290 * Check that xml:space conforms to the specification
6291 */
6292 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6293 if (xmlStrEqual(val, BAD_CAST "default"))
6294 *(ctxt->space) = 0;
6295 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6296 *(ctxt->space) = 1;
6297 else {
6298 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6299 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6300 ctxt->sax->error(ctxt->userData,
6301"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6302 val);
6303 ctxt->wellFormed = 0;
6304 ctxt->disableSAX = 1;
6305 }
6306 }
6307
6308 *value = val;
6309 return(name);
6310}
6311
6312/**
6313 * xmlParseStartTag:
6314 * @ctxt: an XML parser context
6315 *
6316 * parse a start of tag either for rule element or
6317 * EmptyElement. In both case we don't parse the tag closing chars.
6318 *
6319 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6320 *
6321 * [ WFC: Unique Att Spec ]
6322 * No attribute name may appear more than once in the same start-tag or
6323 * empty-element tag.
6324 *
6325 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6326 *
6327 * [ WFC: Unique Att Spec ]
6328 * No attribute name may appear more than once in the same start-tag or
6329 * empty-element tag.
6330 *
6331 * With namespace:
6332 *
6333 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6334 *
6335 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6336 *
6337 * Returns the element name parsed
6338 */
6339
6340xmlChar *
6341xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6342 xmlChar *name;
6343 xmlChar *attname;
6344 xmlChar *attvalue;
6345 const xmlChar **atts = NULL;
6346 int nbatts = 0;
6347 int maxatts = 0;
6348 int i;
6349
6350 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006351 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006352
6353 name = xmlParseName(ctxt);
6354 if (name == NULL) {
6355 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6356 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6357 ctxt->sax->error(ctxt->userData,
6358 "xmlParseStartTag: invalid element name\n");
6359 ctxt->wellFormed = 0;
6360 ctxt->disableSAX = 1;
6361 return(NULL);
6362 }
6363
6364 /*
6365 * Now parse the attributes, it ends up with the ending
6366 *
6367 * (S Attribute)* S?
6368 */
6369 SKIP_BLANKS;
6370 GROW;
6371
Daniel Veillard21a0f912001-02-25 19:54:14 +00006372 while ((RAW != '>') &&
6373 ((RAW != '/') || (NXT(1) != '>')) &&
6374 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006375 const xmlChar *q = CUR_PTR;
6376 int cons = ctxt->input->consumed;
6377
6378 attname = xmlParseAttribute(ctxt, &attvalue);
6379 if ((attname != NULL) && (attvalue != NULL)) {
6380 /*
6381 * [ WFC: Unique Att Spec ]
6382 * No attribute name may appear more than once in the same
6383 * start-tag or empty-element tag.
6384 */
6385 for (i = 0; i < nbatts;i += 2) {
6386 if (xmlStrEqual(atts[i], attname)) {
6387 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6388 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6389 ctxt->sax->error(ctxt->userData,
6390 "Attribute %s redefined\n",
6391 attname);
6392 ctxt->wellFormed = 0;
6393 ctxt->disableSAX = 1;
6394 xmlFree(attname);
6395 xmlFree(attvalue);
6396 goto failed;
6397 }
6398 }
6399
6400 /*
6401 * Add the pair to atts
6402 */
6403 if (atts == NULL) {
6404 maxatts = 10;
6405 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6406 if (atts == NULL) {
6407 xmlGenericError(xmlGenericErrorContext,
6408 "malloc of %ld byte failed\n",
6409 maxatts * (long)sizeof(xmlChar *));
6410 return(NULL);
6411 }
6412 } else if (nbatts + 4 > maxatts) {
6413 maxatts *= 2;
6414 atts = (const xmlChar **) xmlRealloc((void *) atts,
6415 maxatts * sizeof(xmlChar *));
6416 if (atts == NULL) {
6417 xmlGenericError(xmlGenericErrorContext,
6418 "realloc of %ld byte failed\n",
6419 maxatts * (long)sizeof(xmlChar *));
6420 return(NULL);
6421 }
6422 }
6423 atts[nbatts++] = attname;
6424 atts[nbatts++] = attvalue;
6425 atts[nbatts] = NULL;
6426 atts[nbatts + 1] = NULL;
6427 } else {
6428 if (attname != NULL)
6429 xmlFree(attname);
6430 if (attvalue != NULL)
6431 xmlFree(attvalue);
6432 }
6433
6434failed:
6435
6436 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6437 break;
6438 if (!IS_BLANK(RAW)) {
6439 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6440 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6441 ctxt->sax->error(ctxt->userData,
6442 "attributes construct error\n");
6443 ctxt->wellFormed = 0;
6444 ctxt->disableSAX = 1;
6445 }
6446 SKIP_BLANKS;
6447 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6448 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6449 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6450 ctxt->sax->error(ctxt->userData,
6451 "xmlParseStartTag: problem parsing attributes\n");
6452 ctxt->wellFormed = 0;
6453 ctxt->disableSAX = 1;
6454 break;
6455 }
6456 GROW;
6457 }
6458
6459 /*
6460 * SAX: Start of Element !
6461 */
6462 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6463 (!ctxt->disableSAX))
6464 ctxt->sax->startElement(ctxt->userData, name, atts);
6465
6466 if (atts != NULL) {
6467 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6468 xmlFree((void *) atts);
6469 }
6470 return(name);
6471}
6472
6473/**
6474 * xmlParseEndTag:
6475 * @ctxt: an XML parser context
6476 *
6477 * parse an end of tag
6478 *
6479 * [42] ETag ::= '</' Name S? '>'
6480 *
6481 * With namespace
6482 *
6483 * [NS 9] ETag ::= '</' QName S? '>'
6484 */
6485
6486void
6487xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6488 xmlChar *name;
6489 xmlChar *oldname;
6490
6491 GROW;
6492 if ((RAW != '<') || (NXT(1) != '/')) {
6493 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6494 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6495 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6496 ctxt->wellFormed = 0;
6497 ctxt->disableSAX = 1;
6498 return;
6499 }
6500 SKIP(2);
6501
6502 name = xmlParseName(ctxt);
6503
6504 /*
6505 * We should definitely be at the ending "S? '>'" part
6506 */
6507 GROW;
6508 SKIP_BLANKS;
6509 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6510 ctxt->errNo = XML_ERR_GT_REQUIRED;
6511 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6512 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6513 ctxt->wellFormed = 0;
6514 ctxt->disableSAX = 1;
6515 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006516 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006517
6518 /*
6519 * [ WFC: Element Type Match ]
6520 * The Name in an element's end-tag must match the element type in the
6521 * start-tag.
6522 *
6523 */
6524 if ((name == NULL) || (ctxt->name == NULL) ||
6525 (!xmlStrEqual(name, ctxt->name))) {
6526 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6527 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6528 if ((name != NULL) && (ctxt->name != NULL)) {
6529 ctxt->sax->error(ctxt->userData,
6530 "Opening and ending tag mismatch: %s and %s\n",
6531 ctxt->name, name);
6532 } else if (ctxt->name != NULL) {
6533 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006534 "Ending tag error for: %s\n", ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006535 } else {
6536 ctxt->sax->error(ctxt->userData,
6537 "Ending tag error: internal error ???\n");
6538 }
6539
6540 }
6541 ctxt->wellFormed = 0;
6542 ctxt->disableSAX = 1;
6543 }
6544
6545 /*
6546 * SAX: End of Tag
6547 */
6548 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6549 (!ctxt->disableSAX))
6550 ctxt->sax->endElement(ctxt->userData, name);
6551
6552 if (name != NULL)
6553 xmlFree(name);
6554 oldname = namePop(ctxt);
6555 spacePop(ctxt);
6556 if (oldname != NULL) {
6557#ifdef DEBUG_STACK
6558 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6559#endif
6560 xmlFree(oldname);
6561 }
6562 return;
6563}
6564
6565/**
6566 * xmlParseCDSect:
6567 * @ctxt: an XML parser context
6568 *
6569 * Parse escaped pure raw content.
6570 *
6571 * [18] CDSect ::= CDStart CData CDEnd
6572 *
6573 * [19] CDStart ::= '<![CDATA['
6574 *
6575 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6576 *
6577 * [21] CDEnd ::= ']]>'
6578 */
6579void
6580xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6581 xmlChar *buf = NULL;
6582 int len = 0;
6583 int size = XML_PARSER_BUFFER_SIZE;
6584 int r, rl;
6585 int s, sl;
6586 int cur, l;
6587 int count = 0;
6588
6589 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6590 (NXT(2) == '[') && (NXT(3) == 'C') &&
6591 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6592 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6593 (NXT(8) == '[')) {
6594 SKIP(9);
6595 } else
6596 return;
6597
6598 ctxt->instate = XML_PARSER_CDATA_SECTION;
6599 r = CUR_CHAR(rl);
6600 if (!IS_CHAR(r)) {
6601 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6603 ctxt->sax->error(ctxt->userData,
6604 "CData section not finished\n");
6605 ctxt->wellFormed = 0;
6606 ctxt->disableSAX = 1;
6607 ctxt->instate = XML_PARSER_CONTENT;
6608 return;
6609 }
6610 NEXTL(rl);
6611 s = CUR_CHAR(sl);
6612 if (!IS_CHAR(s)) {
6613 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6614 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6615 ctxt->sax->error(ctxt->userData,
6616 "CData section not finished\n");
6617 ctxt->wellFormed = 0;
6618 ctxt->disableSAX = 1;
6619 ctxt->instate = XML_PARSER_CONTENT;
6620 return;
6621 }
6622 NEXTL(sl);
6623 cur = CUR_CHAR(l);
6624 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6625 if (buf == NULL) {
6626 xmlGenericError(xmlGenericErrorContext,
6627 "malloc of %d byte failed\n", size);
6628 return;
6629 }
6630 while (IS_CHAR(cur) &&
6631 ((r != ']') || (s != ']') || (cur != '>'))) {
6632 if (len + 5 >= size) {
6633 size *= 2;
6634 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6635 if (buf == NULL) {
6636 xmlGenericError(xmlGenericErrorContext,
6637 "realloc of %d byte failed\n", size);
6638 return;
6639 }
6640 }
6641 COPY_BUF(rl,buf,len,r);
6642 r = s;
6643 rl = sl;
6644 s = cur;
6645 sl = l;
6646 count++;
6647 if (count > 50) {
6648 GROW;
6649 count = 0;
6650 }
6651 NEXTL(l);
6652 cur = CUR_CHAR(l);
6653 }
6654 buf[len] = 0;
6655 ctxt->instate = XML_PARSER_CONTENT;
6656 if (cur != '>') {
6657 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6658 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6659 ctxt->sax->error(ctxt->userData,
6660 "CData section not finished\n%.50s\n", buf);
6661 ctxt->wellFormed = 0;
6662 ctxt->disableSAX = 1;
6663 xmlFree(buf);
6664 return;
6665 }
6666 NEXTL(l);
6667
6668 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006669 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00006670 */
6671 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6672 if (ctxt->sax->cdataBlock != NULL)
6673 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006674 else if (ctxt->sax->characters != NULL)
6675 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006676 }
6677 xmlFree(buf);
6678}
6679
6680/**
6681 * xmlParseContent:
6682 * @ctxt: an XML parser context
6683 *
6684 * Parse a content:
6685 *
6686 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6687 */
6688
6689void
6690xmlParseContent(xmlParserCtxtPtr ctxt) {
6691 GROW;
6692 while (((RAW != 0) || (ctxt->token != 0)) &&
6693 ((RAW != '<') || (NXT(1) != '/'))) {
6694 const xmlChar *test = CUR_PTR;
6695 int cons = ctxt->input->consumed;
Daniel Veillard04be4f52001-03-26 21:23:53 +00006696 int tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006697 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006698
6699 /*
6700 * Handle possible processed charrefs.
6701 */
6702 if (ctxt->token != 0) {
6703 xmlParseCharData(ctxt, 0);
6704 }
6705 /*
6706 * First case : a Processing Instruction.
6707 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006708 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006709 xmlParsePI(ctxt);
6710 }
6711
6712 /*
6713 * Second case : a CDSection
6714 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006715 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006716 (NXT(2) == '[') && (NXT(3) == 'C') &&
6717 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6718 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6719 (NXT(8) == '[')) {
6720 xmlParseCDSect(ctxt);
6721 }
6722
6723 /*
6724 * Third case : a comment
6725 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006726 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006727 (NXT(2) == '-') && (NXT(3) == '-')) {
6728 xmlParseComment(ctxt);
6729 ctxt->instate = XML_PARSER_CONTENT;
6730 }
6731
6732 /*
6733 * Fourth case : a sub-element.
6734 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006735 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006736 xmlParseElement(ctxt);
6737 }
6738
6739 /*
6740 * Fifth case : a reference. If if has not been resolved,
6741 * parsing returns it's Name, create the node
6742 */
6743
Daniel Veillard21a0f912001-02-25 19:54:14 +00006744 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006745 xmlParseReference(ctxt);
6746 }
6747
6748 /*
6749 * Last case, text. Note that References are handled directly.
6750 */
6751 else {
6752 xmlParseCharData(ctxt, 0);
6753 }
6754
6755 GROW;
6756 /*
6757 * Pop-up of finished entities.
6758 */
6759 while ((RAW == 0) && (ctxt->inputNr > 1))
6760 xmlPopInput(ctxt);
6761 SHRINK;
6762
6763 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6764 (tok == ctxt->token)) {
6765 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6766 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6767 ctxt->sax->error(ctxt->userData,
6768 "detected an error in element content\n");
6769 ctxt->wellFormed = 0;
6770 ctxt->disableSAX = 1;
6771 ctxt->instate = XML_PARSER_EOF;
6772 break;
6773 }
6774 }
6775}
6776
6777/**
6778 * xmlParseElement:
6779 * @ctxt: an XML parser context
6780 *
6781 * parse an XML element, this is highly recursive
6782 *
6783 * [39] element ::= EmptyElemTag | STag content ETag
6784 *
6785 * [ WFC: Element Type Match ]
6786 * The Name in an element's end-tag must match the element type in the
6787 * start-tag.
6788 *
6789 * [ VC: Element Valid ]
6790 * An element is valid if there is a declaration matching elementdecl
6791 * where the Name matches the element type and one of the following holds:
6792 * - The declaration matches EMPTY and the element has no content.
6793 * - The declaration matches children and the sequence of child elements
6794 * belongs to the language generated by the regular expression in the
6795 * content model, with optional white space (characters matching the
6796 * nonterminal S) between each pair of child elements.
6797 * - The declaration matches Mixed and the content consists of character
6798 * data and child elements whose types match names in the content model.
6799 * - The declaration matches ANY, and the types of any child elements have
6800 * been declared.
6801 */
6802
6803void
6804xmlParseElement(xmlParserCtxtPtr ctxt) {
6805 const xmlChar *openTag = CUR_PTR;
6806 xmlChar *name;
6807 xmlChar *oldname;
6808 xmlParserNodeInfo node_info;
6809 xmlNodePtr ret;
6810
6811 /* Capture start position */
6812 if (ctxt->record_info) {
6813 node_info.begin_pos = ctxt->input->consumed +
6814 (CUR_PTR - ctxt->input->base);
6815 node_info.begin_line = ctxt->input->line;
6816 }
6817
6818 if (ctxt->spaceNr == 0)
6819 spacePush(ctxt, -1);
6820 else
6821 spacePush(ctxt, *ctxt->space);
6822
6823 name = xmlParseStartTag(ctxt);
6824 if (name == NULL) {
6825 spacePop(ctxt);
6826 return;
6827 }
6828 namePush(ctxt, name);
6829 ret = ctxt->node;
6830
6831 /*
6832 * [ VC: Root Element Type ]
6833 * The Name in the document type declaration must match the element
6834 * type of the root element.
6835 */
6836 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6837 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6838 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6839
6840 /*
6841 * Check for an Empty Element.
6842 */
6843 if ((RAW == '/') && (NXT(1) == '>')) {
6844 SKIP(2);
6845 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6846 (!ctxt->disableSAX))
6847 ctxt->sax->endElement(ctxt->userData, name);
6848 oldname = namePop(ctxt);
6849 spacePop(ctxt);
6850 if (oldname != NULL) {
6851#ifdef DEBUG_STACK
6852 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6853#endif
6854 xmlFree(oldname);
6855 }
6856 if ( ret != NULL && ctxt->record_info ) {
6857 node_info.end_pos = ctxt->input->consumed +
6858 (CUR_PTR - ctxt->input->base);
6859 node_info.end_line = ctxt->input->line;
6860 node_info.node = ret;
6861 xmlParserAddNodeInfo(ctxt, &node_info);
6862 }
6863 return;
6864 }
6865 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006866 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006867 } else {
6868 ctxt->errNo = XML_ERR_GT_REQUIRED;
6869 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6870 ctxt->sax->error(ctxt->userData,
6871 "Couldn't find end of Start Tag\n%.30s\n",
6872 openTag);
6873 ctxt->wellFormed = 0;
6874 ctxt->disableSAX = 1;
6875
6876 /*
6877 * end of parsing of this node.
6878 */
6879 nodePop(ctxt);
6880 oldname = namePop(ctxt);
6881 spacePop(ctxt);
6882 if (oldname != NULL) {
6883#ifdef DEBUG_STACK
6884 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6885#endif
6886 xmlFree(oldname);
6887 }
6888
6889 /*
6890 * Capture end position and add node
6891 */
6892 if ( ret != NULL && ctxt->record_info ) {
6893 node_info.end_pos = ctxt->input->consumed +
6894 (CUR_PTR - ctxt->input->base);
6895 node_info.end_line = ctxt->input->line;
6896 node_info.node = ret;
6897 xmlParserAddNodeInfo(ctxt, &node_info);
6898 }
6899 return;
6900 }
6901
6902 /*
6903 * Parse the content of the element:
6904 */
6905 xmlParseContent(ctxt);
6906 if (!IS_CHAR(RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00006907 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00006908 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6909 ctxt->sax->error(ctxt->userData,
6910 "Premature end of data in tag %.30s\n", openTag);
6911 ctxt->wellFormed = 0;
6912 ctxt->disableSAX = 1;
6913
6914 /*
6915 * end of parsing of this node.
6916 */
6917 nodePop(ctxt);
6918 oldname = namePop(ctxt);
6919 spacePop(ctxt);
6920 if (oldname != NULL) {
6921#ifdef DEBUG_STACK
6922 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6923#endif
6924 xmlFree(oldname);
6925 }
6926 return;
6927 }
6928
6929 /*
6930 * parse the end of tag: '</' should be here.
6931 */
6932 xmlParseEndTag(ctxt);
6933
6934 /*
6935 * Capture end position and add node
6936 */
6937 if ( ret != NULL && ctxt->record_info ) {
6938 node_info.end_pos = ctxt->input->consumed +
6939 (CUR_PTR - ctxt->input->base);
6940 node_info.end_line = ctxt->input->line;
6941 node_info.node = ret;
6942 xmlParserAddNodeInfo(ctxt, &node_info);
6943 }
6944}
6945
6946/**
6947 * xmlParseVersionNum:
6948 * @ctxt: an XML parser context
6949 *
6950 * parse the XML version value.
6951 *
6952 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6953 *
6954 * Returns the string giving the XML version number, or NULL
6955 */
6956xmlChar *
6957xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6958 xmlChar *buf = NULL;
6959 int len = 0;
6960 int size = 10;
6961 xmlChar cur;
6962
6963 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6964 if (buf == NULL) {
6965 xmlGenericError(xmlGenericErrorContext,
6966 "malloc of %d byte failed\n", size);
6967 return(NULL);
6968 }
6969 cur = CUR;
6970 while (((cur >= 'a') && (cur <= 'z')) ||
6971 ((cur >= 'A') && (cur <= 'Z')) ||
6972 ((cur >= '0') && (cur <= '9')) ||
6973 (cur == '_') || (cur == '.') ||
6974 (cur == ':') || (cur == '-')) {
6975 if (len + 1 >= size) {
6976 size *= 2;
6977 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6978 if (buf == NULL) {
6979 xmlGenericError(xmlGenericErrorContext,
6980 "realloc of %d byte failed\n", size);
6981 return(NULL);
6982 }
6983 }
6984 buf[len++] = cur;
6985 NEXT;
6986 cur=CUR;
6987 }
6988 buf[len] = 0;
6989 return(buf);
6990}
6991
6992/**
6993 * xmlParseVersionInfo:
6994 * @ctxt: an XML parser context
6995 *
6996 * parse the XML version.
6997 *
6998 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6999 *
7000 * [25] Eq ::= S? '=' S?
7001 *
7002 * Returns the version string, e.g. "1.0"
7003 */
7004
7005xmlChar *
7006xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7007 xmlChar *version = NULL;
7008 const xmlChar *q;
7009
7010 if ((RAW == 'v') && (NXT(1) == 'e') &&
7011 (NXT(2) == 'r') && (NXT(3) == 's') &&
7012 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7013 (NXT(6) == 'n')) {
7014 SKIP(7);
7015 SKIP_BLANKS;
7016 if (RAW != '=') {
7017 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7018 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7019 ctxt->sax->error(ctxt->userData,
7020 "xmlParseVersionInfo : expected '='\n");
7021 ctxt->wellFormed = 0;
7022 ctxt->disableSAX = 1;
7023 return(NULL);
7024 }
7025 NEXT;
7026 SKIP_BLANKS;
7027 if (RAW == '"') {
7028 NEXT;
7029 q = CUR_PTR;
7030 version = xmlParseVersionNum(ctxt);
7031 if (RAW != '"') {
7032 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7033 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7034 ctxt->sax->error(ctxt->userData,
7035 "String not closed\n%.50s\n", q);
7036 ctxt->wellFormed = 0;
7037 ctxt->disableSAX = 1;
7038 } else
7039 NEXT;
7040 } else if (RAW == '\''){
7041 NEXT;
7042 q = CUR_PTR;
7043 version = xmlParseVersionNum(ctxt);
7044 if (RAW != '\'') {
7045 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7046 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7047 ctxt->sax->error(ctxt->userData,
7048 "String not closed\n%.50s\n", q);
7049 ctxt->wellFormed = 0;
7050 ctxt->disableSAX = 1;
7051 } else
7052 NEXT;
7053 } else {
7054 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7055 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7056 ctxt->sax->error(ctxt->userData,
7057 "xmlParseVersionInfo : expected ' or \"\n");
7058 ctxt->wellFormed = 0;
7059 ctxt->disableSAX = 1;
7060 }
7061 }
7062 return(version);
7063}
7064
7065/**
7066 * xmlParseEncName:
7067 * @ctxt: an XML parser context
7068 *
7069 * parse the XML encoding name
7070 *
7071 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7072 *
7073 * Returns the encoding name value or NULL
7074 */
7075xmlChar *
7076xmlParseEncName(xmlParserCtxtPtr ctxt) {
7077 xmlChar *buf = NULL;
7078 int len = 0;
7079 int size = 10;
7080 xmlChar cur;
7081
7082 cur = CUR;
7083 if (((cur >= 'a') && (cur <= 'z')) ||
7084 ((cur >= 'A') && (cur <= 'Z'))) {
7085 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7086 if (buf == NULL) {
7087 xmlGenericError(xmlGenericErrorContext,
7088 "malloc of %d byte failed\n", size);
7089 return(NULL);
7090 }
7091
7092 buf[len++] = cur;
7093 NEXT;
7094 cur = CUR;
7095 while (((cur >= 'a') && (cur <= 'z')) ||
7096 ((cur >= 'A') && (cur <= 'Z')) ||
7097 ((cur >= '0') && (cur <= '9')) ||
7098 (cur == '.') || (cur == '_') ||
7099 (cur == '-')) {
7100 if (len + 1 >= size) {
7101 size *= 2;
7102 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7103 if (buf == NULL) {
7104 xmlGenericError(xmlGenericErrorContext,
7105 "realloc of %d byte failed\n", size);
7106 return(NULL);
7107 }
7108 }
7109 buf[len++] = cur;
7110 NEXT;
7111 cur = CUR;
7112 if (cur == 0) {
7113 SHRINK;
7114 GROW;
7115 cur = CUR;
7116 }
7117 }
7118 buf[len] = 0;
7119 } else {
7120 ctxt->errNo = XML_ERR_ENCODING_NAME;
7121 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7122 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7123 ctxt->wellFormed = 0;
7124 ctxt->disableSAX = 1;
7125 }
7126 return(buf);
7127}
7128
7129/**
7130 * xmlParseEncodingDecl:
7131 * @ctxt: an XML parser context
7132 *
7133 * parse the XML encoding declaration
7134 *
7135 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7136 *
7137 * this setups the conversion filters.
7138 *
7139 * Returns the encoding value or NULL
7140 */
7141
7142xmlChar *
7143xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7144 xmlChar *encoding = NULL;
7145 const xmlChar *q;
7146
7147 SKIP_BLANKS;
7148 if ((RAW == 'e') && (NXT(1) == 'n') &&
7149 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7150 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7151 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7152 SKIP(8);
7153 SKIP_BLANKS;
7154 if (RAW != '=') {
7155 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7156 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7157 ctxt->sax->error(ctxt->userData,
7158 "xmlParseEncodingDecl : expected '='\n");
7159 ctxt->wellFormed = 0;
7160 ctxt->disableSAX = 1;
7161 return(NULL);
7162 }
7163 NEXT;
7164 SKIP_BLANKS;
7165 if (RAW == '"') {
7166 NEXT;
7167 q = CUR_PTR;
7168 encoding = xmlParseEncName(ctxt);
7169 if (RAW != '"') {
7170 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7171 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7172 ctxt->sax->error(ctxt->userData,
7173 "String not closed\n%.50s\n", q);
7174 ctxt->wellFormed = 0;
7175 ctxt->disableSAX = 1;
7176 } else
7177 NEXT;
7178 } else if (RAW == '\''){
7179 NEXT;
7180 q = CUR_PTR;
7181 encoding = xmlParseEncName(ctxt);
7182 if (RAW != '\'') {
7183 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7184 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7185 ctxt->sax->error(ctxt->userData,
7186 "String not closed\n%.50s\n", q);
7187 ctxt->wellFormed = 0;
7188 ctxt->disableSAX = 1;
7189 } else
7190 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00007191 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007192 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7193 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7194 ctxt->sax->error(ctxt->userData,
7195 "xmlParseEncodingDecl : expected ' or \"\n");
7196 ctxt->wellFormed = 0;
7197 ctxt->disableSAX = 1;
7198 }
7199 if (encoding != NULL) {
7200 xmlCharEncoding enc;
7201 xmlCharEncodingHandlerPtr handler;
7202
7203 if (ctxt->input->encoding != NULL)
7204 xmlFree((xmlChar *) ctxt->input->encoding);
7205 ctxt->input->encoding = encoding;
7206
7207 enc = xmlParseCharEncoding((const char *) encoding);
7208 /*
7209 * registered set of known encodings
7210 */
7211 if (enc != XML_CHAR_ENCODING_ERROR) {
7212 xmlSwitchEncoding(ctxt, enc);
7213 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7214 xmlFree(encoding);
7215 return(NULL);
7216 }
7217 } else {
7218 /*
7219 * fallback for unknown encodings
7220 */
7221 handler = xmlFindCharEncodingHandler((const char *) encoding);
7222 if (handler != NULL) {
7223 xmlSwitchToEncoding(ctxt, handler);
7224 } else {
7225 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7226 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7227 ctxt->sax->error(ctxt->userData,
7228 "Unsupported encoding %s\n", encoding);
7229 return(NULL);
7230 }
7231 }
7232 }
7233 }
7234 return(encoding);
7235}
7236
7237/**
7238 * xmlParseSDDecl:
7239 * @ctxt: an XML parser context
7240 *
7241 * parse the XML standalone declaration
7242 *
7243 * [32] SDDecl ::= S 'standalone' Eq
7244 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7245 *
7246 * [ VC: Standalone Document Declaration ]
7247 * TODO The standalone document declaration must have the value "no"
7248 * if any external markup declarations contain declarations of:
7249 * - attributes with default values, if elements to which these
7250 * attributes apply appear in the document without specifications
7251 * of values for these attributes, or
7252 * - entities (other than amp, lt, gt, apos, quot), if references
7253 * to those entities appear in the document, or
7254 * - attributes with values subject to normalization, where the
7255 * attribute appears in the document with a value which will change
7256 * as a result of normalization, or
7257 * - element types with element content, if white space occurs directly
7258 * within any instance of those types.
7259 *
7260 * Returns 1 if standalone, 0 otherwise
7261 */
7262
7263int
7264xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7265 int standalone = -1;
7266
7267 SKIP_BLANKS;
7268 if ((RAW == 's') && (NXT(1) == 't') &&
7269 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7270 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7271 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7272 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7273 SKIP(10);
7274 SKIP_BLANKS;
7275 if (RAW != '=') {
7276 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7277 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7278 ctxt->sax->error(ctxt->userData,
7279 "XML standalone declaration : expected '='\n");
7280 ctxt->wellFormed = 0;
7281 ctxt->disableSAX = 1;
7282 return(standalone);
7283 }
7284 NEXT;
7285 SKIP_BLANKS;
7286 if (RAW == '\''){
7287 NEXT;
7288 if ((RAW == 'n') && (NXT(1) == 'o')) {
7289 standalone = 0;
7290 SKIP(2);
7291 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7292 (NXT(2) == 's')) {
7293 standalone = 1;
7294 SKIP(3);
7295 } else {
7296 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7297 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7298 ctxt->sax->error(ctxt->userData,
7299 "standalone accepts only 'yes' or 'no'\n");
7300 ctxt->wellFormed = 0;
7301 ctxt->disableSAX = 1;
7302 }
7303 if (RAW != '\'') {
7304 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7305 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7306 ctxt->sax->error(ctxt->userData, "String not closed\n");
7307 ctxt->wellFormed = 0;
7308 ctxt->disableSAX = 1;
7309 } else
7310 NEXT;
7311 } else if (RAW == '"'){
7312 NEXT;
7313 if ((RAW == 'n') && (NXT(1) == 'o')) {
7314 standalone = 0;
7315 SKIP(2);
7316 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7317 (NXT(2) == 's')) {
7318 standalone = 1;
7319 SKIP(3);
7320 } else {
7321 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7322 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7323 ctxt->sax->error(ctxt->userData,
7324 "standalone accepts only 'yes' or 'no'\n");
7325 ctxt->wellFormed = 0;
7326 ctxt->disableSAX = 1;
7327 }
7328 if (RAW != '"') {
7329 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7331 ctxt->sax->error(ctxt->userData, "String not closed\n");
7332 ctxt->wellFormed = 0;
7333 ctxt->disableSAX = 1;
7334 } else
7335 NEXT;
7336 } else {
7337 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7338 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7339 ctxt->sax->error(ctxt->userData,
7340 "Standalone value not found\n");
7341 ctxt->wellFormed = 0;
7342 ctxt->disableSAX = 1;
7343 }
7344 }
7345 return(standalone);
7346}
7347
7348/**
7349 * xmlParseXMLDecl:
7350 * @ctxt: an XML parser context
7351 *
7352 * parse an XML declaration header
7353 *
7354 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7355 */
7356
7357void
7358xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7359 xmlChar *version;
7360
7361 /*
7362 * We know that '<?xml' is here.
7363 */
7364 SKIP(5);
7365
7366 if (!IS_BLANK(RAW)) {
7367 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7368 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7369 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7370 ctxt->wellFormed = 0;
7371 ctxt->disableSAX = 1;
7372 }
7373 SKIP_BLANKS;
7374
7375 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007376 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007377 */
7378 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007379 if (version == NULL) {
7380 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7381 ctxt->sax->error(ctxt->userData,
7382 "Malformed declaration expecting version\n");
7383 ctxt->wellFormed = 0;
7384 ctxt->disableSAX = 1;
7385 } else {
7386 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7387 /*
7388 * TODO: Blueberry should be detected here
7389 */
7390 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7391 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7392 version);
7393 }
7394 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007395 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007396 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007397 }
Owen Taylor3473f882001-02-23 17:55:21 +00007398
7399 /*
7400 * We may have the encoding declaration
7401 */
7402 if (!IS_BLANK(RAW)) {
7403 if ((RAW == '?') && (NXT(1) == '>')) {
7404 SKIP(2);
7405 return;
7406 }
7407 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7408 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7409 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7410 ctxt->wellFormed = 0;
7411 ctxt->disableSAX = 1;
7412 }
7413 xmlParseEncodingDecl(ctxt);
7414 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7415 /*
7416 * The XML REC instructs us to stop parsing right here
7417 */
7418 return;
7419 }
7420
7421 /*
7422 * We may have the standalone status.
7423 */
7424 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7425 if ((RAW == '?') && (NXT(1) == '>')) {
7426 SKIP(2);
7427 return;
7428 }
7429 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7430 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7431 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7432 ctxt->wellFormed = 0;
7433 ctxt->disableSAX = 1;
7434 }
7435 SKIP_BLANKS;
7436 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7437
7438 SKIP_BLANKS;
7439 if ((RAW == '?') && (NXT(1) == '>')) {
7440 SKIP(2);
7441 } else if (RAW == '>') {
7442 /* Deprecated old WD ... */
7443 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7444 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7445 ctxt->sax->error(ctxt->userData,
7446 "XML declaration must end-up with '?>'\n");
7447 ctxt->wellFormed = 0;
7448 ctxt->disableSAX = 1;
7449 NEXT;
7450 } else {
7451 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7452 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7453 ctxt->sax->error(ctxt->userData,
7454 "parsing XML declaration: '?>' expected\n");
7455 ctxt->wellFormed = 0;
7456 ctxt->disableSAX = 1;
7457 MOVETO_ENDTAG(CUR_PTR);
7458 NEXT;
7459 }
7460}
7461
7462/**
7463 * xmlParseMisc:
7464 * @ctxt: an XML parser context
7465 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007466 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007467 *
7468 * [27] Misc ::= Comment | PI | S
7469 */
7470
7471void
7472xmlParseMisc(xmlParserCtxtPtr ctxt) {
7473 while (((RAW == '<') && (NXT(1) == '?')) ||
7474 ((RAW == '<') && (NXT(1) == '!') &&
7475 (NXT(2) == '-') && (NXT(3) == '-')) ||
7476 IS_BLANK(CUR)) {
7477 if ((RAW == '<') && (NXT(1) == '?')) {
7478 xmlParsePI(ctxt);
7479 } else if (IS_BLANK(CUR)) {
7480 NEXT;
7481 } else
7482 xmlParseComment(ctxt);
7483 }
7484}
7485
7486/**
7487 * xmlParseDocument:
7488 * @ctxt: an XML parser context
7489 *
7490 * parse an XML document (and build a tree if using the standard SAX
7491 * interface).
7492 *
7493 * [1] document ::= prolog element Misc*
7494 *
7495 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7496 *
7497 * Returns 0, -1 in case of error. the parser context is augmented
7498 * as a result of the parsing.
7499 */
7500
7501int
7502xmlParseDocument(xmlParserCtxtPtr ctxt) {
7503 xmlChar start[4];
7504 xmlCharEncoding enc;
7505
7506 xmlInitParser();
7507
7508 GROW;
7509
7510 /*
7511 * SAX: beginning of the document processing.
7512 */
7513 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7514 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7515
Daniel Veillard50f34372001-08-03 12:06:36 +00007516 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007517 /*
7518 * Get the 4 first bytes and decode the charset
7519 * if enc != XML_CHAR_ENCODING_NONE
7520 * plug some encoding conversion routines.
7521 */
7522 start[0] = RAW;
7523 start[1] = NXT(1);
7524 start[2] = NXT(2);
7525 start[3] = NXT(3);
7526 enc = xmlDetectCharEncoding(start, 4);
7527 if (enc != XML_CHAR_ENCODING_NONE) {
7528 xmlSwitchEncoding(ctxt, enc);
7529 }
Owen Taylor3473f882001-02-23 17:55:21 +00007530 }
7531
7532
7533 if (CUR == 0) {
7534 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7535 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7536 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7537 ctxt->wellFormed = 0;
7538 ctxt->disableSAX = 1;
7539 }
7540
7541 /*
7542 * Check for the XMLDecl in the Prolog.
7543 */
7544 GROW;
7545 if ((RAW == '<') && (NXT(1) == '?') &&
7546 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7547 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7548
7549 /*
7550 * Note that we will switch encoding on the fly.
7551 */
7552 xmlParseXMLDecl(ctxt);
7553 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7554 /*
7555 * The XML REC instructs us to stop parsing right here
7556 */
7557 return(-1);
7558 }
7559 ctxt->standalone = ctxt->input->standalone;
7560 SKIP_BLANKS;
7561 } else {
7562 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7563 }
7564 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7565 ctxt->sax->startDocument(ctxt->userData);
7566
7567 /*
7568 * The Misc part of the Prolog
7569 */
7570 GROW;
7571 xmlParseMisc(ctxt);
7572
7573 /*
7574 * Then possibly doc type declaration(s) and more Misc
7575 * (doctypedecl Misc*)?
7576 */
7577 GROW;
7578 if ((RAW == '<') && (NXT(1) == '!') &&
7579 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7580 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7581 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7582 (NXT(8) == 'E')) {
7583
7584 ctxt->inSubset = 1;
7585 xmlParseDocTypeDecl(ctxt);
7586 if (RAW == '[') {
7587 ctxt->instate = XML_PARSER_DTD;
7588 xmlParseInternalSubset(ctxt);
7589 }
7590
7591 /*
7592 * Create and update the external subset.
7593 */
7594 ctxt->inSubset = 2;
7595 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7596 (!ctxt->disableSAX))
7597 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7598 ctxt->extSubSystem, ctxt->extSubURI);
7599 ctxt->inSubset = 0;
7600
7601
7602 ctxt->instate = XML_PARSER_PROLOG;
7603 xmlParseMisc(ctxt);
7604 }
7605
7606 /*
7607 * Time to start parsing the tree itself
7608 */
7609 GROW;
7610 if (RAW != '<') {
7611 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7612 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7613 ctxt->sax->error(ctxt->userData,
7614 "Start tag expected, '<' not found\n");
7615 ctxt->wellFormed = 0;
7616 ctxt->disableSAX = 1;
7617 ctxt->instate = XML_PARSER_EOF;
7618 } else {
7619 ctxt->instate = XML_PARSER_CONTENT;
7620 xmlParseElement(ctxt);
7621 ctxt->instate = XML_PARSER_EPILOG;
7622
7623
7624 /*
7625 * The Misc part at the end
7626 */
7627 xmlParseMisc(ctxt);
7628
7629 if (RAW != 0) {
7630 ctxt->errNo = XML_ERR_DOCUMENT_END;
7631 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7632 ctxt->sax->error(ctxt->userData,
7633 "Extra content at the end of the document\n");
7634 ctxt->wellFormed = 0;
7635 ctxt->disableSAX = 1;
7636 }
7637 ctxt->instate = XML_PARSER_EOF;
7638 }
7639
7640 /*
7641 * SAX: end of the document processing.
7642 */
7643 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7644 (!ctxt->disableSAX))
7645 ctxt->sax->endDocument(ctxt->userData);
7646
Daniel Veillardc7612992002-02-17 22:47:37 +00007647 if (! ctxt->wellFormed) {
7648 ctxt->valid = 0;
7649 return(-1);
7650 }
Owen Taylor3473f882001-02-23 17:55:21 +00007651 return(0);
7652}
7653
7654/**
7655 * xmlParseExtParsedEnt:
7656 * @ctxt: an XML parser context
7657 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007658 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00007659 * An external general parsed entity is well-formed if it matches the
7660 * production labeled extParsedEnt.
7661 *
7662 * [78] extParsedEnt ::= TextDecl? content
7663 *
7664 * Returns 0, -1 in case of error. the parser context is augmented
7665 * as a result of the parsing.
7666 */
7667
7668int
7669xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7670 xmlChar start[4];
7671 xmlCharEncoding enc;
7672
7673 xmlDefaultSAXHandlerInit();
7674
7675 GROW;
7676
7677 /*
7678 * SAX: beginning of the document processing.
7679 */
7680 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7681 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7682
7683 /*
7684 * Get the 4 first bytes and decode the charset
7685 * if enc != XML_CHAR_ENCODING_NONE
7686 * plug some encoding conversion routines.
7687 */
7688 start[0] = RAW;
7689 start[1] = NXT(1);
7690 start[2] = NXT(2);
7691 start[3] = NXT(3);
7692 enc = xmlDetectCharEncoding(start, 4);
7693 if (enc != XML_CHAR_ENCODING_NONE) {
7694 xmlSwitchEncoding(ctxt, enc);
7695 }
7696
7697
7698 if (CUR == 0) {
7699 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7700 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7701 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7702 ctxt->wellFormed = 0;
7703 ctxt->disableSAX = 1;
7704 }
7705
7706 /*
7707 * Check for the XMLDecl in the Prolog.
7708 */
7709 GROW;
7710 if ((RAW == '<') && (NXT(1) == '?') &&
7711 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7712 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7713
7714 /*
7715 * Note that we will switch encoding on the fly.
7716 */
7717 xmlParseXMLDecl(ctxt);
7718 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7719 /*
7720 * The XML REC instructs us to stop parsing right here
7721 */
7722 return(-1);
7723 }
7724 SKIP_BLANKS;
7725 } else {
7726 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7727 }
7728 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7729 ctxt->sax->startDocument(ctxt->userData);
7730
7731 /*
7732 * Doing validity checking on chunk doesn't make sense
7733 */
7734 ctxt->instate = XML_PARSER_CONTENT;
7735 ctxt->validate = 0;
7736 ctxt->loadsubset = 0;
7737 ctxt->depth = 0;
7738
7739 xmlParseContent(ctxt);
7740
7741 if ((RAW == '<') && (NXT(1) == '/')) {
7742 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7743 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7744 ctxt->sax->error(ctxt->userData,
7745 "chunk is not well balanced\n");
7746 ctxt->wellFormed = 0;
7747 ctxt->disableSAX = 1;
7748 } else if (RAW != 0) {
7749 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7750 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7751 ctxt->sax->error(ctxt->userData,
7752 "extra content at the end of well balanced chunk\n");
7753 ctxt->wellFormed = 0;
7754 ctxt->disableSAX = 1;
7755 }
7756
7757 /*
7758 * SAX: end of the document processing.
7759 */
7760 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7761 (!ctxt->disableSAX))
7762 ctxt->sax->endDocument(ctxt->userData);
7763
7764 if (! ctxt->wellFormed) return(-1);
7765 return(0);
7766}
7767
7768/************************************************************************
7769 * *
7770 * Progressive parsing interfaces *
7771 * *
7772 ************************************************************************/
7773
7774/**
7775 * xmlParseLookupSequence:
7776 * @ctxt: an XML parser context
7777 * @first: the first char to lookup
7778 * @next: the next char to lookup or zero
7779 * @third: the next char to lookup or zero
7780 *
7781 * Try to find if a sequence (first, next, third) or just (first next) or
7782 * (first) is available in the input stream.
7783 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7784 * to avoid rescanning sequences of bytes, it DOES change the state of the
7785 * parser, do not use liberally.
7786 *
7787 * Returns the index to the current parsing point if the full sequence
7788 * is available, -1 otherwise.
7789 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007790static int
Owen Taylor3473f882001-02-23 17:55:21 +00007791xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7792 xmlChar next, xmlChar third) {
7793 int base, len;
7794 xmlParserInputPtr in;
7795 const xmlChar *buf;
7796
7797 in = ctxt->input;
7798 if (in == NULL) return(-1);
7799 base = in->cur - in->base;
7800 if (base < 0) return(-1);
7801 if (ctxt->checkIndex > base)
7802 base = ctxt->checkIndex;
7803 if (in->buf == NULL) {
7804 buf = in->base;
7805 len = in->length;
7806 } else {
7807 buf = in->buf->buffer->content;
7808 len = in->buf->buffer->use;
7809 }
7810 /* take into account the sequence length */
7811 if (third) len -= 2;
7812 else if (next) len --;
7813 for (;base < len;base++) {
7814 if (buf[base] == first) {
7815 if (third != 0) {
7816 if ((buf[base + 1] != next) ||
7817 (buf[base + 2] != third)) continue;
7818 } else if (next != 0) {
7819 if (buf[base + 1] != next) continue;
7820 }
7821 ctxt->checkIndex = 0;
7822#ifdef DEBUG_PUSH
7823 if (next == 0)
7824 xmlGenericError(xmlGenericErrorContext,
7825 "PP: lookup '%c' found at %d\n",
7826 first, base);
7827 else if (third == 0)
7828 xmlGenericError(xmlGenericErrorContext,
7829 "PP: lookup '%c%c' found at %d\n",
7830 first, next, base);
7831 else
7832 xmlGenericError(xmlGenericErrorContext,
7833 "PP: lookup '%c%c%c' found at %d\n",
7834 first, next, third, base);
7835#endif
7836 return(base - (in->cur - in->base));
7837 }
7838 }
7839 ctxt->checkIndex = base;
7840#ifdef DEBUG_PUSH
7841 if (next == 0)
7842 xmlGenericError(xmlGenericErrorContext,
7843 "PP: lookup '%c' failed\n", first);
7844 else if (third == 0)
7845 xmlGenericError(xmlGenericErrorContext,
7846 "PP: lookup '%c%c' failed\n", first, next);
7847 else
7848 xmlGenericError(xmlGenericErrorContext,
7849 "PP: lookup '%c%c%c' failed\n", first, next, third);
7850#endif
7851 return(-1);
7852}
7853
7854/**
7855 * xmlParseTryOrFinish:
7856 * @ctxt: an XML parser context
7857 * @terminate: last chunk indicator
7858 *
7859 * Try to progress on parsing
7860 *
7861 * Returns zero if no parsing was possible
7862 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007863static int
Owen Taylor3473f882001-02-23 17:55:21 +00007864xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7865 int ret = 0;
7866 int avail;
7867 xmlChar cur, next;
7868
7869#ifdef DEBUG_PUSH
7870 switch (ctxt->instate) {
7871 case XML_PARSER_EOF:
7872 xmlGenericError(xmlGenericErrorContext,
7873 "PP: try EOF\n"); break;
7874 case XML_PARSER_START:
7875 xmlGenericError(xmlGenericErrorContext,
7876 "PP: try START\n"); break;
7877 case XML_PARSER_MISC:
7878 xmlGenericError(xmlGenericErrorContext,
7879 "PP: try MISC\n");break;
7880 case XML_PARSER_COMMENT:
7881 xmlGenericError(xmlGenericErrorContext,
7882 "PP: try COMMENT\n");break;
7883 case XML_PARSER_PROLOG:
7884 xmlGenericError(xmlGenericErrorContext,
7885 "PP: try PROLOG\n");break;
7886 case XML_PARSER_START_TAG:
7887 xmlGenericError(xmlGenericErrorContext,
7888 "PP: try START_TAG\n");break;
7889 case XML_PARSER_CONTENT:
7890 xmlGenericError(xmlGenericErrorContext,
7891 "PP: try CONTENT\n");break;
7892 case XML_PARSER_CDATA_SECTION:
7893 xmlGenericError(xmlGenericErrorContext,
7894 "PP: try CDATA_SECTION\n");break;
7895 case XML_PARSER_END_TAG:
7896 xmlGenericError(xmlGenericErrorContext,
7897 "PP: try END_TAG\n");break;
7898 case XML_PARSER_ENTITY_DECL:
7899 xmlGenericError(xmlGenericErrorContext,
7900 "PP: try ENTITY_DECL\n");break;
7901 case XML_PARSER_ENTITY_VALUE:
7902 xmlGenericError(xmlGenericErrorContext,
7903 "PP: try ENTITY_VALUE\n");break;
7904 case XML_PARSER_ATTRIBUTE_VALUE:
7905 xmlGenericError(xmlGenericErrorContext,
7906 "PP: try ATTRIBUTE_VALUE\n");break;
7907 case XML_PARSER_DTD:
7908 xmlGenericError(xmlGenericErrorContext,
7909 "PP: try DTD\n");break;
7910 case XML_PARSER_EPILOG:
7911 xmlGenericError(xmlGenericErrorContext,
7912 "PP: try EPILOG\n");break;
7913 case XML_PARSER_PI:
7914 xmlGenericError(xmlGenericErrorContext,
7915 "PP: try PI\n");break;
7916 case XML_PARSER_IGNORE:
7917 xmlGenericError(xmlGenericErrorContext,
7918 "PP: try IGNORE\n");break;
7919 }
7920#endif
7921
7922 while (1) {
7923 /*
7924 * Pop-up of finished entities.
7925 */
7926 while ((RAW == 0) && (ctxt->inputNr > 1))
7927 xmlPopInput(ctxt);
7928
7929 if (ctxt->input ==NULL) break;
7930 if (ctxt->input->buf == NULL)
7931 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00007932 else {
7933 /*
7934 * If we are operating on converted input, try to flush
7935 * remainng chars to avoid them stalling in the non-converted
7936 * buffer.
7937 */
7938 if ((ctxt->input->buf->raw != NULL) &&
7939 (ctxt->input->buf->raw->use > 0)) {
7940 int base = ctxt->input->base -
7941 ctxt->input->buf->buffer->content;
7942 int current = ctxt->input->cur - ctxt->input->base;
7943
7944 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
7945 ctxt->input->base = ctxt->input->buf->buffer->content + base;
7946 ctxt->input->cur = ctxt->input->base + current;
7947 ctxt->input->end =
7948 &ctxt->input->buf->buffer->content[
7949 ctxt->input->buf->buffer->use];
7950 }
7951 avail = ctxt->input->buf->buffer->use -
7952 (ctxt->input->cur - ctxt->input->base);
7953 }
Owen Taylor3473f882001-02-23 17:55:21 +00007954 if (avail < 1)
7955 goto done;
7956 switch (ctxt->instate) {
7957 case XML_PARSER_EOF:
7958 /*
7959 * Document parsing is done !
7960 */
7961 goto done;
7962 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00007963 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
7964 xmlChar start[4];
7965 xmlCharEncoding enc;
7966
7967 /*
7968 * Very first chars read from the document flow.
7969 */
7970 if (avail < 4)
7971 goto done;
7972
7973 /*
7974 * Get the 4 first bytes and decode the charset
7975 * if enc != XML_CHAR_ENCODING_NONE
7976 * plug some encoding conversion routines.
7977 */
7978 start[0] = RAW;
7979 start[1] = NXT(1);
7980 start[2] = NXT(2);
7981 start[3] = NXT(3);
7982 enc = xmlDetectCharEncoding(start, 4);
7983 if (enc != XML_CHAR_ENCODING_NONE) {
7984 xmlSwitchEncoding(ctxt, enc);
7985 }
7986 break;
7987 }
Owen Taylor3473f882001-02-23 17:55:21 +00007988
7989 cur = ctxt->input->cur[0];
7990 next = ctxt->input->cur[1];
7991 if (cur == 0) {
7992 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7993 ctxt->sax->setDocumentLocator(ctxt->userData,
7994 &xmlDefaultSAXLocator);
7995 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7996 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7997 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7998 ctxt->wellFormed = 0;
7999 ctxt->disableSAX = 1;
8000 ctxt->instate = XML_PARSER_EOF;
8001#ifdef DEBUG_PUSH
8002 xmlGenericError(xmlGenericErrorContext,
8003 "PP: entering EOF\n");
8004#endif
8005 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8006 ctxt->sax->endDocument(ctxt->userData);
8007 goto done;
8008 }
8009 if ((cur == '<') && (next == '?')) {
8010 /* PI or XML decl */
8011 if (avail < 5) return(ret);
8012 if ((!terminate) &&
8013 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8014 return(ret);
8015 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8016 ctxt->sax->setDocumentLocator(ctxt->userData,
8017 &xmlDefaultSAXLocator);
8018 if ((ctxt->input->cur[2] == 'x') &&
8019 (ctxt->input->cur[3] == 'm') &&
8020 (ctxt->input->cur[4] == 'l') &&
8021 (IS_BLANK(ctxt->input->cur[5]))) {
8022 ret += 5;
8023#ifdef DEBUG_PUSH
8024 xmlGenericError(xmlGenericErrorContext,
8025 "PP: Parsing XML Decl\n");
8026#endif
8027 xmlParseXMLDecl(ctxt);
8028 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8029 /*
8030 * The XML REC instructs us to stop parsing right
8031 * here
8032 */
8033 ctxt->instate = XML_PARSER_EOF;
8034 return(0);
8035 }
8036 ctxt->standalone = ctxt->input->standalone;
8037 if ((ctxt->encoding == NULL) &&
8038 (ctxt->input->encoding != NULL))
8039 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8040 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8041 (!ctxt->disableSAX))
8042 ctxt->sax->startDocument(ctxt->userData);
8043 ctxt->instate = XML_PARSER_MISC;
8044#ifdef DEBUG_PUSH
8045 xmlGenericError(xmlGenericErrorContext,
8046 "PP: entering MISC\n");
8047#endif
8048 } else {
8049 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8050 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8051 (!ctxt->disableSAX))
8052 ctxt->sax->startDocument(ctxt->userData);
8053 ctxt->instate = XML_PARSER_MISC;
8054#ifdef DEBUG_PUSH
8055 xmlGenericError(xmlGenericErrorContext,
8056 "PP: entering MISC\n");
8057#endif
8058 }
8059 } else {
8060 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8061 ctxt->sax->setDocumentLocator(ctxt->userData,
8062 &xmlDefaultSAXLocator);
8063 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8064 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8065 (!ctxt->disableSAX))
8066 ctxt->sax->startDocument(ctxt->userData);
8067 ctxt->instate = XML_PARSER_MISC;
8068#ifdef DEBUG_PUSH
8069 xmlGenericError(xmlGenericErrorContext,
8070 "PP: entering MISC\n");
8071#endif
8072 }
8073 break;
8074 case XML_PARSER_MISC:
8075 SKIP_BLANKS;
8076 if (ctxt->input->buf == NULL)
8077 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8078 else
8079 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8080 if (avail < 2)
8081 goto done;
8082 cur = ctxt->input->cur[0];
8083 next = ctxt->input->cur[1];
8084 if ((cur == '<') && (next == '?')) {
8085 if ((!terminate) &&
8086 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8087 goto done;
8088#ifdef DEBUG_PUSH
8089 xmlGenericError(xmlGenericErrorContext,
8090 "PP: Parsing PI\n");
8091#endif
8092 xmlParsePI(ctxt);
8093 } else if ((cur == '<') && (next == '!') &&
8094 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8095 if ((!terminate) &&
8096 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8097 goto done;
8098#ifdef DEBUG_PUSH
8099 xmlGenericError(xmlGenericErrorContext,
8100 "PP: Parsing Comment\n");
8101#endif
8102 xmlParseComment(ctxt);
8103 ctxt->instate = XML_PARSER_MISC;
8104 } else if ((cur == '<') && (next == '!') &&
8105 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8106 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8107 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8108 (ctxt->input->cur[8] == 'E')) {
8109 if ((!terminate) &&
8110 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8111 goto done;
8112#ifdef DEBUG_PUSH
8113 xmlGenericError(xmlGenericErrorContext,
8114 "PP: Parsing internal subset\n");
8115#endif
8116 ctxt->inSubset = 1;
8117 xmlParseDocTypeDecl(ctxt);
8118 if (RAW == '[') {
8119 ctxt->instate = XML_PARSER_DTD;
8120#ifdef DEBUG_PUSH
8121 xmlGenericError(xmlGenericErrorContext,
8122 "PP: entering DTD\n");
8123#endif
8124 } else {
8125 /*
8126 * Create and update the external subset.
8127 */
8128 ctxt->inSubset = 2;
8129 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8130 (ctxt->sax->externalSubset != NULL))
8131 ctxt->sax->externalSubset(ctxt->userData,
8132 ctxt->intSubName, ctxt->extSubSystem,
8133 ctxt->extSubURI);
8134 ctxt->inSubset = 0;
8135 ctxt->instate = XML_PARSER_PROLOG;
8136#ifdef DEBUG_PUSH
8137 xmlGenericError(xmlGenericErrorContext,
8138 "PP: entering PROLOG\n");
8139#endif
8140 }
8141 } else if ((cur == '<') && (next == '!') &&
8142 (avail < 9)) {
8143 goto done;
8144 } else {
8145 ctxt->instate = XML_PARSER_START_TAG;
8146#ifdef DEBUG_PUSH
8147 xmlGenericError(xmlGenericErrorContext,
8148 "PP: entering START_TAG\n");
8149#endif
8150 }
8151 break;
8152 case XML_PARSER_IGNORE:
8153 xmlGenericError(xmlGenericErrorContext,
8154 "PP: internal error, state == IGNORE");
8155 ctxt->instate = XML_PARSER_DTD;
8156#ifdef DEBUG_PUSH
8157 xmlGenericError(xmlGenericErrorContext,
8158 "PP: entering DTD\n");
8159#endif
8160 break;
8161 case XML_PARSER_PROLOG:
8162 SKIP_BLANKS;
8163 if (ctxt->input->buf == NULL)
8164 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8165 else
8166 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8167 if (avail < 2)
8168 goto done;
8169 cur = ctxt->input->cur[0];
8170 next = ctxt->input->cur[1];
8171 if ((cur == '<') && (next == '?')) {
8172 if ((!terminate) &&
8173 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8174 goto done;
8175#ifdef DEBUG_PUSH
8176 xmlGenericError(xmlGenericErrorContext,
8177 "PP: Parsing PI\n");
8178#endif
8179 xmlParsePI(ctxt);
8180 } else if ((cur == '<') && (next == '!') &&
8181 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8182 if ((!terminate) &&
8183 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8184 goto done;
8185#ifdef DEBUG_PUSH
8186 xmlGenericError(xmlGenericErrorContext,
8187 "PP: Parsing Comment\n");
8188#endif
8189 xmlParseComment(ctxt);
8190 ctxt->instate = XML_PARSER_PROLOG;
8191 } else if ((cur == '<') && (next == '!') &&
8192 (avail < 4)) {
8193 goto done;
8194 } else {
8195 ctxt->instate = XML_PARSER_START_TAG;
8196#ifdef DEBUG_PUSH
8197 xmlGenericError(xmlGenericErrorContext,
8198 "PP: entering START_TAG\n");
8199#endif
8200 }
8201 break;
8202 case XML_PARSER_EPILOG:
8203 SKIP_BLANKS;
8204 if (ctxt->input->buf == NULL)
8205 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8206 else
8207 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8208 if (avail < 2)
8209 goto done;
8210 cur = ctxt->input->cur[0];
8211 next = ctxt->input->cur[1];
8212 if ((cur == '<') && (next == '?')) {
8213 if ((!terminate) &&
8214 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8215 goto done;
8216#ifdef DEBUG_PUSH
8217 xmlGenericError(xmlGenericErrorContext,
8218 "PP: Parsing PI\n");
8219#endif
8220 xmlParsePI(ctxt);
8221 ctxt->instate = XML_PARSER_EPILOG;
8222 } else if ((cur == '<') && (next == '!') &&
8223 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8224 if ((!terminate) &&
8225 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8226 goto done;
8227#ifdef DEBUG_PUSH
8228 xmlGenericError(xmlGenericErrorContext,
8229 "PP: Parsing Comment\n");
8230#endif
8231 xmlParseComment(ctxt);
8232 ctxt->instate = XML_PARSER_EPILOG;
8233 } else if ((cur == '<') && (next == '!') &&
8234 (avail < 4)) {
8235 goto done;
8236 } else {
8237 ctxt->errNo = XML_ERR_DOCUMENT_END;
8238 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8239 ctxt->sax->error(ctxt->userData,
8240 "Extra content at the end of the document\n");
8241 ctxt->wellFormed = 0;
8242 ctxt->disableSAX = 1;
8243 ctxt->instate = XML_PARSER_EOF;
8244#ifdef DEBUG_PUSH
8245 xmlGenericError(xmlGenericErrorContext,
8246 "PP: entering EOF\n");
8247#endif
8248 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8249 (!ctxt->disableSAX))
8250 ctxt->sax->endDocument(ctxt->userData);
8251 goto done;
8252 }
8253 break;
8254 case XML_PARSER_START_TAG: {
8255 xmlChar *name, *oldname;
8256
8257 if ((avail < 2) && (ctxt->inputNr == 1))
8258 goto done;
8259 cur = ctxt->input->cur[0];
8260 if (cur != '<') {
8261 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8262 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8263 ctxt->sax->error(ctxt->userData,
8264 "Start tag expect, '<' not found\n");
8265 ctxt->wellFormed = 0;
8266 ctxt->disableSAX = 1;
8267 ctxt->instate = XML_PARSER_EOF;
8268#ifdef DEBUG_PUSH
8269 xmlGenericError(xmlGenericErrorContext,
8270 "PP: entering EOF\n");
8271#endif
8272 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8273 (!ctxt->disableSAX))
8274 ctxt->sax->endDocument(ctxt->userData);
8275 goto done;
8276 }
8277 if ((!terminate) &&
8278 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8279 goto done;
8280 if (ctxt->spaceNr == 0)
8281 spacePush(ctxt, -1);
8282 else
8283 spacePush(ctxt, *ctxt->space);
8284 name = xmlParseStartTag(ctxt);
8285 if (name == NULL) {
8286 spacePop(ctxt);
8287 ctxt->instate = XML_PARSER_EOF;
8288#ifdef DEBUG_PUSH
8289 xmlGenericError(xmlGenericErrorContext,
8290 "PP: entering EOF\n");
8291#endif
8292 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8293 (!ctxt->disableSAX))
8294 ctxt->sax->endDocument(ctxt->userData);
8295 goto done;
8296 }
8297 namePush(ctxt, xmlStrdup(name));
8298
8299 /*
8300 * [ VC: Root Element Type ]
8301 * The Name in the document type declaration must match
8302 * the element type of the root element.
8303 */
8304 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8305 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8306 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8307
8308 /*
8309 * Check for an Empty Element.
8310 */
8311 if ((RAW == '/') && (NXT(1) == '>')) {
8312 SKIP(2);
8313 if ((ctxt->sax != NULL) &&
8314 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8315 ctxt->sax->endElement(ctxt->userData, name);
8316 xmlFree(name);
8317 oldname = namePop(ctxt);
8318 spacePop(ctxt);
8319 if (oldname != NULL) {
8320#ifdef DEBUG_STACK
8321 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8322#endif
8323 xmlFree(oldname);
8324 }
8325 if (ctxt->name == NULL) {
8326 ctxt->instate = XML_PARSER_EPILOG;
8327#ifdef DEBUG_PUSH
8328 xmlGenericError(xmlGenericErrorContext,
8329 "PP: entering EPILOG\n");
8330#endif
8331 } else {
8332 ctxt->instate = XML_PARSER_CONTENT;
8333#ifdef DEBUG_PUSH
8334 xmlGenericError(xmlGenericErrorContext,
8335 "PP: entering CONTENT\n");
8336#endif
8337 }
8338 break;
8339 }
8340 if (RAW == '>') {
8341 NEXT;
8342 } else {
8343 ctxt->errNo = XML_ERR_GT_REQUIRED;
8344 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8345 ctxt->sax->error(ctxt->userData,
8346 "Couldn't find end of Start Tag %s\n",
8347 name);
8348 ctxt->wellFormed = 0;
8349 ctxt->disableSAX = 1;
8350
8351 /*
8352 * end of parsing of this node.
8353 */
8354 nodePop(ctxt);
8355 oldname = namePop(ctxt);
8356 spacePop(ctxt);
8357 if (oldname != NULL) {
8358#ifdef DEBUG_STACK
8359 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8360#endif
8361 xmlFree(oldname);
8362 }
8363 }
8364 xmlFree(name);
8365 ctxt->instate = XML_PARSER_CONTENT;
8366#ifdef DEBUG_PUSH
8367 xmlGenericError(xmlGenericErrorContext,
8368 "PP: entering CONTENT\n");
8369#endif
8370 break;
8371 }
8372 case XML_PARSER_CONTENT: {
8373 const xmlChar *test;
8374 int cons;
Daniel Veillard04be4f52001-03-26 21:23:53 +00008375 int tok;
Owen Taylor3473f882001-02-23 17:55:21 +00008376
8377 /*
8378 * Handle preparsed entities and charRef
8379 */
8380 if (ctxt->token != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008381 xmlChar current[2] = { 0 , 0 } ;
Owen Taylor3473f882001-02-23 17:55:21 +00008382
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008383 current[0] = (xmlChar) ctxt->token;
Owen Taylor3473f882001-02-23 17:55:21 +00008384 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8385 (ctxt->sax->characters != NULL))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008386 ctxt->sax->characters(ctxt->userData, current, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00008387 ctxt->token = 0;
8388 }
8389 if ((avail < 2) && (ctxt->inputNr == 1))
8390 goto done;
8391 cur = ctxt->input->cur[0];
8392 next = ctxt->input->cur[1];
8393
8394 test = CUR_PTR;
8395 cons = ctxt->input->consumed;
8396 tok = ctxt->token;
8397 if ((cur == '<') && (next == '?')) {
8398 if ((!terminate) &&
8399 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8400 goto done;
8401#ifdef DEBUG_PUSH
8402 xmlGenericError(xmlGenericErrorContext,
8403 "PP: Parsing PI\n");
8404#endif
8405 xmlParsePI(ctxt);
8406 } else if ((cur == '<') && (next == '!') &&
8407 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8408 if ((!terminate) &&
8409 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8410 goto done;
8411#ifdef DEBUG_PUSH
8412 xmlGenericError(xmlGenericErrorContext,
8413 "PP: Parsing Comment\n");
8414#endif
8415 xmlParseComment(ctxt);
8416 ctxt->instate = XML_PARSER_CONTENT;
8417 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8418 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8419 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8420 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8421 (ctxt->input->cur[8] == '[')) {
8422 SKIP(9);
8423 ctxt->instate = XML_PARSER_CDATA_SECTION;
8424#ifdef DEBUG_PUSH
8425 xmlGenericError(xmlGenericErrorContext,
8426 "PP: entering CDATA_SECTION\n");
8427#endif
8428 break;
8429 } else if ((cur == '<') && (next == '!') &&
8430 (avail < 9)) {
8431 goto done;
8432 } else if ((cur == '<') && (next == '/')) {
8433 ctxt->instate = XML_PARSER_END_TAG;
8434#ifdef DEBUG_PUSH
8435 xmlGenericError(xmlGenericErrorContext,
8436 "PP: entering END_TAG\n");
8437#endif
8438 break;
8439 } else if (cur == '<') {
8440 ctxt->instate = XML_PARSER_START_TAG;
8441#ifdef DEBUG_PUSH
8442 xmlGenericError(xmlGenericErrorContext,
8443 "PP: entering START_TAG\n");
8444#endif
8445 break;
8446 } else if (cur == '&') {
8447 if ((!terminate) &&
8448 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8449 goto done;
8450#ifdef DEBUG_PUSH
8451 xmlGenericError(xmlGenericErrorContext,
8452 "PP: Parsing Reference\n");
8453#endif
8454 xmlParseReference(ctxt);
8455 } else {
8456 /* TODO Avoid the extra copy, handle directly !!! */
8457 /*
8458 * Goal of the following test is:
8459 * - minimize calls to the SAX 'character' callback
8460 * when they are mergeable
8461 * - handle an problem for isBlank when we only parse
8462 * a sequence of blank chars and the next one is
8463 * not available to check against '<' presence.
8464 * - tries to homogenize the differences in SAX
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008465 * callbacks between the push and pull versions
Owen Taylor3473f882001-02-23 17:55:21 +00008466 * of the parser.
8467 */
8468 if ((ctxt->inputNr == 1) &&
8469 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8470 if ((!terminate) &&
8471 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8472 goto done;
8473 }
8474 ctxt->checkIndex = 0;
8475#ifdef DEBUG_PUSH
8476 xmlGenericError(xmlGenericErrorContext,
8477 "PP: Parsing char data\n");
8478#endif
8479 xmlParseCharData(ctxt, 0);
8480 }
8481 /*
8482 * Pop-up of finished entities.
8483 */
8484 while ((RAW == 0) && (ctxt->inputNr > 1))
8485 xmlPopInput(ctxt);
8486 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8487 (tok == ctxt->token)) {
8488 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8489 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8490 ctxt->sax->error(ctxt->userData,
8491 "detected an error in element content\n");
8492 ctxt->wellFormed = 0;
8493 ctxt->disableSAX = 1;
8494 ctxt->instate = XML_PARSER_EOF;
8495 break;
8496 }
8497 break;
8498 }
8499 case XML_PARSER_CDATA_SECTION: {
8500 /*
8501 * The Push mode need to have the SAX callback for
8502 * cdataBlock merge back contiguous callbacks.
8503 */
8504 int base;
8505
8506 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8507 if (base < 0) {
8508 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8509 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8510 if (ctxt->sax->cdataBlock != NULL)
8511 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8512 XML_PARSER_BIG_BUFFER_SIZE);
8513 }
8514 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8515 ctxt->checkIndex = 0;
8516 }
8517 goto done;
8518 } else {
8519 if ((ctxt->sax != NULL) && (base > 0) &&
8520 (!ctxt->disableSAX)) {
8521 if (ctxt->sax->cdataBlock != NULL)
8522 ctxt->sax->cdataBlock(ctxt->userData,
8523 ctxt->input->cur, base);
8524 }
8525 SKIP(base + 3);
8526 ctxt->checkIndex = 0;
8527 ctxt->instate = XML_PARSER_CONTENT;
8528#ifdef DEBUG_PUSH
8529 xmlGenericError(xmlGenericErrorContext,
8530 "PP: entering CONTENT\n");
8531#endif
8532 }
8533 break;
8534 }
8535 case XML_PARSER_END_TAG:
8536 if (avail < 2)
8537 goto done;
8538 if ((!terminate) &&
8539 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8540 goto done;
8541 xmlParseEndTag(ctxt);
8542 if (ctxt->name == NULL) {
8543 ctxt->instate = XML_PARSER_EPILOG;
8544#ifdef DEBUG_PUSH
8545 xmlGenericError(xmlGenericErrorContext,
8546 "PP: entering EPILOG\n");
8547#endif
8548 } else {
8549 ctxt->instate = XML_PARSER_CONTENT;
8550#ifdef DEBUG_PUSH
8551 xmlGenericError(xmlGenericErrorContext,
8552 "PP: entering CONTENT\n");
8553#endif
8554 }
8555 break;
8556 case XML_PARSER_DTD: {
8557 /*
8558 * Sorry but progressive parsing of the internal subset
8559 * is not expected to be supported. We first check that
8560 * the full content of the internal subset is available and
8561 * the parsing is launched only at that point.
8562 * Internal subset ends up with "']' S? '>'" in an unescaped
8563 * section and not in a ']]>' sequence which are conditional
8564 * sections (whoever argued to keep that crap in XML deserve
8565 * a place in hell !).
8566 */
8567 int base, i;
8568 xmlChar *buf;
8569 xmlChar quote = 0;
8570
8571 base = ctxt->input->cur - ctxt->input->base;
8572 if (base < 0) return(0);
8573 if (ctxt->checkIndex > base)
8574 base = ctxt->checkIndex;
8575 buf = ctxt->input->buf->buffer->content;
8576 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8577 base++) {
8578 if (quote != 0) {
8579 if (buf[base] == quote)
8580 quote = 0;
8581 continue;
8582 }
8583 if (buf[base] == '"') {
8584 quote = '"';
8585 continue;
8586 }
8587 if (buf[base] == '\'') {
8588 quote = '\'';
8589 continue;
8590 }
8591 if (buf[base] == ']') {
8592 if ((unsigned int) base +1 >=
8593 ctxt->input->buf->buffer->use)
8594 break;
8595 if (buf[base + 1] == ']') {
8596 /* conditional crap, skip both ']' ! */
8597 base++;
8598 continue;
8599 }
8600 for (i = 0;
8601 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8602 i++) {
8603 if (buf[base + i] == '>')
8604 goto found_end_int_subset;
8605 }
8606 break;
8607 }
8608 }
8609 /*
8610 * We didn't found the end of the Internal subset
8611 */
8612 if (quote == 0)
8613 ctxt->checkIndex = base;
8614#ifdef DEBUG_PUSH
8615 if (next == 0)
8616 xmlGenericError(xmlGenericErrorContext,
8617 "PP: lookup of int subset end filed\n");
8618#endif
8619 goto done;
8620
8621found_end_int_subset:
8622 xmlParseInternalSubset(ctxt);
8623 ctxt->inSubset = 2;
8624 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8625 (ctxt->sax->externalSubset != NULL))
8626 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8627 ctxt->extSubSystem, ctxt->extSubURI);
8628 ctxt->inSubset = 0;
8629 ctxt->instate = XML_PARSER_PROLOG;
8630 ctxt->checkIndex = 0;
8631#ifdef DEBUG_PUSH
8632 xmlGenericError(xmlGenericErrorContext,
8633 "PP: entering PROLOG\n");
8634#endif
8635 break;
8636 }
8637 case XML_PARSER_COMMENT:
8638 xmlGenericError(xmlGenericErrorContext,
8639 "PP: internal error, state == COMMENT\n");
8640 ctxt->instate = XML_PARSER_CONTENT;
8641#ifdef DEBUG_PUSH
8642 xmlGenericError(xmlGenericErrorContext,
8643 "PP: entering CONTENT\n");
8644#endif
8645 break;
8646 case XML_PARSER_PI:
8647 xmlGenericError(xmlGenericErrorContext,
8648 "PP: internal error, state == PI\n");
8649 ctxt->instate = XML_PARSER_CONTENT;
8650#ifdef DEBUG_PUSH
8651 xmlGenericError(xmlGenericErrorContext,
8652 "PP: entering CONTENT\n");
8653#endif
8654 break;
8655 case XML_PARSER_ENTITY_DECL:
8656 xmlGenericError(xmlGenericErrorContext,
8657 "PP: internal error, state == ENTITY_DECL\n");
8658 ctxt->instate = XML_PARSER_DTD;
8659#ifdef DEBUG_PUSH
8660 xmlGenericError(xmlGenericErrorContext,
8661 "PP: entering DTD\n");
8662#endif
8663 break;
8664 case XML_PARSER_ENTITY_VALUE:
8665 xmlGenericError(xmlGenericErrorContext,
8666 "PP: internal error, state == ENTITY_VALUE\n");
8667 ctxt->instate = XML_PARSER_CONTENT;
8668#ifdef DEBUG_PUSH
8669 xmlGenericError(xmlGenericErrorContext,
8670 "PP: entering DTD\n");
8671#endif
8672 break;
8673 case XML_PARSER_ATTRIBUTE_VALUE:
8674 xmlGenericError(xmlGenericErrorContext,
8675 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8676 ctxt->instate = XML_PARSER_START_TAG;
8677#ifdef DEBUG_PUSH
8678 xmlGenericError(xmlGenericErrorContext,
8679 "PP: entering START_TAG\n");
8680#endif
8681 break;
8682 case XML_PARSER_SYSTEM_LITERAL:
8683 xmlGenericError(xmlGenericErrorContext,
8684 "PP: internal error, state == SYSTEM_LITERAL\n");
8685 ctxt->instate = XML_PARSER_START_TAG;
8686#ifdef DEBUG_PUSH
8687 xmlGenericError(xmlGenericErrorContext,
8688 "PP: entering START_TAG\n");
8689#endif
8690 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00008691 case XML_PARSER_PUBLIC_LITERAL:
8692 xmlGenericError(xmlGenericErrorContext,
8693 "PP: internal error, state == PUBLIC_LITERAL\n");
8694 ctxt->instate = XML_PARSER_START_TAG;
8695#ifdef DEBUG_PUSH
8696 xmlGenericError(xmlGenericErrorContext,
8697 "PP: entering START_TAG\n");
8698#endif
8699 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008700 }
8701 }
8702done:
8703#ifdef DEBUG_PUSH
8704 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8705#endif
8706 return(ret);
8707}
8708
8709/**
Owen Taylor3473f882001-02-23 17:55:21 +00008710 * xmlParseChunk:
8711 * @ctxt: an XML parser context
8712 * @chunk: an char array
8713 * @size: the size in byte of the chunk
8714 * @terminate: last chunk indicator
8715 *
8716 * Parse a Chunk of memory
8717 *
8718 * Returns zero if no error, the xmlParserErrors otherwise.
8719 */
8720int
8721xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8722 int terminate) {
8723 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8724 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8725 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8726 int cur = ctxt->input->cur - ctxt->input->base;
8727
8728 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8729 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8730 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008731 ctxt->input->end =
8732 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008733#ifdef DEBUG_PUSH
8734 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8735#endif
8736
8737 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8738 xmlParseTryOrFinish(ctxt, terminate);
8739 } else if (ctxt->instate != XML_PARSER_EOF) {
8740 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8741 xmlParserInputBufferPtr in = ctxt->input->buf;
8742 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8743 (in->raw != NULL)) {
8744 int nbchars;
8745
8746 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8747 if (nbchars < 0) {
8748 xmlGenericError(xmlGenericErrorContext,
8749 "xmlParseChunk: encoder error\n");
8750 return(XML_ERR_INVALID_ENCODING);
8751 }
8752 }
8753 }
8754 }
8755 xmlParseTryOrFinish(ctxt, terminate);
8756 if (terminate) {
8757 /*
8758 * Check for termination
8759 */
8760 if ((ctxt->instate != XML_PARSER_EOF) &&
8761 (ctxt->instate != XML_PARSER_EPILOG)) {
8762 ctxt->errNo = XML_ERR_DOCUMENT_END;
8763 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8764 ctxt->sax->error(ctxt->userData,
8765 "Extra content at the end of the document\n");
8766 ctxt->wellFormed = 0;
8767 ctxt->disableSAX = 1;
8768 }
8769 if (ctxt->instate != XML_PARSER_EOF) {
8770 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8771 (!ctxt->disableSAX))
8772 ctxt->sax->endDocument(ctxt->userData);
8773 }
8774 ctxt->instate = XML_PARSER_EOF;
8775 }
8776 return((xmlParserErrors) ctxt->errNo);
8777}
8778
8779/************************************************************************
8780 * *
8781 * I/O front end functions to the parser *
8782 * *
8783 ************************************************************************/
8784
8785/**
8786 * xmlStopParser:
8787 * @ctxt: an XML parser context
8788 *
8789 * Blocks further parser processing
8790 */
8791void
8792xmlStopParser(xmlParserCtxtPtr ctxt) {
8793 ctxt->instate = XML_PARSER_EOF;
8794 if (ctxt->input != NULL)
8795 ctxt->input->cur = BAD_CAST"";
8796}
8797
8798/**
8799 * xmlCreatePushParserCtxt:
8800 * @sax: a SAX handler
8801 * @user_data: The user data returned on SAX callbacks
8802 * @chunk: a pointer to an array of chars
8803 * @size: number of chars in the array
8804 * @filename: an optional file name or URI
8805 *
8806 * Create a parser context for using the XML parser in push mode
8807 * To allow content encoding detection, @size should be >= 4
8808 * The value of @filename is used for fetching external entities
8809 * and error/warning reports.
8810 *
8811 * Returns the new parser context or NULL
8812 */
8813xmlParserCtxtPtr
8814xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8815 const char *chunk, int size, const char *filename) {
8816 xmlParserCtxtPtr ctxt;
8817 xmlParserInputPtr inputStream;
8818 xmlParserInputBufferPtr buf;
8819 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8820
8821 /*
8822 * plug some encoding conversion routines
8823 */
8824 if ((chunk != NULL) && (size >= 4))
8825 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8826
8827 buf = xmlAllocParserInputBuffer(enc);
8828 if (buf == NULL) return(NULL);
8829
8830 ctxt = xmlNewParserCtxt();
8831 if (ctxt == NULL) {
8832 xmlFree(buf);
8833 return(NULL);
8834 }
8835 if (sax != NULL) {
8836 if (ctxt->sax != &xmlDefaultSAXHandler)
8837 xmlFree(ctxt->sax);
8838 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8839 if (ctxt->sax == NULL) {
8840 xmlFree(buf);
8841 xmlFree(ctxt);
8842 return(NULL);
8843 }
8844 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8845 if (user_data != NULL)
8846 ctxt->userData = user_data;
8847 }
8848 if (filename == NULL) {
8849 ctxt->directory = NULL;
8850 } else {
8851 ctxt->directory = xmlParserGetDirectory(filename);
8852 }
8853
8854 inputStream = xmlNewInputStream(ctxt);
8855 if (inputStream == NULL) {
8856 xmlFreeParserCtxt(ctxt);
8857 return(NULL);
8858 }
8859
8860 if (filename == NULL)
8861 inputStream->filename = NULL;
8862 else
8863 inputStream->filename = xmlMemStrdup(filename);
8864 inputStream->buf = buf;
8865 inputStream->base = inputStream->buf->buffer->content;
8866 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008867 inputStream->end =
8868 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008869
8870 inputPush(ctxt, inputStream);
8871
8872 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8873 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00008874 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8875 int cur = ctxt->input->cur - ctxt->input->base;
8876
Owen Taylor3473f882001-02-23 17:55:21 +00008877 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00008878
8879 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8880 ctxt->input->cur = ctxt->input->base + cur;
8881 ctxt->input->end =
8882 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008883#ifdef DEBUG_PUSH
8884 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8885#endif
8886 }
8887
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008888 if (enc != XML_CHAR_ENCODING_NONE) {
8889 xmlSwitchEncoding(ctxt, enc);
8890 }
8891
Owen Taylor3473f882001-02-23 17:55:21 +00008892 return(ctxt);
8893}
8894
8895/**
8896 * xmlCreateIOParserCtxt:
8897 * @sax: a SAX handler
8898 * @user_data: The user data returned on SAX callbacks
8899 * @ioread: an I/O read function
8900 * @ioclose: an I/O close function
8901 * @ioctx: an I/O handler
8902 * @enc: the charset encoding if known
8903 *
8904 * Create a parser context for using the XML parser with an existing
8905 * I/O stream
8906 *
8907 * Returns the new parser context or NULL
8908 */
8909xmlParserCtxtPtr
8910xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8911 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8912 void *ioctx, xmlCharEncoding enc) {
8913 xmlParserCtxtPtr ctxt;
8914 xmlParserInputPtr inputStream;
8915 xmlParserInputBufferPtr buf;
8916
8917 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8918 if (buf == NULL) return(NULL);
8919
8920 ctxt = xmlNewParserCtxt();
8921 if (ctxt == NULL) {
8922 xmlFree(buf);
8923 return(NULL);
8924 }
8925 if (sax != NULL) {
8926 if (ctxt->sax != &xmlDefaultSAXHandler)
8927 xmlFree(ctxt->sax);
8928 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8929 if (ctxt->sax == NULL) {
8930 xmlFree(buf);
8931 xmlFree(ctxt);
8932 return(NULL);
8933 }
8934 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8935 if (user_data != NULL)
8936 ctxt->userData = user_data;
8937 }
8938
8939 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8940 if (inputStream == NULL) {
8941 xmlFreeParserCtxt(ctxt);
8942 return(NULL);
8943 }
8944 inputPush(ctxt, inputStream);
8945
8946 return(ctxt);
8947}
8948
8949/************************************************************************
8950 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008951 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00008952 * *
8953 ************************************************************************/
8954
8955/**
8956 * xmlIOParseDTD:
8957 * @sax: the SAX handler block or NULL
8958 * @input: an Input Buffer
8959 * @enc: the charset encoding if known
8960 *
8961 * Load and parse a DTD
8962 *
8963 * Returns the resulting xmlDtdPtr or NULL in case of error.
8964 * @input will be freed at parsing end.
8965 */
8966
8967xmlDtdPtr
8968xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8969 xmlCharEncoding enc) {
8970 xmlDtdPtr ret = NULL;
8971 xmlParserCtxtPtr ctxt;
8972 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00008973 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00008974
8975 if (input == NULL)
8976 return(NULL);
8977
8978 ctxt = xmlNewParserCtxt();
8979 if (ctxt == NULL) {
8980 return(NULL);
8981 }
8982
8983 /*
8984 * Set-up the SAX context
8985 */
8986 if (sax != NULL) {
8987 if (ctxt->sax != NULL)
8988 xmlFree(ctxt->sax);
8989 ctxt->sax = sax;
8990 ctxt->userData = NULL;
8991 }
8992
8993 /*
8994 * generate a parser input from the I/O handler
8995 */
8996
8997 pinput = xmlNewIOInputStream(ctxt, input, enc);
8998 if (pinput == NULL) {
8999 if (sax != NULL) ctxt->sax = NULL;
9000 xmlFreeParserCtxt(ctxt);
9001 return(NULL);
9002 }
9003
9004 /*
9005 * plug some encoding conversion routines here.
9006 */
9007 xmlPushInput(ctxt, pinput);
9008
9009 pinput->filename = NULL;
9010 pinput->line = 1;
9011 pinput->col = 1;
9012 pinput->base = ctxt->input->cur;
9013 pinput->cur = ctxt->input->cur;
9014 pinput->free = NULL;
9015
9016 /*
9017 * let's parse that entity knowing it's an external subset.
9018 */
9019 ctxt->inSubset = 2;
9020 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9021 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9022 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009023
9024 if (enc == XML_CHAR_ENCODING_NONE) {
9025 /*
9026 * Get the 4 first bytes and decode the charset
9027 * if enc != XML_CHAR_ENCODING_NONE
9028 * plug some encoding conversion routines.
9029 */
9030 start[0] = RAW;
9031 start[1] = NXT(1);
9032 start[2] = NXT(2);
9033 start[3] = NXT(3);
9034 enc = xmlDetectCharEncoding(start, 4);
9035 if (enc != XML_CHAR_ENCODING_NONE) {
9036 xmlSwitchEncoding(ctxt, enc);
9037 }
9038 }
9039
Owen Taylor3473f882001-02-23 17:55:21 +00009040 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9041
9042 if (ctxt->myDoc != NULL) {
9043 if (ctxt->wellFormed) {
9044 ret = ctxt->myDoc->extSubset;
9045 ctxt->myDoc->extSubset = NULL;
9046 } else {
9047 ret = NULL;
9048 }
9049 xmlFreeDoc(ctxt->myDoc);
9050 ctxt->myDoc = NULL;
9051 }
9052 if (sax != NULL) ctxt->sax = NULL;
9053 xmlFreeParserCtxt(ctxt);
9054
9055 return(ret);
9056}
9057
9058/**
9059 * xmlSAXParseDTD:
9060 * @sax: the SAX handler block
9061 * @ExternalID: a NAME* containing the External ID of the DTD
9062 * @SystemID: a NAME* containing the URL to the DTD
9063 *
9064 * Load and parse an external subset.
9065 *
9066 * Returns the resulting xmlDtdPtr or NULL in case of error.
9067 */
9068
9069xmlDtdPtr
9070xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9071 const xmlChar *SystemID) {
9072 xmlDtdPtr ret = NULL;
9073 xmlParserCtxtPtr ctxt;
9074 xmlParserInputPtr input = NULL;
9075 xmlCharEncoding enc;
9076
9077 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9078
9079 ctxt = xmlNewParserCtxt();
9080 if (ctxt == NULL) {
9081 return(NULL);
9082 }
9083
9084 /*
9085 * Set-up the SAX context
9086 */
9087 if (sax != NULL) {
9088 if (ctxt->sax != NULL)
9089 xmlFree(ctxt->sax);
9090 ctxt->sax = sax;
9091 ctxt->userData = NULL;
9092 }
9093
9094 /*
9095 * Ask the Entity resolver to load the damn thing
9096 */
9097
9098 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9099 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9100 if (input == NULL) {
9101 if (sax != NULL) ctxt->sax = NULL;
9102 xmlFreeParserCtxt(ctxt);
9103 return(NULL);
9104 }
9105
9106 /*
9107 * plug some encoding conversion routines here.
9108 */
9109 xmlPushInput(ctxt, input);
9110 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9111 xmlSwitchEncoding(ctxt, enc);
9112
9113 if (input->filename == NULL)
9114 input->filename = (char *) xmlStrdup(SystemID);
9115 input->line = 1;
9116 input->col = 1;
9117 input->base = ctxt->input->cur;
9118 input->cur = ctxt->input->cur;
9119 input->free = NULL;
9120
9121 /*
9122 * let's parse that entity knowing it's an external subset.
9123 */
9124 ctxt->inSubset = 2;
9125 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9126 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9127 ExternalID, SystemID);
9128 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9129
9130 if (ctxt->myDoc != NULL) {
9131 if (ctxt->wellFormed) {
9132 ret = ctxt->myDoc->extSubset;
9133 ctxt->myDoc->extSubset = NULL;
9134 } else {
9135 ret = NULL;
9136 }
9137 xmlFreeDoc(ctxt->myDoc);
9138 ctxt->myDoc = NULL;
9139 }
9140 if (sax != NULL) ctxt->sax = NULL;
9141 xmlFreeParserCtxt(ctxt);
9142
9143 return(ret);
9144}
9145
9146/**
9147 * xmlParseDTD:
9148 * @ExternalID: a NAME* containing the External ID of the DTD
9149 * @SystemID: a NAME* containing the URL to the DTD
9150 *
9151 * Load and parse an external subset.
9152 *
9153 * Returns the resulting xmlDtdPtr or NULL in case of error.
9154 */
9155
9156xmlDtdPtr
9157xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9158 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9159}
9160
9161/************************************************************************
9162 * *
9163 * Front ends when parsing an Entity *
9164 * *
9165 ************************************************************************/
9166
9167/**
Owen Taylor3473f882001-02-23 17:55:21 +00009168 * xmlParseCtxtExternalEntity:
9169 * @ctx: the existing parsing context
9170 * @URL: the URL for the entity to load
9171 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009172 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009173 *
9174 * Parse an external general entity within an existing parsing context
9175 * An external general parsed entity is well-formed if it matches the
9176 * production labeled extParsedEnt.
9177 *
9178 * [78] extParsedEnt ::= TextDecl? content
9179 *
9180 * Returns 0 if the entity is well formed, -1 in case of args problem and
9181 * the parser error code otherwise
9182 */
9183
9184int
9185xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009186 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009187 xmlParserCtxtPtr ctxt;
9188 xmlDocPtr newDoc;
9189 xmlSAXHandlerPtr oldsax = NULL;
9190 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009191 xmlChar start[4];
9192 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009193
9194 if (ctx->depth > 40) {
9195 return(XML_ERR_ENTITY_LOOP);
9196 }
9197
Daniel Veillardcda96922001-08-21 10:56:31 +00009198 if (lst != NULL)
9199 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009200 if ((URL == NULL) && (ID == NULL))
9201 return(-1);
9202 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9203 return(-1);
9204
9205
9206 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9207 if (ctxt == NULL) return(-1);
9208 ctxt->userData = ctxt;
9209 oldsax = ctxt->sax;
9210 ctxt->sax = ctx->sax;
9211 newDoc = xmlNewDoc(BAD_CAST "1.0");
9212 if (newDoc == NULL) {
9213 xmlFreeParserCtxt(ctxt);
9214 return(-1);
9215 }
9216 if (ctx->myDoc != NULL) {
9217 newDoc->intSubset = ctx->myDoc->intSubset;
9218 newDoc->extSubset = ctx->myDoc->extSubset;
9219 }
9220 if (ctx->myDoc->URL != NULL) {
9221 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9222 }
9223 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9224 if (newDoc->children == NULL) {
9225 ctxt->sax = oldsax;
9226 xmlFreeParserCtxt(ctxt);
9227 newDoc->intSubset = NULL;
9228 newDoc->extSubset = NULL;
9229 xmlFreeDoc(newDoc);
9230 return(-1);
9231 }
9232 nodePush(ctxt, newDoc->children);
9233 if (ctx->myDoc == NULL) {
9234 ctxt->myDoc = newDoc;
9235 } else {
9236 ctxt->myDoc = ctx->myDoc;
9237 newDoc->children->doc = ctx->myDoc;
9238 }
9239
Daniel Veillard87a764e2001-06-20 17:41:10 +00009240 /*
9241 * Get the 4 first bytes and decode the charset
9242 * if enc != XML_CHAR_ENCODING_NONE
9243 * plug some encoding conversion routines.
9244 */
9245 GROW
9246 start[0] = RAW;
9247 start[1] = NXT(1);
9248 start[2] = NXT(2);
9249 start[3] = NXT(3);
9250 enc = xmlDetectCharEncoding(start, 4);
9251 if (enc != XML_CHAR_ENCODING_NONE) {
9252 xmlSwitchEncoding(ctxt, enc);
9253 }
9254
Owen Taylor3473f882001-02-23 17:55:21 +00009255 /*
9256 * Parse a possible text declaration first
9257 */
Owen Taylor3473f882001-02-23 17:55:21 +00009258 if ((RAW == '<') && (NXT(1) == '?') &&
9259 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9260 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9261 xmlParseTextDecl(ctxt);
9262 }
9263
9264 /*
9265 * Doing validity checking on chunk doesn't make sense
9266 */
9267 ctxt->instate = XML_PARSER_CONTENT;
9268 ctxt->validate = ctx->validate;
9269 ctxt->loadsubset = ctx->loadsubset;
9270 ctxt->depth = ctx->depth + 1;
9271 ctxt->replaceEntities = ctx->replaceEntities;
9272 if (ctxt->validate) {
9273 ctxt->vctxt.error = ctx->vctxt.error;
9274 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009275 } else {
9276 ctxt->vctxt.error = NULL;
9277 ctxt->vctxt.warning = NULL;
9278 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009279 ctxt->vctxt.nodeTab = NULL;
9280 ctxt->vctxt.nodeNr = 0;
9281 ctxt->vctxt.nodeMax = 0;
9282 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009283
9284 xmlParseContent(ctxt);
9285
9286 if ((RAW == '<') && (NXT(1) == '/')) {
9287 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9288 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9289 ctxt->sax->error(ctxt->userData,
9290 "chunk is not well balanced\n");
9291 ctxt->wellFormed = 0;
9292 ctxt->disableSAX = 1;
9293 } else if (RAW != 0) {
9294 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9295 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9296 ctxt->sax->error(ctxt->userData,
9297 "extra content at the end of well balanced chunk\n");
9298 ctxt->wellFormed = 0;
9299 ctxt->disableSAX = 1;
9300 }
9301 if (ctxt->node != newDoc->children) {
9302 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9303 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9304 ctxt->sax->error(ctxt->userData,
9305 "chunk is not well balanced\n");
9306 ctxt->wellFormed = 0;
9307 ctxt->disableSAX = 1;
9308 }
9309
9310 if (!ctxt->wellFormed) {
9311 if (ctxt->errNo == 0)
9312 ret = 1;
9313 else
9314 ret = ctxt->errNo;
9315 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009316 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009317 xmlNodePtr cur;
9318
9319 /*
9320 * Return the newly created nodeset after unlinking it from
9321 * they pseudo parent.
9322 */
9323 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009324 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009325 while (cur != NULL) {
9326 cur->parent = NULL;
9327 cur = cur->next;
9328 }
9329 newDoc->children->children = NULL;
9330 }
9331 ret = 0;
9332 }
9333 ctxt->sax = oldsax;
9334 xmlFreeParserCtxt(ctxt);
9335 newDoc->intSubset = NULL;
9336 newDoc->extSubset = NULL;
9337 xmlFreeDoc(newDoc);
9338
9339 return(ret);
9340}
9341
9342/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009343 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009344 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009345 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009346 * @sax: the SAX handler bloc (possibly NULL)
9347 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9348 * @depth: Used for loop detection, use 0
9349 * @URL: the URL for the entity to load
9350 * @ID: the System ID for the entity to load
9351 * @list: the return value for the set of parsed nodes
9352 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009353 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009354 *
9355 * Returns 0 if the entity is well formed, -1 in case of args problem and
9356 * the parser error code otherwise
9357 */
9358
Daniel Veillard257d9102001-05-08 10:41:44 +00009359static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009360xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9361 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009362 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009363 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009364 xmlParserCtxtPtr ctxt;
9365 xmlDocPtr newDoc;
9366 xmlSAXHandlerPtr oldsax = NULL;
9367 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009368 xmlChar start[4];
9369 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009370
9371 if (depth > 40) {
9372 return(XML_ERR_ENTITY_LOOP);
9373 }
9374
9375
9376
9377 if (list != NULL)
9378 *list = NULL;
9379 if ((URL == NULL) && (ID == NULL))
9380 return(-1);
9381 if (doc == NULL) /* @@ relax but check for dereferences */
9382 return(-1);
9383
9384
9385 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9386 if (ctxt == NULL) return(-1);
9387 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009388 if (oldctxt != NULL) {
9389 ctxt->_private = oldctxt->_private;
9390 ctxt->loadsubset = oldctxt->loadsubset;
9391 ctxt->validate = oldctxt->validate;
9392 ctxt->external = oldctxt->external;
9393 } else {
9394 /*
9395 * Doing validity checking on chunk without context
9396 * doesn't make sense
9397 */
9398 ctxt->_private = NULL;
9399 ctxt->validate = 0;
9400 ctxt->external = 2;
9401 ctxt->loadsubset = 0;
9402 }
Owen Taylor3473f882001-02-23 17:55:21 +00009403 if (sax != NULL) {
9404 oldsax = ctxt->sax;
9405 ctxt->sax = sax;
9406 if (user_data != NULL)
9407 ctxt->userData = user_data;
9408 }
9409 newDoc = xmlNewDoc(BAD_CAST "1.0");
9410 if (newDoc == NULL) {
9411 xmlFreeParserCtxt(ctxt);
9412 return(-1);
9413 }
9414 if (doc != NULL) {
9415 newDoc->intSubset = doc->intSubset;
9416 newDoc->extSubset = doc->extSubset;
9417 }
9418 if (doc->URL != NULL) {
9419 newDoc->URL = xmlStrdup(doc->URL);
9420 }
9421 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9422 if (newDoc->children == NULL) {
9423 if (sax != NULL)
9424 ctxt->sax = oldsax;
9425 xmlFreeParserCtxt(ctxt);
9426 newDoc->intSubset = NULL;
9427 newDoc->extSubset = NULL;
9428 xmlFreeDoc(newDoc);
9429 return(-1);
9430 }
9431 nodePush(ctxt, newDoc->children);
9432 if (doc == NULL) {
9433 ctxt->myDoc = newDoc;
9434 } else {
9435 ctxt->myDoc = doc;
9436 newDoc->children->doc = doc;
9437 }
9438
Daniel Veillard87a764e2001-06-20 17:41:10 +00009439 /*
9440 * Get the 4 first bytes and decode the charset
9441 * if enc != XML_CHAR_ENCODING_NONE
9442 * plug some encoding conversion routines.
9443 */
9444 GROW;
9445 start[0] = RAW;
9446 start[1] = NXT(1);
9447 start[2] = NXT(2);
9448 start[3] = NXT(3);
9449 enc = xmlDetectCharEncoding(start, 4);
9450 if (enc != XML_CHAR_ENCODING_NONE) {
9451 xmlSwitchEncoding(ctxt, enc);
9452 }
9453
Owen Taylor3473f882001-02-23 17:55:21 +00009454 /*
9455 * Parse a possible text declaration first
9456 */
Owen Taylor3473f882001-02-23 17:55:21 +00009457 if ((RAW == '<') && (NXT(1) == '?') &&
9458 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9459 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9460 xmlParseTextDecl(ctxt);
9461 }
9462
Owen Taylor3473f882001-02-23 17:55:21 +00009463 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009464 ctxt->depth = depth;
9465
9466 xmlParseContent(ctxt);
9467
9468 if ((RAW == '<') && (NXT(1) == '/')) {
9469 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9470 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9471 ctxt->sax->error(ctxt->userData,
9472 "chunk is not well balanced\n");
9473 ctxt->wellFormed = 0;
9474 ctxt->disableSAX = 1;
9475 } else if (RAW != 0) {
9476 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9477 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9478 ctxt->sax->error(ctxt->userData,
9479 "extra content at the end of well balanced chunk\n");
9480 ctxt->wellFormed = 0;
9481 ctxt->disableSAX = 1;
9482 }
9483 if (ctxt->node != newDoc->children) {
9484 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9485 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9486 ctxt->sax->error(ctxt->userData,
9487 "chunk is not well balanced\n");
9488 ctxt->wellFormed = 0;
9489 ctxt->disableSAX = 1;
9490 }
9491
9492 if (!ctxt->wellFormed) {
9493 if (ctxt->errNo == 0)
9494 ret = 1;
9495 else
9496 ret = ctxt->errNo;
9497 } else {
9498 if (list != NULL) {
9499 xmlNodePtr cur;
9500
9501 /*
9502 * Return the newly created nodeset after unlinking it from
9503 * they pseudo parent.
9504 */
9505 cur = newDoc->children->children;
9506 *list = cur;
9507 while (cur != NULL) {
9508 cur->parent = NULL;
9509 cur = cur->next;
9510 }
9511 newDoc->children->children = NULL;
9512 }
9513 ret = 0;
9514 }
9515 if (sax != NULL)
9516 ctxt->sax = oldsax;
9517 xmlFreeParserCtxt(ctxt);
9518 newDoc->intSubset = NULL;
9519 newDoc->extSubset = NULL;
9520 xmlFreeDoc(newDoc);
9521
9522 return(ret);
9523}
9524
9525/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009526 * xmlParseExternalEntity:
9527 * @doc: the document the chunk pertains to
9528 * @sax: the SAX handler bloc (possibly NULL)
9529 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9530 * @depth: Used for loop detection, use 0
9531 * @URL: the URL for the entity to load
9532 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009533 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009534 *
9535 * Parse an external general entity
9536 * An external general parsed entity is well-formed if it matches the
9537 * production labeled extParsedEnt.
9538 *
9539 * [78] extParsedEnt ::= TextDecl? content
9540 *
9541 * Returns 0 if the entity is well formed, -1 in case of args problem and
9542 * the parser error code otherwise
9543 */
9544
9545int
9546xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009547 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009548 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009549 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009550}
9551
9552/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009553 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009554 * @doc: the document the chunk pertains to
9555 * @sax: the SAX handler bloc (possibly NULL)
9556 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9557 * @depth: Used for loop detection, use 0
9558 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009559 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009560 *
9561 * Parse a well-balanced chunk of an XML document
9562 * called by the parser
9563 * The allowed sequence for the Well Balanced Chunk is the one defined by
9564 * the content production in the XML grammar:
9565 *
9566 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9567 *
9568 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9569 * the parser error code otherwise
9570 */
9571
9572int
9573xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009574 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009575 xmlParserCtxtPtr ctxt;
9576 xmlDocPtr newDoc;
9577 xmlSAXHandlerPtr oldsax = NULL;
9578 int size;
9579 int ret = 0;
9580
9581 if (depth > 40) {
9582 return(XML_ERR_ENTITY_LOOP);
9583 }
9584
9585
Daniel Veillardcda96922001-08-21 10:56:31 +00009586 if (lst != NULL)
9587 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009588 if (string == NULL)
9589 return(-1);
9590
9591 size = xmlStrlen(string);
9592
9593 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9594 if (ctxt == NULL) return(-1);
9595 ctxt->userData = ctxt;
9596 if (sax != NULL) {
9597 oldsax = ctxt->sax;
9598 ctxt->sax = sax;
9599 if (user_data != NULL)
9600 ctxt->userData = user_data;
9601 }
9602 newDoc = xmlNewDoc(BAD_CAST "1.0");
9603 if (newDoc == NULL) {
9604 xmlFreeParserCtxt(ctxt);
9605 return(-1);
9606 }
9607 if (doc != NULL) {
9608 newDoc->intSubset = doc->intSubset;
9609 newDoc->extSubset = doc->extSubset;
9610 }
9611 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9612 if (newDoc->children == NULL) {
9613 if (sax != NULL)
9614 ctxt->sax = oldsax;
9615 xmlFreeParserCtxt(ctxt);
9616 newDoc->intSubset = NULL;
9617 newDoc->extSubset = NULL;
9618 xmlFreeDoc(newDoc);
9619 return(-1);
9620 }
9621 nodePush(ctxt, newDoc->children);
9622 if (doc == NULL) {
9623 ctxt->myDoc = newDoc;
9624 } else {
9625 ctxt->myDoc = doc;
9626 newDoc->children->doc = doc;
9627 }
9628 ctxt->instate = XML_PARSER_CONTENT;
9629 ctxt->depth = depth;
9630
9631 /*
9632 * Doing validity checking on chunk doesn't make sense
9633 */
9634 ctxt->validate = 0;
9635 ctxt->loadsubset = 0;
9636
9637 xmlParseContent(ctxt);
9638
9639 if ((RAW == '<') && (NXT(1) == '/')) {
9640 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9641 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9642 ctxt->sax->error(ctxt->userData,
9643 "chunk is not well balanced\n");
9644 ctxt->wellFormed = 0;
9645 ctxt->disableSAX = 1;
9646 } else if (RAW != 0) {
9647 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9648 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9649 ctxt->sax->error(ctxt->userData,
9650 "extra content at the end of well balanced chunk\n");
9651 ctxt->wellFormed = 0;
9652 ctxt->disableSAX = 1;
9653 }
9654 if (ctxt->node != newDoc->children) {
9655 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9656 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9657 ctxt->sax->error(ctxt->userData,
9658 "chunk is not well balanced\n");
9659 ctxt->wellFormed = 0;
9660 ctxt->disableSAX = 1;
9661 }
9662
9663 if (!ctxt->wellFormed) {
9664 if (ctxt->errNo == 0)
9665 ret = 1;
9666 else
9667 ret = ctxt->errNo;
9668 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009669 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009670 xmlNodePtr cur;
9671
9672 /*
9673 * Return the newly created nodeset after unlinking it from
9674 * they pseudo parent.
9675 */
9676 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009677 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009678 while (cur != NULL) {
9679 cur->parent = NULL;
9680 cur = cur->next;
9681 }
9682 newDoc->children->children = NULL;
9683 }
9684 ret = 0;
9685 }
9686 if (sax != NULL)
9687 ctxt->sax = oldsax;
9688 xmlFreeParserCtxt(ctxt);
9689 newDoc->intSubset = NULL;
9690 newDoc->extSubset = NULL;
9691 xmlFreeDoc(newDoc);
9692
9693 return(ret);
9694}
9695
9696/**
9697 * xmlSAXParseEntity:
9698 * @sax: the SAX handler block
9699 * @filename: the filename
9700 *
9701 * parse an XML external entity out of context and build a tree.
9702 * It use the given SAX function block to handle the parsing callback.
9703 * If sax is NULL, fallback to the default DOM tree building routines.
9704 *
9705 * [78] extParsedEnt ::= TextDecl? content
9706 *
9707 * This correspond to a "Well Balanced" chunk
9708 *
9709 * Returns the resulting document tree
9710 */
9711
9712xmlDocPtr
9713xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9714 xmlDocPtr ret;
9715 xmlParserCtxtPtr ctxt;
9716 char *directory = NULL;
9717
9718 ctxt = xmlCreateFileParserCtxt(filename);
9719 if (ctxt == NULL) {
9720 return(NULL);
9721 }
9722 if (sax != NULL) {
9723 if (ctxt->sax != NULL)
9724 xmlFree(ctxt->sax);
9725 ctxt->sax = sax;
9726 ctxt->userData = NULL;
9727 }
9728
9729 if ((ctxt->directory == NULL) && (directory == NULL))
9730 directory = xmlParserGetDirectory(filename);
9731
9732 xmlParseExtParsedEnt(ctxt);
9733
9734 if (ctxt->wellFormed)
9735 ret = ctxt->myDoc;
9736 else {
9737 ret = NULL;
9738 xmlFreeDoc(ctxt->myDoc);
9739 ctxt->myDoc = NULL;
9740 }
9741 if (sax != NULL)
9742 ctxt->sax = NULL;
9743 xmlFreeParserCtxt(ctxt);
9744
9745 return(ret);
9746}
9747
9748/**
9749 * xmlParseEntity:
9750 * @filename: the filename
9751 *
9752 * parse an XML external entity out of context and build a tree.
9753 *
9754 * [78] extParsedEnt ::= TextDecl? content
9755 *
9756 * This correspond to a "Well Balanced" chunk
9757 *
9758 * Returns the resulting document tree
9759 */
9760
9761xmlDocPtr
9762xmlParseEntity(const char *filename) {
9763 return(xmlSAXParseEntity(NULL, filename));
9764}
9765
9766/**
9767 * xmlCreateEntityParserCtxt:
9768 * @URL: the entity URL
9769 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009770 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +00009771 *
9772 * Create a parser context for an external entity
9773 * Automatic support for ZLIB/Compress compressed document is provided
9774 * by default if found at compile-time.
9775 *
9776 * Returns the new parser context or NULL
9777 */
9778xmlParserCtxtPtr
9779xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9780 const xmlChar *base) {
9781 xmlParserCtxtPtr ctxt;
9782 xmlParserInputPtr inputStream;
9783 char *directory = NULL;
9784 xmlChar *uri;
9785
9786 ctxt = xmlNewParserCtxt();
9787 if (ctxt == NULL) {
9788 return(NULL);
9789 }
9790
9791 uri = xmlBuildURI(URL, base);
9792
9793 if (uri == NULL) {
9794 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9795 if (inputStream == NULL) {
9796 xmlFreeParserCtxt(ctxt);
9797 return(NULL);
9798 }
9799
9800 inputPush(ctxt, inputStream);
9801
9802 if ((ctxt->directory == NULL) && (directory == NULL))
9803 directory = xmlParserGetDirectory((char *)URL);
9804 if ((ctxt->directory == NULL) && (directory != NULL))
9805 ctxt->directory = directory;
9806 } else {
9807 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9808 if (inputStream == NULL) {
9809 xmlFree(uri);
9810 xmlFreeParserCtxt(ctxt);
9811 return(NULL);
9812 }
9813
9814 inputPush(ctxt, inputStream);
9815
9816 if ((ctxt->directory == NULL) && (directory == NULL))
9817 directory = xmlParserGetDirectory((char *)uri);
9818 if ((ctxt->directory == NULL) && (directory != NULL))
9819 ctxt->directory = directory;
9820 xmlFree(uri);
9821 }
9822
9823 return(ctxt);
9824}
9825
9826/************************************************************************
9827 * *
9828 * Front ends when parsing from a file *
9829 * *
9830 ************************************************************************/
9831
9832/**
9833 * xmlCreateFileParserCtxt:
9834 * @filename: the filename
9835 *
9836 * Create a parser context for a file content.
9837 * Automatic support for ZLIB/Compress compressed document is provided
9838 * by default if found at compile-time.
9839 *
9840 * Returns the new parser context or NULL
9841 */
9842xmlParserCtxtPtr
9843xmlCreateFileParserCtxt(const char *filename)
9844{
9845 xmlParserCtxtPtr ctxt;
9846 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +00009847 char *directory = NULL;
9848
Owen Taylor3473f882001-02-23 17:55:21 +00009849 ctxt = xmlNewParserCtxt();
9850 if (ctxt == NULL) {
9851 if (xmlDefaultSAXHandler.error != NULL) {
9852 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9853 }
9854 return(NULL);
9855 }
9856
Daniel Veillard9f7b84b2001-08-23 15:31:19 +00009857 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009858 if (inputStream == NULL) {
9859 xmlFreeParserCtxt(ctxt);
9860 return(NULL);
9861 }
9862
Owen Taylor3473f882001-02-23 17:55:21 +00009863 inputPush(ctxt, inputStream);
9864 if ((ctxt->directory == NULL) && (directory == NULL))
9865 directory = xmlParserGetDirectory(filename);
9866 if ((ctxt->directory == NULL) && (directory != NULL))
9867 ctxt->directory = directory;
9868
9869 return(ctxt);
9870}
9871
9872/**
Daniel Veillarda293c322001-10-02 13:54:14 +00009873 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +00009874 * @sax: the SAX handler block
9875 * @filename: the filename
9876 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9877 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +00009878 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +00009879 *
9880 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9881 * compressed document is provided by default if found at compile-time.
9882 * It use the given SAX function block to handle the parsing callback.
9883 * If sax is NULL, fallback to the default DOM tree building routines.
9884 *
Daniel Veillarda293c322001-10-02 13:54:14 +00009885 * User data (void *) is stored within the parser context, so it is
9886 * available nearly everywhere in libxml.
9887 *
Owen Taylor3473f882001-02-23 17:55:21 +00009888 * Returns the resulting document tree
9889 */
9890
9891xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +00009892xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
9893 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +00009894 xmlDocPtr ret;
9895 xmlParserCtxtPtr ctxt;
9896 char *directory = NULL;
9897
Daniel Veillard635ef722001-10-29 11:48:19 +00009898 xmlInitParser();
9899
Owen Taylor3473f882001-02-23 17:55:21 +00009900 ctxt = xmlCreateFileParserCtxt(filename);
9901 if (ctxt == NULL) {
9902 return(NULL);
9903 }
9904 if (sax != NULL) {
9905 if (ctxt->sax != NULL)
9906 xmlFree(ctxt->sax);
9907 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009908 }
Daniel Veillarda293c322001-10-02 13:54:14 +00009909 if (data!=NULL) {
9910 ctxt->_private=data;
9911 }
Owen Taylor3473f882001-02-23 17:55:21 +00009912
9913 if ((ctxt->directory == NULL) && (directory == NULL))
9914 directory = xmlParserGetDirectory(filename);
9915 if ((ctxt->directory == NULL) && (directory != NULL))
9916 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9917
9918 xmlParseDocument(ctxt);
9919
9920 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9921 else {
9922 ret = NULL;
9923 xmlFreeDoc(ctxt->myDoc);
9924 ctxt->myDoc = NULL;
9925 }
9926 if (sax != NULL)
9927 ctxt->sax = NULL;
9928 xmlFreeParserCtxt(ctxt);
9929
9930 return(ret);
9931}
9932
9933/**
Daniel Veillarda293c322001-10-02 13:54:14 +00009934 * xmlSAXParseFile:
9935 * @sax: the SAX handler block
9936 * @filename: the filename
9937 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9938 * documents
9939 *
9940 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9941 * compressed document is provided by default if found at compile-time.
9942 * It use the given SAX function block to handle the parsing callback.
9943 * If sax is NULL, fallback to the default DOM tree building routines.
9944 *
9945 * Returns the resulting document tree
9946 */
9947
9948xmlDocPtr
9949xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9950 int recovery) {
9951 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
9952}
9953
9954/**
Owen Taylor3473f882001-02-23 17:55:21 +00009955 * xmlRecoverDoc:
9956 * @cur: a pointer to an array of xmlChar
9957 *
9958 * parse an XML in-memory document and build a tree.
9959 * In the case the document is not Well Formed, a tree is built anyway
9960 *
9961 * Returns the resulting document tree
9962 */
9963
9964xmlDocPtr
9965xmlRecoverDoc(xmlChar *cur) {
9966 return(xmlSAXParseDoc(NULL, cur, 1));
9967}
9968
9969/**
9970 * xmlParseFile:
9971 * @filename: the filename
9972 *
9973 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9974 * compressed document is provided by default if found at compile-time.
9975 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +00009976 * Returns the resulting document tree if the file was wellformed,
9977 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +00009978 */
9979
9980xmlDocPtr
9981xmlParseFile(const char *filename) {
9982 return(xmlSAXParseFile(NULL, filename, 0));
9983}
9984
9985/**
9986 * xmlRecoverFile:
9987 * @filename: the filename
9988 *
9989 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9990 * compressed document is provided by default if found at compile-time.
9991 * In the case the document is not Well Formed, a tree is built anyway
9992 *
9993 * Returns the resulting document tree
9994 */
9995
9996xmlDocPtr
9997xmlRecoverFile(const char *filename) {
9998 return(xmlSAXParseFile(NULL, filename, 1));
9999}
10000
10001
10002/**
10003 * xmlSetupParserForBuffer:
10004 * @ctxt: an XML parser context
10005 * @buffer: a xmlChar * buffer
10006 * @filename: a file name
10007 *
10008 * Setup the parser context to parse a new buffer; Clears any prior
10009 * contents from the parser context. The buffer parameter must not be
10010 * NULL, but the filename parameter can be
10011 */
10012void
10013xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
10014 const char* filename)
10015{
10016 xmlParserInputPtr input;
10017
10018 input = xmlNewInputStream(ctxt);
10019 if (input == NULL) {
10020 perror("malloc");
10021 xmlFree(ctxt);
10022 return;
10023 }
10024
10025 xmlClearParserCtxt(ctxt);
10026 if (filename != NULL)
10027 input->filename = xmlMemStrdup(filename);
10028 input->base = buffer;
10029 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010030 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000010031 inputPush(ctxt, input);
10032}
10033
10034/**
10035 * xmlSAXUserParseFile:
10036 * @sax: a SAX handler
10037 * @user_data: The user data returned on SAX callbacks
10038 * @filename: a file name
10039 *
10040 * parse an XML file and call the given SAX handler routines.
10041 * Automatic support for ZLIB/Compress compressed document is provided
10042 *
10043 * Returns 0 in case of success or a error number otherwise
10044 */
10045int
10046xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10047 const char *filename) {
10048 int ret = 0;
10049 xmlParserCtxtPtr ctxt;
10050
10051 ctxt = xmlCreateFileParserCtxt(filename);
10052 if (ctxt == NULL) return -1;
10053 if (ctxt->sax != &xmlDefaultSAXHandler)
10054 xmlFree(ctxt->sax);
10055 ctxt->sax = sax;
10056 if (user_data != NULL)
10057 ctxt->userData = user_data;
10058
10059 xmlParseDocument(ctxt);
10060
10061 if (ctxt->wellFormed)
10062 ret = 0;
10063 else {
10064 if (ctxt->errNo != 0)
10065 ret = ctxt->errNo;
10066 else
10067 ret = -1;
10068 }
10069 if (sax != NULL)
10070 ctxt->sax = NULL;
10071 xmlFreeParserCtxt(ctxt);
10072
10073 return ret;
10074}
10075
10076/************************************************************************
10077 * *
10078 * Front ends when parsing from memory *
10079 * *
10080 ************************************************************************/
10081
10082/**
10083 * xmlCreateMemoryParserCtxt:
10084 * @buffer: a pointer to a char array
10085 * @size: the size of the array
10086 *
10087 * Create a parser context for an XML in-memory document.
10088 *
10089 * Returns the new parser context or NULL
10090 */
10091xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010092xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010093 xmlParserCtxtPtr ctxt;
10094 xmlParserInputPtr input;
10095 xmlParserInputBufferPtr buf;
10096
10097 if (buffer == NULL)
10098 return(NULL);
10099 if (size <= 0)
10100 return(NULL);
10101
10102 ctxt = xmlNewParserCtxt();
10103 if (ctxt == NULL)
10104 return(NULL);
10105
10106 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
10107 if (buf == NULL) return(NULL);
10108
10109 input = xmlNewInputStream(ctxt);
10110 if (input == NULL) {
10111 xmlFreeParserCtxt(ctxt);
10112 return(NULL);
10113 }
10114
10115 input->filename = NULL;
10116 input->buf = buf;
10117 input->base = input->buf->buffer->content;
10118 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010119 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010120
10121 inputPush(ctxt, input);
10122 return(ctxt);
10123}
10124
10125/**
10126 * xmlSAXParseMemory:
10127 * @sax: the SAX handler block
10128 * @buffer: an pointer to a char array
10129 * @size: the size of the array
10130 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10131 * documents
10132 *
10133 * parse an XML in-memory block and use the given SAX function block
10134 * to handle the parsing callback. If sax is NULL, fallback to the default
10135 * DOM tree building routines.
10136 *
10137 * Returns the resulting document tree
10138 */
10139xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010140xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10141 int size, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000010142 xmlDocPtr ret;
10143 xmlParserCtxtPtr ctxt;
10144
10145 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10146 if (ctxt == NULL) return(NULL);
10147 if (sax != NULL) {
10148 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010149 }
10150
10151 xmlParseDocument(ctxt);
10152
10153 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10154 else {
10155 ret = NULL;
10156 xmlFreeDoc(ctxt->myDoc);
10157 ctxt->myDoc = NULL;
10158 }
10159 if (sax != NULL)
10160 ctxt->sax = NULL;
10161 xmlFreeParserCtxt(ctxt);
10162
10163 return(ret);
10164}
10165
10166/**
10167 * xmlParseMemory:
10168 * @buffer: an pointer to a char array
10169 * @size: the size of the array
10170 *
10171 * parse an XML in-memory block and build a tree.
10172 *
10173 * Returns the resulting document tree
10174 */
10175
Daniel Veillard50822cb2001-07-26 20:05:51 +000010176xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010177 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10178}
10179
10180/**
10181 * xmlRecoverMemory:
10182 * @buffer: an pointer to a char array
10183 * @size: the size of the array
10184 *
10185 * parse an XML in-memory block and build a tree.
10186 * In the case the document is not Well Formed, a tree is built anyway
10187 *
10188 * Returns the resulting document tree
10189 */
10190
Daniel Veillard50822cb2001-07-26 20:05:51 +000010191xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010192 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10193}
10194
10195/**
10196 * xmlSAXUserParseMemory:
10197 * @sax: a SAX handler
10198 * @user_data: The user data returned on SAX callbacks
10199 * @buffer: an in-memory XML document input
10200 * @size: the length of the XML document in bytes
10201 *
10202 * A better SAX parsing routine.
10203 * parse an XML in-memory buffer and call the given SAX handler routines.
10204 *
10205 * Returns 0 in case of success or a error number otherwise
10206 */
10207int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010208 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010209 int ret = 0;
10210 xmlParserCtxtPtr ctxt;
10211 xmlSAXHandlerPtr oldsax = NULL;
10212
10213 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10214 if (ctxt == NULL) return -1;
10215 if (sax != NULL) {
10216 oldsax = ctxt->sax;
10217 ctxt->sax = sax;
10218 }
Daniel Veillard30211a02001-04-26 09:33:18 +000010219 if (user_data != NULL)
10220 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010221
10222 xmlParseDocument(ctxt);
10223
10224 if (ctxt->wellFormed)
10225 ret = 0;
10226 else {
10227 if (ctxt->errNo != 0)
10228 ret = ctxt->errNo;
10229 else
10230 ret = -1;
10231 }
10232 if (sax != NULL) {
10233 ctxt->sax = oldsax;
10234 }
10235 xmlFreeParserCtxt(ctxt);
10236
10237 return ret;
10238}
10239
10240/**
10241 * xmlCreateDocParserCtxt:
10242 * @cur: a pointer to an array of xmlChar
10243 *
10244 * Creates a parser context for an XML in-memory document.
10245 *
10246 * Returns the new parser context or NULL
10247 */
10248xmlParserCtxtPtr
10249xmlCreateDocParserCtxt(xmlChar *cur) {
10250 int len;
10251
10252 if (cur == NULL)
10253 return(NULL);
10254 len = xmlStrlen(cur);
10255 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10256}
10257
10258/**
10259 * xmlSAXParseDoc:
10260 * @sax: the SAX handler block
10261 * @cur: a pointer to an array of xmlChar
10262 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10263 * documents
10264 *
10265 * parse an XML in-memory document and build a tree.
10266 * It use the given SAX function block to handle the parsing callback.
10267 * If sax is NULL, fallback to the default DOM tree building routines.
10268 *
10269 * Returns the resulting document tree
10270 */
10271
10272xmlDocPtr
10273xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10274 xmlDocPtr ret;
10275 xmlParserCtxtPtr ctxt;
10276
10277 if (cur == NULL) return(NULL);
10278
10279
10280 ctxt = xmlCreateDocParserCtxt(cur);
10281 if (ctxt == NULL) return(NULL);
10282 if (sax != NULL) {
10283 ctxt->sax = sax;
10284 ctxt->userData = NULL;
10285 }
10286
10287 xmlParseDocument(ctxt);
10288 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10289 else {
10290 ret = NULL;
10291 xmlFreeDoc(ctxt->myDoc);
10292 ctxt->myDoc = NULL;
10293 }
10294 if (sax != NULL)
10295 ctxt->sax = NULL;
10296 xmlFreeParserCtxt(ctxt);
10297
10298 return(ret);
10299}
10300
10301/**
10302 * xmlParseDoc:
10303 * @cur: a pointer to an array of xmlChar
10304 *
10305 * parse an XML in-memory document and build a tree.
10306 *
10307 * Returns the resulting document tree
10308 */
10309
10310xmlDocPtr
10311xmlParseDoc(xmlChar *cur) {
10312 return(xmlSAXParseDoc(NULL, cur, 0));
10313}
10314
Daniel Veillard8107a222002-01-13 14:10:10 +000010315/************************************************************************
10316 * *
10317 * Specific function to keep track of entities references *
10318 * and used by the XSLT debugger *
10319 * *
10320 ************************************************************************/
10321
10322static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
10323
10324/**
10325 * xmlAddEntityReference:
10326 * @ent : A valid entity
10327 * @firstNode : A valid first node for children of entity
10328 * @lastNode : A valid last node of children entity
10329 *
10330 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
10331 */
10332static void
10333xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
10334 xmlNodePtr lastNode)
10335{
10336 if (xmlEntityRefFunc != NULL) {
10337 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
10338 }
10339}
10340
10341
10342/**
10343 * xmlSetEntityReferenceFunc:
10344 * @func : A valid function
10345 *
10346 * Set the function to call call back when a xml reference has been made
10347 */
10348void
10349xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
10350{
10351 xmlEntityRefFunc = func;
10352}
Owen Taylor3473f882001-02-23 17:55:21 +000010353
10354/************************************************************************
10355 * *
10356 * Miscellaneous *
10357 * *
10358 ************************************************************************/
10359
10360#ifdef LIBXML_XPATH_ENABLED
10361#include <libxml/xpath.h>
10362#endif
10363
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010364extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000010365static int xmlParserInitialized = 0;
10366
10367/**
10368 * xmlInitParser:
10369 *
10370 * Initialization function for the XML parser.
10371 * This is not reentrant. Call once before processing in case of
10372 * use in multithreaded programs.
10373 */
10374
10375void
10376xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000010377 if (xmlParserInitialized != 0)
10378 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010379
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010380 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
10381 (xmlGenericError == NULL))
10382 initGenericErrorDefaultFunc(NULL);
Daniel Veillardd0463562001-10-13 09:15:48 +000010383 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000010384 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000010385 xmlInitCharEncodingHandlers();
10386 xmlInitializePredefinedEntities();
10387 xmlDefaultSAXHandlerInit();
10388 xmlRegisterDefaultInputCallbacks();
10389 xmlRegisterDefaultOutputCallbacks();
10390#ifdef LIBXML_HTML_ENABLED
10391 htmlInitAutoClose();
10392 htmlDefaultSAXHandlerInit();
10393#endif
10394#ifdef LIBXML_XPATH_ENABLED
10395 xmlXPathInit();
10396#endif
10397 xmlParserInitialized = 1;
10398}
10399
10400/**
10401 * xmlCleanupParser:
10402 *
10403 * Cleanup function for the XML parser. It tries to reclaim all
10404 * parsing related global memory allocated for the parser processing.
10405 * It doesn't deallocate any document related memory. Calling this
10406 * function should not prevent reusing the parser.
10407 */
10408
10409void
10410xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000010411 xmlCleanupCharEncodingHandlers();
10412 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000010413#ifdef LIBXML_CATALOG_ENABLED
10414 xmlCatalogCleanup();
10415#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000010416 xmlCleanupThreads();
10417 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010418}